In [None]:
!pip install transformers
!pip install torch torchvision
!pip install pillow
!pip install tqdm



In [None]:
from transformers import ViTImageProcessor, ViTModel
import torch
from PIL import Image

# Single source of truth for the checkpoint, so the image processor and the
# encoder are always loaded from the same pretrained weights/config.
VIT_CHECKPOINT = 'google/vit-base-patch16-224-in21k'

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

processor = ViTImageProcessor.from_pretrained(VIT_CHECKPOINT)
model = ViTModel.from_pretrained(VIT_CHECKPOINT)

# The model is only used for feature extraction, so make eval mode explicit.
# The trailing semicolon stops the cell from printing the full module repr.
model.to(device).eval();

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


ViTModel(
  (embeddings): ViTEmbeddings(
    (patch_embeddings): ViTPatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): ViTEncoder(
    (layer): ModuleList(
      (0-11): 12 x ViTLayer(
        (attention): ViTSdpaAttention(
          (attention): ViTSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): ViTSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ViTIntermediate(
          (dense): Linear(in_features=768, out_features=3072, bias=True)
          (intermediate_act_fn): GELUAct

In [None]:
import os
import pandas as pd
from tqdm import tqdm

def extract_clip_features(image_folder, model, processor, device, batch_size=10):
    """Embed the first image of every pet found in ``image_folder``.

    Despite the historical name, the encoder is whatever ``model`` /
    ``processor`` pair the caller passes in; nothing here is CLIP-specific.

    File names are split on ``'-'`` and the part before the first dash is used
    as the pet id. For each pet id only the first file in sorted file-name
    order is embedded. The embedding is the mean of ``last_hidden_state`` over
    ``dim=1``.

    Args:
        image_folder: Directory containing the image files.
        model: Callable as ``model(**inputs)`` returning an object with a
            ``last_hidden_state`` tensor.
        processor: Callable as ``processor(images=..., return_tensors="pt")``
            returning a mapping of tensors.
        device: Device the processor outputs are moved to before the forward
            pass (the model is assumed to already live there).
        batch_size: Number of images per forward pass. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        dict mapping pet id (str) to its feature vector as a list of floats.
        Empty when the folder holds no files.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Group images by pet id, keeping only the first picture of each pet.
    first_image_by_pet = {}
    for image_name in sorted(os.listdir(image_folder)):
        # Skip sub-directories (e.g. .ipynb_checkpoints); they are not images.
        if not os.path.isfile(os.path.join(image_folder, image_name)):
            continue
        first_image_by_pet.setdefault(image_name.split('-')[0], image_name)

    pet_id_features = {}
    if not first_image_by_pet:
        return pet_id_features

    all_pet_ids = list(first_image_by_pet)

    # Process the images in small batches to keep memory usage bounded.
    batch_starts = range(0, len(all_pet_ids), batch_size)
    for start in tqdm(batch_starts, total=len(batch_starts)):
        batch_pet_ids = all_pet_ids[start:start + batch_size]

        images = []
        for pet_id in batch_pet_ids:
            image_path = os.path.join(image_folder, first_image_by_pet[pet_id])
            # convert() returns a new in-memory image, so the file handle can
            # be released as soon as the conversion is done.
            with Image.open(image_path) as image_file:
                images.append(image_file.convert('RGB'))

        inputs = processor(images=images, return_tensors="pt")
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            pooled = torch.mean(outputs.last_hidden_state, dim=1)

        # One device-to-host transfer per batch instead of one per image.
        for pet_id, feature_vector in zip(batch_pet_ids, pooled.cpu().tolist()):
            pet_id_features[pet_id] = feature_vector

        # Release per-batch tensors before the next forward pass.
        del inputs, outputs, pooled
        torch.cuda.empty_cache()

    return pet_id_features


In [None]:
!pip install kaggle
from google.colab import files
files.upload()
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c 'petfinder-adoption-prediction'



Saving kaggle.json to kaggle (1).json
petfinder-adoption-prediction.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
# Unpack the competition archive into /content (the directory it lives in).
import zipfile

data_zip_path = "/content/petfinder-adoption-prediction.zip"
with zipfile.ZipFile(data_zip_path, mode='r') as archive:
    archive.extractall(path="/content")

KeyboardInterrupt: 

In [None]:
# Embed the first image of every pet in the training set.
# Note: the encoder loaded in this notebook is a ViT model; the "clip" in the
# names is historical. The result is a dict keyed by pet id, not a DataFrame.
image_folder = "/content/train_images"
clip_features_df = extract_clip_features(
    image_folder=image_folder,
    model=model,
    processor=processor,
    device=device,
)

1466it [04:28,  5.47it/s]


In [None]:
# Number of pets for which a feature vector was extracted.
len(clip_features_df)

14652

In [None]:
import os
from google.colab import drive

# Mount Google Drive so the extracted features can be written to it.
drive.mount('/content/drive')

# Make sure the output directory exists on Drive. exist_ok=True replaces the
# separate os.path.exists() check, which is redundant and racy.
processed_data_dir = '/content/drive/MyDrive/ProcessedData'
os.makedirs(processed_data_dir, exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Persist the training-set features (a dict keyed by pet id) to Drive as JSON.
import json

train_features_path = os.path.join(processed_data_dir, 'clip_features.json')
with open(train_features_path, 'w') as out_file:
    json.dump(clip_features_df, out_file)

In [None]:
# Embed the first image of every pet in the test set.
# Note: the encoder loaded in this notebook is a ViT model; the "clip" in the
# names is historical. The result is a dict keyed by pet id, not a DataFrame.
image_folder_test = "/content/test_images"
clip_features_test_df = extract_clip_features(
    image_folder=image_folder_test,
    model=model,
    processor=processor,
    device=device,
)


In [None]:
import json
# Persist the test-set features (a dict keyed by pet id) to Drive as JSON.
test_features_path = os.path.join(processed_data_dir, 'clip_features_test.json')
with open(test_features_path, 'w') as out_file:
    json.dump(clip_features_test_df, out_file)