In [1]:
import torch
import pickle
from transformers import CLIPProcessor, CLIPModel
import numpy as np
from tqdm.notebook import tqdm
import os
from PIL import Image

def dump_data(data, filename):
    """Serialize ``data`` to ``filename`` with pickle.

    Parameters
    ----------
    data : object
        Any picklable Python object.
    filename : str or os.PathLike
        Destination file. It is opened in binary write mode, so an existing
        file at that location is overwritten.

    Notes
    -----
    The highest pickle protocol available to the running interpreter is used.
    The destination path is printed before writing.
    """
    # os.fspath accepts both plain strings and pathlib.Path objects, so the
    # log message no longer breaks on non-str path-like targets.
    target = os.fspath(filename)
    print(f'writing file: {target}')
    with open(target, 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

# --- Paths -------------------------------------------------------------------
# NOTE(review): absolute, machine-specific locations; adjust when running elsewhere.
project_dir = "/projects/crunchie/boyanova/EEG_Things/Grouping-Embeddings"
image_dir = "/projects/crunchie/boyanova/EEG_Things/data_set/Images"

# Handed to from_pretrained() as cache_dir for the downloaded checkpoint files.
cache_dir = os.path.join(project_dir, "models")

# --- Stimulus list -----------------------------------------------------------
# allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code -- only point this at files you trust.
fmri_stim = np.load(os.path.join(project_dir, "files", "fmri_train_stim.npy"), allow_pickle=True)

# Build one path per stimulus: <image_dir>/<sub-folder>/<file name>.
imagePaths = []
for im in tqdm(fmri_stim):
    # Sub-folder name = text before the first "." minus its last four characters
    # (presumably an exemplar suffix such as "_01b" -- TODO confirm).
    im_cat = im.split(".")[0][0:-4]
    imagePaths.append(os.path.join(image_dir, im_cat, im))

# --- Load CLIP model and processor --------------------------------------------
clip_checkpoint = "openai/clip-vit-large-patch14"

# Place the model with a single, explicit mechanism. The previous combination
# of device_map="auto" followed by .to("cuda") mixed two placement strategies.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = CLIPModel.from_pretrained(
    clip_checkpoint,
    # Half precision on GPU; fall back to float32 when running on CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    cache_dir=cache_dir,
)
model = model.to(device)

processor = CLIPProcessor.from_pretrained(clip_checkpoint, cache_dir=cache_dir)

  0%|          | 0/8640 [00:00<?, ?it/s]



config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

In [2]:
batch_size = 8
# Per-batch embedding arrays; kept separate from the final concatenated array
# so the name `all_image_embeddings` only ever refers to a NumPy array.
embedding_batches = []

# Send inputs to wherever the model lives instead of hardcoding "cuda".
target_device = model.device

# Process images in batches
for start in tqdm(range(0, len(imagePaths), batch_size)):
    # Load a batch of images. convert("RGB") returns a new image, so the
    # underlying file can be closed right away -- this avoids leaving one open
    # file handle per stimulus.
    batch_images = []
    for path in imagePaths[start:start + batch_size]:
        with Image.open(path) as img:
            batch_images.append(img.convert("RGB"))

    # Use the CLIP processor to prepare the batch of images. (`padding` is a
    # text/tokenizer option and is not needed for image-only input.)
    inputs = processor(images=batch_images, return_tensors="pt").to(target_device)

    # Obtain the image embeddings; no gradients are needed for inference.
    with torch.no_grad():
        image_embeddings = model.get_image_features(**inputs)

    # Move the embeddings to CPU and store them as a NumPy array
    embedding_batches.append(image_embeddings.cpu().numpy())

# Convert the list of per-batch embeddings to a single NumPy array
all_image_embeddings = np.concatenate(embedding_batches, axis=0)

# Every stimulus path must have produced exactly one embedding row.
assert all_image_embeddings.shape[0] == len(imagePaths), "embedding/stimulus count mismatch"
print("Total Image Embeddings Shape:", all_image_embeddings.shape)


  0%|          | 0/1080 [00:00<?, ?it/s]

Total Image Embeddings Shape: (8640, 768)


In [5]:
# Bundle the stimulus names, their resolved image paths and the computed
# embeddings into one dictionary for saving.
CLIP_vis = dict(
    stimuli=fmri_stim,
    stimuli_paths=imagePaths,
    embeddings=all_image_embeddings,
)

In [6]:
# Write the bundle to <project_dir>/files/<file_name>.
file_name = "CLIP_vis_fmri.pickle"
# NOTE: despite its name, `save_dir` holds the full path of the output file.
save_dir = os.path.join(project_dir, "files", file_name)
dump_data(data=CLIP_vis, filename=save_dir)

writing file: /projects/crunchie/boyanova/EEG_Things/eeg_prep/scripts/CLIP_vis_fmri_512.pickle
