In [1]:
import os

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import DataLoader
from transformers import CLIPProcessor, CLIPModel

In [2]:
# Model and processor are both loaded from the same pretrained checkpoint name.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

In [3]:
# Root directory of the sem-men assets; the manifest CSV sits directly inside it.
SEM_MEN_ROOT = "assets/sem-men/"
manifest_path = os.path.join(SEM_MEN_ROOT, "manifest.csv")
sem_men_manifest = pd.read_csv(manifest_path)
# Wrap the manifest DataFrame as a `datasets.Dataset` so a DataLoader can iterate over it.
sem_men_ds = Dataset.from_pandas(sem_men_manifest)

In [4]:
def collator(data):
    """Turn a list of example dicts into a dict of per-key lists.

    The keys of the first example define the keys of the batch; for each of
    them, the value is the list of that key's values across all examples, in
    input order.

    Args:
        data: non-empty sequence of mappings (one per example).

    Returns:
        dict mapping each key of ``data[0]`` to a list with one entry per example.

    Raises:
        IndexError: if ``data`` is empty.
        KeyError: if a later example lacks a key present in the first one.
    """
    first_example = data[0]
    batch = {}
    for key in first_example:
        batch[key] = [example[key] for example in data]
    return batch

In [5]:
# Batches of 16 examples, assembled by `collator` into a dict of lists.
# Shuffling is left off (the default), so batches follow dataset order.
sem_men_dl = DataLoader(
    sem_men_ds,
    batch_size=16,
    shuffle=False,
    collate_fn=collator,
)

In [13]:
def get_text_feats(dataloader, processor, model):
    """Embed every text yielded by ``dataloader`` and stack the results.

    Each batch must be a mapping with a ``"text"`` entry (a list of strings).
    The texts are tokenized with ``processor`` (padded per batch), passed
    through ``model.get_text_features``, and the per-batch outputs are
    concatenated along axis 0 in iteration order.

    Inference runs under ``torch.no_grad()`` so no autograd graph is kept per
    batch, and tensors are moved to ``model.device`` before the forward pass
    and back to CPU before conversion, so the function also works when the
    model lives on an accelerator.

    Args:
        dataloader: iterable of batches, each exposing ``batch["text"]``.
        processor: callable accepting ``text=``, ``return_tensors=`` and
            ``padding=`` and returning a mapping of tensors.
        model: object exposing ``device`` and ``get_text_features(**inputs)``.

    Returns:
        numpy.ndarray with one row per input text (presumably of shape
        ``(num_texts, embed_dim)`` -- TODO confirm against the model used).

    Raises:
        ValueError: if ``dataloader`` yields no batches (nothing to concatenate).
    """
    device = model.device
    all_feats = []
    with torch.no_grad():
        for batch in dataloader:
            inputs = processor(text=batch["text"], return_tensors="pt", padding=True)
            # Keep the inputs on the same device as the model weights.
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
            feats = model.get_text_features(**inputs)
            # .cpu() is required before .numpy() when the model is not on CPU.
            all_feats.append(feats.detach().cpu().numpy())
    return np.concatenate(all_feats, axis=0)

In [14]:
# Text features for the whole sem-men dataset, one row per example in loader order.
feats = get_text_feats(
    dataloader=sem_men_dl,
    processor=processor,
    model=model,
)

In [16]:
# Pairwise cosine similarity of every text feature row against every other row.
txt_sims = cosine_similarity(X=feats)

In [18]:
# Persist the similarity matrix, creating the output directory if it is missing.
sem_men_eval_dir = "evals/sem-men"
os.makedirs(sem_men_eval_dir, exist_ok=True)
clip_sims_path = os.path.join(sem_men_eval_dir, "clip.npy")
np.save(clip_sims_path, txt_sims)