In [2]:
import gc

import clip as clip
import numpy as np
import torch
from tqdm import tqdm

In [3]:
# Read the raw embedding arrays for the test, train and validation splits
# (loaded in that order) from data/embeddings/.
test_embeddings_raw, train_embeddings_raw, val_embeddings_raw = (
    np.load(npy_path)
    for npy_path in (
        'data/embeddings/test_raw.npy',
        'data/embeddings/train_raw.npy',
        'data/embeddings/val_raw.npy',
    )
)



In [4]:
def _unit_rows(matrix):
    """Return ``matrix`` with every row divided by its own Euclidean (L2) norm."""
    return matrix / np.linalg.norm(matrix, axis=1, keepdims=True)


# Step 1: scale each raw embedding row to unit length.
test_embeddings_normalized = _unit_rows(test_embeddings_raw)
train_embeddings_normalized = _unit_rows(train_embeddings_raw)
val_embeddings_normalized = _unit_rows(val_embeddings_raw)

# Step 2: the centering vector is the per-dimension mean of the normalized
# *training* rows only; it is written to disk alongside the embeddings.
train_center = train_embeddings_normalized.mean(axis=0)
np.save('data/embeddings/train_center.npy', train_center)

# Step 3: subtract the training center from every split, then re-normalize the
# shifted rows back to unit length.
test_embeddings = _unit_rows(test_embeddings_normalized - train_center)
train_embeddings = _unit_rows(train_embeddings_normalized - train_center)
val_embeddings = _unit_rows(val_embeddings_normalized - train_center)

# Step 4: persist the centered, re-normalized embeddings for each split.
np.save('data/embeddings/test_centered.npy', test_embeddings)
np.save('data/embeddings/train_centered.npy', train_embeddings)
np.save('data/embeddings/val_centered.npy', val_embeddings)

In [6]:
# Sanity check: show the shape of the centered training embeddings.
# The recorded run printed (118287, 512).
print(train_embeddings.shape)

(118287, 512)


In [None]:
# Encode the concept vocabulary with the CLIP text encoder.
# Each line of mscoco.txt is split on "," and only the first field is kept.
with open('data/dict/mscoco.txt', 'r') as file:
    vocab = [line.split(",")[0] for line in file.read().splitlines()]
# The concept dictionary is the last 10000 entries of the vocabulary file.
concepts = vocab[-10000:]
if not concepts:
    # Fail here with a clear message instead of leaving `all_text_features`
    # undefined and hitting a NameError in a later cell.
    raise ValueError("data/dict/mscoco.txt contains no concepts to encode")

# Run the garbage collector first so that tensors held only by unreachable
# Python objects are released before the CUDA caching allocator is asked to
# hand back its unused blocks.
gc.collect()
torch.cuda.empty_cache()

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/16", device=device)

batch_size = 50
# Collect per-batch results in a list and concatenate once at the end; calling
# torch.cat inside the loop re-copies every previously encoded row each time.
text_feature_batches = []
for i in tqdm(range(0, len(concepts), batch_size)):
    text = clip.tokenize(concepts[i:i+batch_size]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text)
        # Scale each text embedding to unit L2 norm along the feature axis.
        text_features /= text_features.norm(dim=-1, keepdim=True)
    text_feature_batches.append(text_features)

# One row per concept, in the same order as `concepts`.
all_text_features = torch.cat(text_feature_batches, 0)

In [None]:
# Write the un-centered concept embeddings to disk as a NumPy array.
np.save("data/dict/mscoco_clip.npy", all_text_features.cpu().numpy())

torch.cuda.empty_cache()
gc.collect()

# Center the concept embeddings on their own mean vector, then rescale every
# centered row back to unit L2 norm.
concept_center = all_text_features.mean(dim=0)
print(concept_center.shape)
center_text_features = all_text_features - concept_center
row_norms = center_text_features.norm(dim=-1, keepdim=True)
center_text_features = center_text_features / row_norms

# D is the centered dictionary moved to the CPU; it is passed to np.save as a
# torch tensor, exactly as before.
D = center_text_features.cpu()

np.save("data/dict/mscoco_clip_centered.npy", D)

print("Done")
