In [2]:
import torch
import clip
import pandas as pd
import numpy as np

In [3]:
print('\nLoading model...')
# Candidate CLIP backbone names and layer names; index 0 of each list is
# selected below.
available_models = ['RN50', 'RN101', 'RN50x4', 'RN50x16']
layers = ['layer4', 'layer3', 'layer2', 'layer1']

clip_model = available_models[0]
# NOTE(review): saliency_layer is not read by any cell visible here --
# presumably consumed elsewhere; confirm before removing.
saliency_layer = layers[0]

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns the model together with its image preprocessing transform.
model, preprocess = clip.load(clip_model, device=device, jit=False)
print(f"Done! Model loaded to {device} device")


Loading model...
Done! Model loaded to cuda device


In [4]:
# NOTE(review): despite the ".csv" extension these files are read as pickles;
# consider renaming them so the extension matches the format.
# Unpickling can execute arbitrary code -- only load files from a trusted source.
woman_embds_df = pd.read_pickle('../data/woman_embeddings.csv')
man_embds_df = pd.read_pickle('../data/man_embeddings.csv')

In [5]:
def get_similarities(img_embs, classes):
    """Score image embedding(s) against a text prompt built from each class.

    Args:
        img_embs: NumPy array holding the image embedding(s); it is converted
            with ``torch.from_numpy``. This function does NOT normalise it --
            presumably the stored embeddings are already unit-norm
            (TODO confirm against the code that produced them).
        classes: Iterable of class names. Each one is inserted into the
            prompt ``"a photo of a {c}"`` and tokenised with ``clip.tokenize``.

    Returns:
        Tensor on ``device`` equal to ``100.0 * image_features @ text_features.T``,
        i.e. one column per entry in ``classes``.
    """
    text_inputs = torch.cat(
        [clip.tokenize(f"a photo of a {c}") for c in classes]).to(device)

    with torch.no_grad():
        text_features = model.encode_text(text_inputs)
        # Unit-normalise each text embedding so the dot product below depends
        # only on direction, not magnitude.
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

    # Cast the stored embedding to the dtype the text encoder produced.
    # Without this the matmul raises a dtype-mismatch error whenever the two
    # differ (e.g. float16 embeddings on disk with a float32 model).
    image_features = torch.from_numpy(img_embs).to(
        device=device, dtype=text_features.dtype)

    # Dot product of the image embedding with unit-norm text embeddings,
    # scaled by 100. If the image embedding is unit-norm too, this is exactly
    # 100 * cosine similarity (not an approximation).
    similarity = (100.0 * image_features @ text_features.T)
    return similarity

In [6]:
# Candidate class names for each gender; get_similarities places each one in
# an "a photo of a {c}" prompt.
woman_classes = ['girl', 'lady', 'woman']
man_classes = ['boy', 'gentleman', 'man']

In [7]:
# Take the value in the 'embeddings' column of the first row and score it
# against the woman class names.
woman_img_emb = woman_embds_df.iloc[0]['embeddings']
woman_sims = get_similarities(woman_img_emb, woman_classes)
print(woman_sims)

tensor([[20.4844, 20.0312, 20.5000]], device='cuda:0', dtype=torch.float16)


In [8]:
# Score the SAME woman image embedding against the man class names
# (the input is woman_img_emb; only the class list changes).
man_sims = get_similarities(woman_img_emb, man_classes)
print(man_sims)

tensor([[17.3906, 14.6641, 15.2578]], device='cuda:0', dtype=torch.float16)


In [9]:
def get_synms_winner(sims):
    """Return the index of the highest score in the first row of ``sims``.

    Args:
        sims: 2-D torch tensor of similarity scores; only row 0 is inspected.

    Returns:
        int: Column index of the maximum of row 0. On ties the first
        occurrence wins.
    """
    # detach() lets this also accept tensors that still track gradients.
    first_row = sims.detach().cpu().numpy()[0]
    # argmax over the first row only: comparing row 0 against the maximum of
    # the whole array finds nothing (and raises IndexError) when a later row
    # holds the global maximum.
    return int(np.argmax(first_row))

In [10]:
# Pick the best-scoring class name from each list, then pair the two winners
# as the final candidate classes.
final_man = man_classes[get_synms_winner(man_sims)]
final_woman = woman_classes[get_synms_winner(woman_sims)]
final_classes = [final_man, final_woman]
final_classes

['boy', 'woman']

In [15]:
def run_clip_classifier(img_emb, classes):
    """Rank ``classes`` for an image embedding by softmaxed similarity.

    The similarities from ``get_similarities`` are passed through a softmax
    over the class axis; only the first row of the result is used.

    Returns:
        list[tuple]: ``(class_name, percentage)`` pairs in the order returned
        by ``topk`` (highest probability first), with each percentage rounded
        to two decimals.
    """
    probs = get_similarities(img_emb, classes).softmax(dim=-1)[0]
    # topk over every entry yields the full ranking, not just the winner.
    ranked_values, ranked_indices = probs.topk(len(probs))
    return [
        (classes[idx], round(100 * val.item(), 2))
        for val, idx in zip(ranked_values, ranked_indices)
    ]

In [16]:
# Classify the woman image embedding between the two winning class names.
print(run_clip_classifier(woman_img_emb, final_classes))

[('woman', 95.75), ('boy', 4.27)]
