In [2]:
import torch
import clip
import pandas as pd
import numpy as np
import json

In [3]:
# Ask the clip package which model names it knows about and print the list.
available_models = clip.available_models()
print(available_models)

['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14', 'ViT-L/14@336px']


In [4]:
print('\nLoading model...')

# Name of the CLIP variant to load; it is one of the names listed by clip.available_models().
clip_model = 'ViT-B/16'

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns a pair, unpacked here as (model, preprocess).
model, preprocess = clip.load(clip_model, device=device, jit=False)
print(f"Done! Model {clip_model} loaded to {device} device")


Loading model...


100%|███████████████████████████████████████| 335M/335M [01:07<00:00, 5.19MiB/s]


Done! Model ViT-B/16 loaded to cuda device


In [16]:
# Precomputed image embeddings; later cells read the 'file' and 'embeddings' columns.
# NOTE(review): read_pickle can execute arbitrary code while unpickling, so only
# load files that come from a trusted source.
img_embs = pd.read_pickle('../data/fface_val_img_embs.pkl')

# Precomputed text embeddings. map_location=device places them on the device
# selected when the model was loaded, so a file saved from a GPU session still
# loads on a CPU-only machine.
txt_embs = torch.load('../data/synms-gender-labels.pt', map_location=device)

# Only the parsing needs the open file handle; the derived lists are built afterwards.
with open('../data/synms_gender_labels.json', encoding='utf-8') as json_data:
    data = json.load(json_data)

fface_classes = list(data.keys())
prompts = list(data.values())
# Assumes the first JSON entry holds the male prompts and the second the female
# prompts (relies on key order in the file) — TODO confirm against the JSON.
man_prompts = prompts[0]
woman_prompts = prompts[1]
# Concatenated prompt list; presumably in the same order as the rows of txt_embs — verify.
fface_prompts = man_prompts + woman_prompts

fface_df = pd.read_csv('../data/fface_val.csv')

In [10]:
def get_similarities(img, txts):
    """Score image embedding(s) against a set of text embeddings.

    Args:
        img: NumPy array with the image embedding(s). Callers index the result
            with ``[0]``, so this is presumably shaped (1, dim) — TODO confirm.
        txts: torch tensor of text embeddings, one row per text.

    Returns:
        torch tensor equal to ``100.0 * img @ txts.T``, located on the same
        device as ``txts``.
    """
    # Follow the device and dtype of the text embeddings instead of hard-coding
    # 'cuda', so the function also works on CPU-only machines and cannot hit a
    # device or dtype mismatch in the matrix product.
    image_features = torch.from_numpy(img).to(device=txts.device, dtype=txts.dtype)
    return 100.0 * image_features @ txts.T

In [None]:
def get_synms_winner(simis):
    """Return the index of the highest score in the first row of ``simis``.

    Args:
        simis: 2-D similarity scores, either a torch tensor (any device) or
            something ``np.asarray`` accepts. Only row 0 is inspected.

    Returns:
        int: position of the first maximum within row 0.
    """
    # The original body read an undefined `np_simis`; do the tensor -> NumPy
    # conversion that the function was meant to perform.
    if torch.is_tensor(simis):
        simis = simis.detach().cpu().numpy()
    np_simis = np.asarray(simis)
    # argmax returns the first occurrence of the maximum.
    return int(np.argmax(np_simis[0]))

In [92]:
def get_top_synm(final_dict):
    """Pick, for every file, the label with the highest score.

    Args:
        final_dict: mapping ``file -> {label: score}``.

    Returns:
        dict with two parallel lists: ``'file'`` (the keys of ``final_dict``)
        and ``'winner'`` (the top-scoring label for each file; on ties the
        label that appears first wins).

    Raises:
        ValueError: if any per-file score mapping is empty.
    """
    # Materialise the keys so the result is a stable list rather than a live view.
    files = list(final_dict)
    # The original called np.where on the scalar maximum, which does not locate
    # the maximum at all; select the best-scoring label directly.
    wins = [max(scores, key=scores.get) for scores in final_dict.values()]

    top_synm_dict = {'file': files, 'winner': wins}
    return top_synm_dict

In [93]:
def get_sum_synms(final_dict, man_labels=None):
    """Predict a gender per file by summing the scores of each prompt group.

    Args:
        final_dict: mapping ``file -> {label: score}``.
        man_labels: iterable of labels counted towards the male score. Every
            other label counts towards the female score. Defaults to the
            notebook-level ``man_prompts`` list, as before.

    Returns:
        dict with two parallel lists: ``'file'`` (the keys of ``final_dict``)
        and ``'preds'``, strings of the form ``'Male, <score>'`` or
        ``'Female, <score>'``. A tie is reported as female.
    """
    if man_labels is None:
        # Backward-compatible default: the global prompt list used originally.
        man_labels = man_prompts
    # Set membership is constant time, unlike scanning the list for every label.
    man_labels = frozenset(man_labels)

    files = list(final_dict)
    preds = []
    for scores in final_dict.values():
        man_score = 0
        woman_score = 0
        for label, score in scores.items():
            if label in man_labels:
                man_score += score
            else:
                woman_score += score
        preds.append(
            f'Male, {man_score}' if man_score > woman_score else f'Female, {woman_score}'
        )

    sum_dict = {'file': files, 'preds': preds}
    return sum_dict

In [28]:
# Map every image file to a {prompt: similarity score} dictionary.
final_dict = {}
for _, row in img_embs.iterrows():
    row_sims = get_similarities(row['embeddings'], txt_embs)
    # Pair each prompt with its score from the first row of the similarity
    # result, converting every score to a plain Python number.
    final_dict[row['file']] = {
        prompt: sim.cpu().numpy().item()
        for prompt, sim in zip(fface_prompts, row_sims[0])
    }

In [94]:
# No visible cell defines `voting_dict`, so this cell failed on a fresh kernel.
# The recorded "Male, <score>" / "Female, <score>" predictions match the strings
# produced by get_sum_synms, so build the dictionary from it here.
voting_dict = get_sum_synms(final_dict)
voting_df = pd.DataFrame(data=voting_dict)

In [49]:
# Display the voting DataFrame as the cell's output.
voting_df

Unnamed: 0,files,preds
0,val/1.jpg,"Male, 228.75"
1,val/2.jpg,"Female, 231.359375"
2,val/3.jpg,"Male, 228.3125"
3,val/4.jpg,"Female, 222.6875"
4,val/5.jpg,"Male, 245.9375"
...,...,...
10949,val/10950.jpg,"Male, 242.4375"
10950,val/10951.jpg,"Male, 211.109375"
10951,val/10952.jpg,"Male, 262.71875"
10952,val/10953.jpg,"Female, 237.703125"


In [52]:
# Split final_dict into two parallel lists: file names and their score dictionaries.
files = list(final_dict.keys())
scores = list(final_dict.values())

In [53]:
# One row per file: the file name plus its full {prompt: score} dictionary.
scores_dict = dict(files=files, scores=scores)
scores_df = pd.DataFrame(scores_dict)

In [54]:
# Display the per-file scores DataFrame as the cell's output.
scores_df

Unnamed: 0,files,scores
0,val/1.jpg,"{'young man': 24.6875, 'adult male': 18.765625..."
1,val/2.jpg,"{'young man': 20.953125, 'adult male': 17.9218..."
2,val/3.jpg,"{'young man': 22.75, 'adult male': 20.5, 'male..."
3,val/4.jpg,"{'young man': 19.359375, 'adult male': 18.2656..."
4,val/5.jpg,"{'young man': 23.984375, 'adult male': 21.6562..."
...,...,...
10949,val/10950.jpg,"{'young man': 23.953125, 'adult male': 23.7812..."
10950,val/10951.jpg,"{'young man': 20.71875, 'adult male': 17.40625..."
10951,val/10952.jpg,"{'young man': 24.46875, 'adult male': 26.20312..."
10952,val/10953.jpg,"{'young man': 20.8125, 'adult male': 19.828125..."


In [57]:
# Write the voting DataFrame to a CSV file in the data directory.
voting_df.to_csv('../data/voting_df.csv')

In [56]:
# Write the per-file scores DataFrame to a CSV file in the data directory.
scores_df.to_csv('../data/scores_df.csv')

In [58]:
# Display the full file -> {prompt: score} mapping as the cell's output.
# NOTE(review): this prints every entry of the dictionary; consider showing
# only a few items to keep the notebook output small.
final_dict

{'val/1.jpg': {'young man': 24.6875,
  'adult male': 18.765625,
  'male': 21.671875,
  'man': 22.25,
  'guy': 24.4375,
  'boy': 25.96875,
  'middle-aged man': 23.296875,
  'old man': 22.46875,
  'grandfather': 22.890625,
  'grandpa': 22.3125,
  'young woman': 19.59375,
  'adult female': 16.09375,
  'female': 17.765625,
  'woman': 20.640625,
  'lady': 19.734375,
  'girl': 22.265625,
  'madam': 21.234375,
  'old woman': 21.453125,
  'grandmother': 21.671875,
  'grandma': 21.359375},
 'val/2.jpg': {'young man': 20.953125,
  'adult male': 17.921875,
  'male': 21.078125,
  'man': 22.1875,
  'guy': 22.15625,
  'boy': 21.953125,
  'middle-aged man': 23.09375,
  'old man': 21.453125,
  'grandfather': 21.53125,
  'grandpa': 20.859375,
  'young woman': 22.3125,
  'adult female': 19.265625,
  'female': 20.03125,
  'woman': 24.71875,
  'lady': 23.046875,
  'girl': 22.65625,
  'madam': 25.015625,
  'old woman': 24.8125,
  'grandmother': 25.078125,
  'grandma': 24.421875},
 'val/3.jpg': {'young man'