In [1]:
import os

import open_clip
import pandas as pd
import torch

from src.encoder import CLIPEncoder
from src.fairface import get_img_name
from src.utils import load_json

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Architecture name and pretrained-weights tag passed to open_clip below;
# both are also reused later when building embedding and output paths.
backbone = "ViT-H-14"
datasource = "laion2b_s32b_b79k"

# create_model_and_transforms returns a 3-tuple; the middle element is not
# used in this notebook.
model, _, preprocess = open_clip.create_model_and_transforms(
    backbone,
    pretrained=datasource,
)
tokenizer = open_clip.get_tokenizer(backbone)

# Put the model in evaluation mode; it is only used for inference here.
model.eval()

KeyboardInterrupt: 

In [None]:
# Text prompts, one per candidate class. The path is relative, so this cell
# must run with the project root as the working directory.
# presumably a list of strings (it is indexed by position and tokenized) -- verify against load_json
label_list = load_json("labels/age_race_gender_latino.json")

# FairFace dataset root. Set the FAIRFACE_DIR environment variable to point at
# a different location; the original path is kept as the default so existing
# setups keep working unchanged.
fface = os.environ.get(
    "FAIRFACE_DIR",
    "/home/lucasmc/Documents/ufrgs/data/datasets/FairFace/",
)
split = "val"

# Annotation table for the chosen split; its 'file' column drives the
# inference loop.
fface_csv = f"{fface}/fface_{split}.csv"
fface_df = pd.read_csv(fface_csv)
files = fface_df['file']

# Directory holding the image embeddings for this backbone / weights pair.
embs_path = f"{fface}/embeddings/{split}/{backbone}/{datasource}"

# Tokenized prompts, later fed to model.encode_text.
text = tokenizer(label_list)

FileNotFoundError: [Errno 2] No such file or directory: 'labels/age_race_gender_latino.json'

In [None]:
def get_label_from_prompt(label):
    """Map a text prompt to a race label based on the keyword it contains.

    The match is a case-sensitive substring test against lowercase keywords,
    and the first keyword found (in the order listed below) wins.

    Args:
        label: Prompt string to inspect.

    Returns:
        One of "White", "Latino_Hispanic", "Southeast Asian", "East Asian",
        "Black", "Indian" or "Middle Eastern"; the string "None" (not the
        ``None`` object) when no keyword is present.
    """
    # Order is significant: "southeast asian" contains "east asian", so it
    # has to be tested first.
    keyword_to_race = (
        ("white", "White"),
        ("latino", "Latino_Hispanic"),
        ("southeast asian", "Southeast Asian"),
        ("east asian", "East Asian"),
        ("black", "Black"),
        ("indian", "Indian"),
        ("middle eastern", "Middle Eastern"),
    )
    for keyword, race in keyword_to_race:
        if keyword in label:
            return race
    return "None"

In [16]:
# Column-oriented accumulator for the predictions: one list per output column,
# each starting empty and independent of the other.
preds_dict = {column: [] for column in ("file", "race_preds")}

In [17]:
# Start from empty columns every time this cell runs. Appending to a dict
# created in another cell made the cell non-idempotent: running it twice
# duplicated every prediction, and the later merge on 'file' multiplied rows.
preds_dict = {"file": [], "race_preds": []}

with torch.no_grad(), torch.autocast("cuda"):
    # Encode all prompts once, outside the per-image loop, and L2-normalize
    # each prompt embedding.
    text_features = model.encode_text(text).to(device="cuda")
    text_features /= text_features.norm(dim=-1, keepdim=True)

    for file in files:
        img_name = get_img_name(file)
        # Image embeddings are loaded from disk instead of being recomputed.
        # presumably stored already normalized -- TODO confirm against CLIPEncoder
        image_features = CLIPEncoder.load_embeddings(f"{embs_path}/{img_name}.npy")
        image_features = image_features.to(device="cuda")

        # Scaled image/prompt similarities turned into a distribution over
        # the prompts.
        text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

        # argmax() with no dim works on the flattened tensor; converting to a
        # plain int makes the list lookup explicit.
        # assumes one embedding per file, so the flat index is a prompt index -- TODO confirm
        winner = int(text_probs.argmax())
        label_winner = label_list[winner]

        preds_dict['file'].append(file)
        preds_dict['race_preds'].append(get_label_from_prompt(label_winner))


  return torch.load(emb_path)


In [18]:
# Attach the predicted race to the FairFace annotations, matching rows by file
# name.
preds_df = pd.DataFrame(preds_dict)

# 'file' is deliberately kept as a regular column rather than the index, so it
# is still written out when the frame is exported with index=False.
# drop() returns a new frame, so re-running this cell is safe.
result_df = (
    pd.merge(fface_df, preds_df, on="file")
    .drop(columns=['service_test'])
)
result_df.head()

Unnamed: 0,file,age,gender,race,race_preds
0,val/1.jpg,3-9,Male,East Asian,Southeast Asian
1,val/2.jpg,50-59,Female,East Asian,East Asian
2,val/3.jpg,30-39,Male,White,White
3,val/4.jpg,20-29,Female,Latino_Hispanic,Latino_Hispanic
4,val/5.jpg,20-29,Male,Southeast Asian,Southeast Asian


In [None]:
# Output file name encodes the model, pretrained weights and dataset split.
out_f = f"./results/argp_latino_race_{backbone}_{datasource}_{split}.csv"

# to_csv does not create missing directories, so make sure ./results exists
# before writing.
os.makedirs(os.path.dirname(out_f), exist_ok=True)
result_df.to_csv(out_f, index=False)