In [2]:
import clip
import torch
import pandas as pd
import numpy as np
from PIL import Image

In [3]:
def filter_df(df, race=None, gender=None):
    """Return the rows of ``df`` matching the requested gender and/or race.

    Args:
        df: DataFrame with ``'gender'`` and ``'race'`` columns.
        race: Value to keep in the ``'race'`` column; a falsy value
            (``None``, ``''``) disables this filter.
        gender: Value to keep in the ``'gender'`` column; a falsy value
            disables this filter.

    Returns:
        The filtered DataFrame. When neither filter is active the input
        object itself is returned (no copy is made).
    """
    selected = df
    # Gender is applied before race, one boolean-mask selection per active filter.
    for column, wanted in (('gender', gender), ('race', race)):
        if not wanted:
            continue
        selected = selected[selected[column] == wanted]
    return selected

In [4]:
%matplotlib inline

# Announce the load up front; the confirmation is printed once clip.load returns.
print('\nLoading model...')

# Candidate model names accepted by clip.load and candidate layer names.
# The first entry of each list is the one selected below.
available_models = ['RN50', 'RN101', 'RN50x4', 'RN50x16']
layers = ['layer4', 'layer3', 'layer2', 'layer1']
clip_model, saliency_layer = available_models[0], layers[0]

# Use the GPU whenever torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# `model` encodes images; `preprocess` turns a PIL image into the model's input.
model, preprocess = clip.load(clip_model, device=device, jit=False)
print(f"Done! Model loaded to {device} device")


Loading model...
Done! Model loaded to cuda device


In [5]:
# Root folder of the FairFace dataset on disk.
# NOTE(review): this is a machine-specific absolute path; change it to your own
# dataset location before running the notebook elsewhere.
path = "/home/lazye/Documents/ufrgs/mcs/datasets/FairFace/"
# Load the training label table. Because `path` already ends with "/", the
# f-string below yields ".../FairFace//train/..."; POSIX treats the doubled
# slash like a single one.
fface_df = pd.read_csv(f"{path}/train/fairface_label_train.csv")

In [6]:
# Split the label table by the 'gender' column: one frame per label value.
man_df, woman_df = (
    filter_df(fface_df, gender=label) for label in ('Male', 'Female')
)

In [7]:
def generate_embeddings_dataframe(df):
    """Compute normalised CLIP image embeddings for a collection of image files.

    Relies on notebook-level globals: ``path`` (dataset root folder),
    ``model`` and ``preprocess`` (as returned by ``clip.load``) and ``device``.

    Args:
        df: Iterable of image file names relative to ``path``. In this
            notebook it is the ``'file'`` column of a label frame, e.g.
            ``man_df['file']``. Note that iterating a whole DataFrame yields
            its column names, so pass the column, not the frame.

    Returns:
        pandas.DataFrame with one row per input file and two columns:
        ``'file'`` (the name exactly as given) and ``'embeddings'`` (a NumPy
        array with the feature vector scaled to unit L2 norm; the leading
        batch axis of size 1 is kept).
    """
    import os  # local import: not part of the notebook's import cell

    files = []
    embs = []

    for file in df:
        # os.path.join works whether or not `path` ends with a separator.
        img_path = os.path.join(path, file)

        # Close the file handle as soon as the image has been preprocessed,
        # instead of leaving one handle per image to the garbage collector.
        with Image.open(img_path) as img:
            img_input = preprocess(img).unsqueeze(0).to(device)

        # Inference only: no autograd bookkeeping for the forward pass or
        # for the normalisation.
        with torch.no_grad():
            image_features = model.encode_image(img_input)
            image_features /= image_features.norm(dim=-1, keepdim=True)

        files.append(file)
        embs.append(image_features.cpu().numpy())

    return pd.DataFrame(data={'file': files, 'embeddings': embs})

In [16]:
# Embed every image listed in the 'Male' subset. This runs one model forward
# pass per file, so expect this cell to take a while on the full split.
man_embs_df = generate_embeddings_dataframe(man_df['file'])

In [17]:
# Peek at the first rows to check the 'file' / 'embeddings' columns.
man_embs_df.head()

Unnamed: 0,file,embeddings
0,train/1.jpg,"[[-0.01399, 0.03305, -0.02834, 0.0002656, -0.0..."
1,train/6.jpg,"[[-0.001924, 0.03046, -0.0184, 0.0004253, -0.0..."
2,train/7.jpg,"[[-0.006527, 0.03595, 0.02142, 0.001106, -0.00..."
3,train/9.jpg,"[[0.005096, 0.01072, -0.011765, 1.42e-05, -0.0..."
4,train/10.jpg,"[[-0.02644, 0.02724, -0.01637, -0.0001937, -0...."


In [18]:
# Persist the embeddings so the slow embedding step need not be repeated.
# NOTE(review): the file is written in pickle format despite its ".csv"
# extension; it can only be read back with pd.read_pickle, not pd.read_csv.
man_embs_df.to_pickle('man_embeddings.csv')

In [19]:
# Reload the saved embeddings from disk.
# NOTE(review): despite the ".csv" extension this file is a pickle. Unpickling
# can execute arbitrary code, so only load files you produced yourself.
embds = pd.read_pickle('man_embeddings.csv')

In [20]:
# Display the reloaded frame (pandas truncates the rendering) to check the
# save/load round trip.
embds

Unnamed: 0,file,embeddings
0,train/1.jpg,"[[-0.01399, 0.03305, -0.02834, 0.0002656, -0.0..."
1,train/6.jpg,"[[-0.001924, 0.03046, -0.0184, 0.0004253, -0.0..."
2,train/7.jpg,"[[-0.006527, 0.03595, 0.02142, 0.001106, -0.00..."
3,train/9.jpg,"[[0.005096, 0.01072, -0.011765, 1.42e-05, -0.0..."
4,train/10.jpg,"[[-0.02644, 0.02724, -0.01637, -0.0001937, -0...."
...,...,...
45981,train/86738.jpg,"[[-0.01335, 0.02893, -0.01352, 0.001271, -0.02..."
45982,train/86739.jpg,"[[-0.01167, 0.03613, 0.0142, 0.000888, -0.0283..."
45983,train/86740.jpg,"[[-0.00935, 0.04648, -0.0034, -0.0008087, -0.0..."
45984,train/86741.jpg,"[[-0.00777, 0.01926, -0.003849, -0.000738, -0...."
