In [1]:
import clip
import torch
import numpy as np
from tqdm import tqdm
import os
from PIL import Image

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
device

'cpu'

In [3]:
labels = ['male', 'female']
tkns = ['A photo of a person of sex ' + label for label in labels]
text = clip.tokenize(tkns).to(device)
BATCH_SIZE = 100000

In [4]:
dir_path = r'/Users/hanselblanco/Documents/4to/ML/UTKFace/UTKFace'
ln = 0
photo_paths = os.listdir(dir_path)

for path in photo_paths:
    if os.path.isfile(os.path.join(dir_path, path)):
        ln += 1

In [5]:
results = []
photos_to_analize = 4000

for i in tqdm(range(0, ln, BATCH_SIZE)):
    images = [preprocess(Image.open(dir_path + '/' + photo_paths[j])) for j in range(photos_to_analize)]
    image_input = torch.tensor(np.stack(images)).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image_input)
        logits_per_image, logits_per_text = model(image_input, text)
        # The softmax function takes the original confidence and applys a transform to make all the confidence add up to one
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
        results.append(probs)

100%|██████████| 1/1 [01:29<00:00, 89.17s/it]


In [6]:
res = np.concatenate(results,axis=0)
choices = np.argmax(res,axis=1)
choices.shape

(2000,)

In [7]:
getlabel = lambda x:labels[x]
vgetlabel = np.vectorize(getlabel)
genders = vgetlabel(choices)
genders

array(['female', 'male', 'female', ..., 'female', 'male', 'female'],
      dtype='<U6')

In [8]:

gender_code = { 0 : 'male', 1 : 'female'}

tp_males, tp_females, fn_males, fn_females, fp_males, fp_females = 0, 0, 0, 0, 0, 0

for i in range(photos_to_analize):
    data = photo_paths[i].split('_')
    gender_number = int(data[1])
    match gender_code[gender_number]:
        case 'male':
            if genders[i] == 'male':
                tp_males += 1
            else:
                fp_females += 1
                fn_males += 1 # False negative (wrong no male prediction, in this case, equal to female false positive)
        case 'female':
            if genders[i] == 'female':
                tp_females += 1
            else:
                fp_males += 1
                fn_females += 1
                
males_tpr, females_tpr = tp_males/ (tp_males + fn_males), tp_females/ (tp_females + fn_females)

males_fpr, females_fpr = fp_males/ (fp_males + fn_males), fp_females/ (fp_females + fn_females)


#### True Positive Rates

In [24]:
males_tpr, females_tpr

(0.9498580889309366, 0.9554612937433722)

In [26]:
if abs(males_tpr - females_tpr) < 0.05:
    print('Equalized odds')
else:
    print('Not equalized odds')
    print(abs(males_tpr - females_tpr))

Equalized odds


#### False Positive Rates

In [27]:
males_fpr, females_fpr

(0.4421052631578947, 0.5578947368421052)

In [28]:
if abs(males_fpr - females_fpr) < 0.05:
    print('Equalized odds')
else:
    print('Not equalized odds')
    print(abs(males_fpr - females_fpr))

Not equalized odds
0.1157894736842105
