# Installing CLIP

In [19]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
ds_path = '/content/drive/MyDrive/utkface/'

In [21]:
import os
import random as rd

import pandas as pd
from sklearn.model_selection import StratifiedKFold

RACE_MAPPER = {0:'white', 1:'black',2: 'asian',3: 'indian', 4:'other'}
GENDER_MAPPER = {0:'male',1:'female'}


def data_selection(ds_path: str = 'utkface/', k: int = 5):
  
  df = load_dataset(ds_path)
  
  print(df)

  df = map_values(df)

  print(df)
  
  # Stratified KFold
  skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=1)

  X = df['filepath']
  y = df['gender'] + df['race']
  r = rd.randint(0, k - 1)
  train_idx = []
  test_idx = []

  for i, (train_index, test_index) in enumerate(skf.split(X, y)):
      if i == r:
          train_idx=train_index
          test_idx=test_index
          break

  train_data = df.iloc[train_idx].reset_index(drop=True)
  test_data = df.iloc[test_idx].reset_index(drop=True)

  print(train_data)
  print(test_data)

  return train_data, test_data


def load_dataset(ds_path: str):
  # Loading filenames
  filenames = os.listdir(ds_path)
  
  try:
      filenames.remove('.DS_Store')
  except:
      pass
  
  # Building the dataframe
  df = pd.DataFrame(filenames, columns = ['filename'] )
  df['filepath'] = df.filename.apply(lambda x: ds_path + x )
  df['age'] = df.filename.apply(lambda x: int(x.split('_')[0]))
  df['gender'] = df.filename.apply(lambda x: int(x.split('_')[1]))
  df['race'] = df.filename.apply(lambda x: int(x.split('_')[-2]))
  
  return df

def map_values(df: pd.DataFrame):
  for i in range(len(df)):
      df['gender'][i]= GENDER_MAPPER[df['gender'][i]]
      df['race'][i]= RACE_MAPPER[df['race'][i]]
  return df
    



In [22]:
%pip install ftfy regex tqdm
%pip install git+https://github.com/openai/CLIP.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-t4yrxgj_
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-t4yrxgj_
  Resolved https://github.com/openai/CLIP.git to commit a9b1bf5920416aaeaec965c25dd9e8f98c864f16
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [23]:
from PIL import Image
import torch
from torch import nn, optim
import glob
import os
import pandas as pd
import json
import numpy as np
import clip
from torch.utils.data import Dataset, DataLoader, BatchSampler
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import random
from matplotlib.pyplot import imshow
import nltk, re, string, collections
from nltk.util import ngrams
import collections
from itertools import combinations

%matplotlib inline

BATCH_SIZE = 4
EPOCH = 10
EQ_ODDS_THRESHOLD = 0.15

# Preparing Model and Data

In [24]:
train_df, test_df = data_selection(ds_path)
test_df.to_json(r'test_data_CLIP_df.json')


                                    filename  \
0      82_0_2_20170112224024582.jpg.chip.jpg   
1      84_1_0_20170110160644173.jpg.chip.jpg   
2      79_1_0_20170120225840978.jpg.chip.jpg   
3      80_0_1_20170111205423680.jpg.chip.jpg   
4      96_1_0_20170110183855839.jpg.chip.jpg   
...                                      ...   
23723  15_1_0_20170109213427133.jpg.chip.jpg   
23724  15_1_4_20170103200935782.jpg.chip.jpg   
23725  15_1_4_20170103233441003.jpg.chip.jpg   
23726  16_0_0_20170110232113589.jpg.chip.jpg   
23727  16_0_0_20170110231725022.jpg.chip.jpg   

                                                filepath  age  gender  race  
0      /content/drive/MyDrive/utkface/82_0_2_20170112...   82       0     2  
1      /content/drive/MyDrive/utkface/84_1_0_20170110...   84       1     0  
2      /content/drive/MyDrive/utkface/79_1_0_20170120...   79       1     0  
3      /content/drive/MyDrive/utkface/80_0_1_20170111...   80       0     1  
4      /content/drive/MyDrive/utk

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['gender'][i]= GENDER_MAPPER[df['gender'][i]]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['race'][i]= RACE_MAPPER[df['race'][i]]


                                    filename  \
0      82_0_2_20170112224024582.jpg.chip.jpg   
1      84_1_0_20170110160644173.jpg.chip.jpg   
2      79_1_0_20170120225840978.jpg.chip.jpg   
3      80_0_1_20170111205423680.jpg.chip.jpg   
4      96_1_0_20170110183855839.jpg.chip.jpg   
...                                      ...   
23723  15_1_0_20170109213427133.jpg.chip.jpg   
23724  15_1_4_20170103200935782.jpg.chip.jpg   
23725  15_1_4_20170103233441003.jpg.chip.jpg   
23726  16_0_0_20170110232113589.jpg.chip.jpg   
23727  16_0_0_20170110231725022.jpg.chip.jpg   

                                                filepath  age  gender   race  
0      /content/drive/MyDrive/utkface/82_0_2_20170112...   82    male  asian  
1      /content/drive/MyDrive/utkface/84_1_0_20170110...   84  female  white  
2      /content/drive/MyDrive/utkface/79_1_0_20170120...   79  female  white  
3      /content/drive/MyDrive/utkface/80_0_1_20170111...   80    male  black  
4      /content/drive/MyDriv

In [25]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Splitting 20% for Validation

## Loading Pre-trained CLIP Model and Preprocessor

In [26]:
train_df_temp = train_df.sample(frac=0.8)
validation_df = train_df.drop(train_df_temp.index).reset_index(drop=True)
train_df = train_df_temp.reset_index(drop=True)

print(len(train_df))
train_df_males = train_df.loc[train_df['gender'] == 'male'].sample(1000)
train_df_females = train_df.loc[train_df['gender'] == 'female'].sample(1000)
train_df = pd.merge(train_df_males, train_df_females, how = 'outer')

valid_df_males = validation_df.loc[validation_df['gender'] == 'male'].sample(200)
valid_df_females = validation_df.loc[validation_df['gender'] == 'female'].sample(200)
validation_df = pd.merge(valid_df_males, valid_df_females, how = 'outer')

len(train_df), len(validation_df), len(test_df)

15186


(2000, 400, 4745)

In [27]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)

## UTKFaceDataset

In [28]:
class UTKFaceDataset(Dataset):
    def __init__(self, dataframe, preprocess):
        self.preprocess = preprocess
        self.filepath = dataframe["filepath"].tolist()
        self.filename = dataframe["filename"].tolist()
        self.gender = dataframe["gender"].tolist()
        self.race = dataframe["race"].tolist()
        self.age = dataframe["age"].tolist()
        self.preprocessed_cache = {}
        for path in self.filepath:
            self.preprocessed_cache[path] = self.preprocess(Image.open(path))

    def __len__(self):
        return len(self.filepath)

    def __getitem__(self, idx):
        filepath = self.filepath[idx]
        filename = self.filename[idx]
        gender = self.gender[idx]
        race = self.race[idx]
        age = self.age[idx]
        image = self.preprocessed_cache[filepath]
        return filepath, filename, gender, race, age, image

train_dataset = UTKFaceDataset(train_df, preprocess)
validation_dataset = UTKFaceDataset(validation_df, preprocess)
len(train_dataset), len(validation_dataset), train_dataset[0]

(2000,
 400,
 ('/content/drive/MyDrive/utkface/66_0_0_20170111203309589.jpg.chip.jpg',
  '66_0_0_20170111203309589.jpg.chip.jpg',
  'male',
  'white',
  66,
  tensor([[[ 1.3318,  1.3610,  1.3756,  ...,  0.9668,  0.9668,  0.9522],
           [ 1.3464,  1.3464,  1.3464,  ...,  0.9668,  0.9668,  0.9522],
           [ 1.3464,  1.3172,  1.3172,  ...,  0.9668,  0.9668,  0.9522],
           ...,
           [ 1.7698,  1.7698,  1.7260,  ..., -1.0769, -0.9310, -0.7996],
           [ 1.7844,  1.8135,  1.7844,  ..., -1.0331, -1.0915, -1.1207],
           [ 1.7698,  1.8135,  1.7990,  ..., -0.7412, -0.8434, -0.9018]],
  
          [[ 1.3695,  1.3995,  1.3845,  ...,  1.4746,  1.4746,  1.4596],
           [ 1.3845,  1.3845,  1.3545,  ...,  1.4746,  1.4746,  1.4596],
           [ 1.3845,  1.3545,  1.3395,  ...,  1.4746,  1.4746,  1.4596],
           ...,
           [ 1.9998,  2.0149,  1.9698,  ..., -0.7766, -0.6265, -0.4914],
           [ 1.9998,  2.0299,  1.9998,  ..., -0.7616, -0.8216, -0.8216],
    

## BatchSampler

In [29]:
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size = BATCH_SIZE, shuffle=False)

In [30]:
for batch in train_dataloader:
    print(batch[2])
    break

('male', 'male', 'male', 'female')


# Training

In [31]:
attributes_queries = {}

race_labels = ['black', 'white', 'asian', 'indian', 'other']
for label in race_labels:
    attributes_queries[label] = 'A photo of a person of ' + label + ' race.'

gender_labels = ['male', 'female']
for label in gender_labels:
    attributes_queries[label] = 'A photo of a person of ' + label + ' gender.'

print(attributes_queries)

gender_texts = [attributes_queries[lbl] for lbl in gender_labels]
gender_texts = clip.tokenize(gender_texts).to(device)

{'black': 'A photo of a person of black race.', 'white': 'A photo of a person of white race.', 'asian': 'A photo of a person of asian race.', 'indian': 'A photo of a person of indian race.', 'other': 'A photo of a person of other race.', 'male': 'A photo of a person of male gender.', 'female': 'A photo of a person of female gender.'}


In [32]:
#https://github.com/openai/CLIP/issues/57
def convert_models_to_fp32(model): 
    for p in model.parameters(): 
        p.data = p.data.float() 
        p.grad.data = p.grad.data.float() 

if device == "cpu":
    model.float()

loss_img = nn.CrossEntropyLoss()
loss_txt = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=5e-5,betas=(0.9,0.98),eps=1e-6,weight_decay=0.2)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_dataloader)*EPOCH)

In [33]:
def update_eq_odds_rates(rates, labels, logits, real_values):
    for i in range(len(real_values)):
        true_value = real_values[i]
        pred_idx = torch.argmax(logits[i])
        pred_value = labels[pred_idx]
        
        if true_value == labels[pred_idx]:
            rates[true_value]['tp'] += 1
        else:
            rates[true_value]['fn'] += 1
            rates[pred_value]['fp'] += 1

In [34]:
best_te_loss = 1e5
best_ep = -1

# Equalized odds vars
best_te_bias = -1
tpr = lambda tp, fn: tp/ (tp + fn)
fpr = lambda fp, fn: fp/ (fp + fn)

for epoch in range(EPOCH):
    print(f"running epoch {epoch}, best test loss {best_te_loss} after epoch {best_ep}")
    step = 0
    tr_loss = 0
    model.train()
    pbar = tqdm(train_dataloader, leave=False)
    for batch in pbar:
        step += 1
        optimizer.zero_grad()

        images = batch[-1]
        
        images = images.to(device)
        logits_per_image, _ = model(images, gender_texts)
        # print(logits_per_image.shape)
        # print(logits_per_image)
         # torch.arange(BATCH_SIZE).to(device)
        ground_truth = torch.zeros((BATCH_SIZE, len(gender_labels))).to(device) # torch.arange(BATCH_SIZE).to(device)
        
        for i in range(BATCH_SIZE):
            truth_idx = gender_labels.index(batch[2][i])
            ground_truth[i, truth_idx] = 1

        total_loss = loss_img(logits_per_image, ground_truth)
        total_loss.backward()
        tr_loss += total_loss.item()
        if device == "cpu":
            optimizer.step()
            scheduler.step()
        else:
            convert_models_to_fp32(model)
            optimizer.step()
            scheduler.step()
            clip.model.convert_weights(model)
        pbar.set_description(f"train batchCE: {total_loss.item()}", refresh=True)
    tr_loss /= step
    
    step = 0
    te_loss = 0
    rates = {'male': {'tp': 0, 'fp': 0, 'fn': 0}, 
             'female': {'tp': 0, 'fp': 0, 'fn': 0} }
    with torch.no_grad():
        model.eval()
        val_pbar = tqdm(validation_dataloader, leave=False)
        for batch in val_pbar:
            step += 1
            images = batch[-1]
            
            images = images.to(device)
            
            logits_per_image, logits_per_text = model(images, gender_texts)
            ground_truth = torch.zeros((BATCH_SIZE, len(gender_labels))).to(device) # torch.arange(BATCH_SIZE).to(device)
            
            for i in range(BATCH_SIZE):
                truth_idx = gender_labels.index(batch[2][i])
                ground_truth[i, truth_idx] = 1

            total_loss = loss_img(logits_per_image, ground_truth)
            te_loss += total_loss.item()
            
            update_eq_odds_rates(rates, gender_labels, logits_per_image, batch[2])
            
            val_pbar.set_description(f"test batchCE: {total_loss.item()}", refresh=True)
        te_loss /= step
        
    # Equalized odds calculation
    tpr_values = {label : 0 for label in rates.keys()}
    fpr_values = {label : 0 for label in rates.keys()}
    for label in rates.keys():
        label_rates = rates[label]
        tpr_values[label] = tpr(label_rates['tp'], label_rates['fn'])
        fpr_values[label] = fpr(label_rates['fp'], label_rates['fn'])
    
    equalized_odds = True
    for pair in combinations(tpr_values.keys(), 2):
        first_label = pair[0]
        second_label = pair[1]
        te_bias = [abs(tpr_values[first_label] - tpr_values[second_label]), abs(fpr_values[first_label] - fpr_values[second_label])]
        if max(te_bias[0], te_bias[1]) >= EQ_ODDS_THRESHOLD:
            equalized_odds = False
            break
    
    if te_loss < best_te_loss and equalized_odds: # maximize accuracy with fairness threshold
        best_te_loss = te_loss
        best_ep = epoch
        best_te_bias = te_bias
        best_model = model
    print(f"epoch {epoch}, tr_loss {tr_loss}, te_loss {te_loss}, te_bias {te_bias}")

torch.save(best_model.cpu().state_dict(), "best_model.pt")
torch.save(model.state_dict(), "last_model.pt")

running epoch 0, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 0, tr_loss 0.2738989799618721, te_loss 0.5147628784179688, te_bias [0.475, 0.871559633027523]
running epoch 1, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 1, tr_loss 0.2158191806077957, te_loss 0.17382958322763442, te_bias [0.03499999999999992, 0.28]
running epoch 2, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 2, tr_loss 0.11489084339141846, te_loss 0.347374347448349, te_bias [0.22499999999999998, 0.7894736842105263]
running epoch 3, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 3, tr_loss 0.046423641681671145, te_loss 0.18854772448539733, te_bias [0.05999999999999994, 0.4285714285714286]
running epoch 4, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 4, tr_loss 0.06653856241703034, te_loss 0.268964361846447, te_bias [0.020000000000000018, 0.125]
running epoch 5, best test loss 0.268964361846447 after epoch 4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 5, tr_loss 0.03609692895412445, te_loss 0.2961495867371559, te_bias [0.11499999999999999, 0.7419354838709677]
running epoch 6, best test loss 0.268964361846447 after epoch 4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 6, tr_loss 0.013014397323131562, te_loss 0.2216396850347519, te_bias [0.03499999999999992, 0.30434782608695654]
running epoch 7, best test loss 0.268964361846447 after epoch 4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 7, tr_loss 0.008313460409641265, te_loss 0.22670657217502593, te_bias [0.03499999999999992, 0.30434782608695654]
running epoch 8, best test loss 0.268964361846447 after epoch 4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 8, tr_loss 0.007596419453620911, te_loss 0.22747314244508743, te_bias [0.03499999999999992, 0.30434782608695654]
running epoch 9, best test loss 0.268964361846447 after epoch 4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 9, tr_loss 0.0075082796216011044, te_loss 0.2275925162434578, te_bias [0.03499999999999992, 0.30434782608695654]
