# Installing CLIP

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!unzip gdrive/MyDrive/utkface.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: utkface/30_1_0_20170117143902329.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._30_1_0_20170117143902329.jpg.chip.jpg  
  inflating: utkface/41_0_0_20170116214404363.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._41_0_0_20170116214404363.jpg.chip.jpg  
  inflating: utkface/58_0_1_20170113193258196.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._58_0_1_20170113193258196.jpg.chip.jpg  
  inflating: utkface/36_1_1_20170116155035948.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._36_1_1_20170116155035948.jpg.chip.jpg  
  inflating: utkface/25_0_0_20170113181532013.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._25_0_0_20170113181532013.jpg.chip.jpg  
  inflating: utkface/66_1_0_20170110123144558.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._66_1_0_20170110123144558.jpg.chip.jpg  
  inflating: utkface/40_0_1_20170117134411786.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._40_0_1_20170117134411786.jpg.chip.jpg  
  in

In [3]:
import os
import random as rd

import pandas as pd
from sklearn.model_selection import StratifiedKFold

RACE_MAPPER = {0:'white', 1:'black',2: 'asian',3: 'indian', 4:'other'}
GENDER_MAPPER = {0:'male',1:'female'}


def data_selection(ds_path: str = 'utkface/', k: int = 5):
  
  df = load_dataset(ds_path)
  
  print(df)

  df = map_values(df)

  print(df)
  
  # Stratified KFold
  skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=1)

  X = df['filepath']
  y = df['gender'] + df['race']
  r = rd.randint(0, k - 1)
  train_idx = []
  test_idx = []

  for i, (train_index, test_index) in enumerate(skf.split(X, y)):
      if i == r:
          train_idx=train_index
          test_idx=test_index
          break

  train_data = df.iloc[train_idx].reset_index(drop=True)
  test_data = df.iloc[test_idx].reset_index(drop=True)

  print(train_data)
  print(test_data)

  return train_data, test_data


def load_dataset(ds_path: str):
  # Loading filenames
  filenames = os.listdir(ds_path)
  
  try:
      filenames.remove('.DS_Store')
  except:
      pass
  
  # Building the dataframe
  df = pd.DataFrame(filenames, columns = ['filename'] )
  df['filepath'] = df.filename.apply(lambda x: ds_path + x )
  df['age'] = df.filename.apply(lambda x: int(x.split('_')[0]))
  df['gender'] = df.filename.apply(lambda x: int(x.split('_')[1]))
  df['race'] = df.filename.apply(lambda x: int(x.split('_')[-2]))
  
  return df

def map_values(df: pd.DataFrame):
  for i in range(len(df)):
      df['gender'][i]= GENDER_MAPPER[df['gender'][i]]
      df['race'][i]= RACE_MAPPER[df['race'][i]]
  return df
    



In [4]:
%pip install ftfy regex tqdm
%pip install git+https://github.com/openai/CLIP.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ftfy
  Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ftfy
Successfully installed ftfy-6.1.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-0023eek_
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-0023eek_
  Resolved https://github.com/openai/CLIP.git to commit a9b1bf5920416aaeaec965c25dd9e8f98c864f16
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-n

In [5]:
from PIL import Image
import torch
from torch import nn, optim
import glob
import os
import pandas as pd
import json
import numpy as np
import clip
from torch.utils.data import Dataset, DataLoader, BatchSampler
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import random
from matplotlib.pyplot import imshow
import nltk, re, string, collections
from nltk.util import ngrams
import collections
from itertools import combinations

%matplotlib inline

BATCH_SIZE = 4
EPOCH = 10
EQ_ODDS_THRESHOLD = 0.15

# Preparing Model and Data

In [6]:
train_df, test_df = data_selection()


                                    filename  \
0       6_0_0_20170110215531428.jpg.chip.jpg   
1      27_1_3_20170117191505796.jpg.chip.jpg   
2      35_0_0_20170105163316787.jpg.chip.jpg   
3      35_1_1_20170112205159460.jpg.chip.jpg   
4      26_1_3_20170117174543231.jpg.chip.jpg   
...                                      ...   
23702  13_0_0_20170110232526929.jpg.chip.jpg   
23703  27_1_0_20170116173136538.jpg.chip.jpg   
23704  36_1_0_20170109134525414.jpg.chip.jpg   
23705  31_1_1_20170105001003876.jpg.chip.jpg   
23706  48_0_3_20170119151033366.jpg.chip.jpg   

                                            filepath  age  gender  race  
0       utkface/6_0_0_20170110215531428.jpg.chip.jpg    6       0     0  
1      utkface/27_1_3_20170117191505796.jpg.chip.jpg   27       1     3  
2      utkface/35_0_0_20170105163316787.jpg.chip.jpg   35       0     0  
3      utkface/35_1_1_20170112205159460.jpg.chip.jpg   35       1     1  
4      utkface/26_1_3_20170117174543231.jpg.chip.jpg 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['gender'][i]= GENDER_MAPPER[df['gender'][i]]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['race'][i]= RACE_MAPPER[df['race'][i]]


                                    filename  \
0       6_0_0_20170110215531428.jpg.chip.jpg   
1      27_1_3_20170117191505796.jpg.chip.jpg   
2      35_0_0_20170105163316787.jpg.chip.jpg   
3      35_1_1_20170112205159460.jpg.chip.jpg   
4      26_1_3_20170117174543231.jpg.chip.jpg   
...                                      ...   
23702  13_0_0_20170110232526929.jpg.chip.jpg   
23703  27_1_0_20170116173136538.jpg.chip.jpg   
23704  36_1_0_20170109134525414.jpg.chip.jpg   
23705  31_1_1_20170105001003876.jpg.chip.jpg   
23706  48_0_3_20170119151033366.jpg.chip.jpg   

                                            filepath  age  gender    race  
0       utkface/6_0_0_20170110215531428.jpg.chip.jpg    6    male   white  
1      utkface/27_1_3_20170117191505796.jpg.chip.jpg   27  female  indian  
2      utkface/35_0_0_20170105163316787.jpg.chip.jpg   35    male   white  
3      utkface/35_1_1_20170112205159460.jpg.chip.jpg   35  female   black  
4      utkface/26_1_3_20170117174543231.jpg

## Splitting 20% for Validation

In [7]:
train_df_temp = train_df.sample(frac=0.8)
validation_df = train_df.drop(train_df_temp.index).reset_index(drop=True)
train_df = train_df_temp.reset_index(drop=True)

train_df = train_df.sample(2000)
validation_df = validation_df.sample(400)
len(train_df), len(validation_df), len(test_df)

(2000, 400, 4741)

## Loading Pre-trained CLIP Model and Preprocessor

In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/16", device=device, jit=False)

100%|███████████████████████████████████████| 335M/335M [00:04<00:00, 73.9MiB/s]


## UTKFaceDataset

In [9]:
class UTKFaceDataset(Dataset):
    def __init__(self, dataframe, preprocess):
        self.preprocess = preprocess
        self.filepath = dataframe["filepath"].tolist()
        self.filename = dataframe["filename"].tolist()
        self.gender = dataframe["gender"].tolist()
        self.race = dataframe["race"].tolist()
        self.age = dataframe["age"].tolist()
        self.preprocessed_cache = {}
        for path in self.filepath:
            self.preprocessed_cache[path] = self.preprocess(Image.open(path))

    def __len__(self):
        return len(self.filepath)

    def __getitem__(self, idx):
        filepath = self.filepath[idx]
        filename = self.filename[idx]
        gender = self.gender[idx]
        race = self.race[idx]
        age = self.age[idx]
        image = self.preprocessed_cache[filepath]
        return filepath, filename, gender, race, age, image

train_dataset = UTKFaceDataset(train_df, preprocess)
validation_dataset = UTKFaceDataset(validation_df, preprocess)
len(train_dataset), len(validation_dataset), train_dataset[0]

(2000,
 400,
 ('utkface/43_1_0_20170117155106415.jpg.chip.jpg',
  '43_1_0_20170117155106415.jpg.chip.jpg',
  'female',
  'white',
  43,
  tensor([[[-1.4127e+00, -1.4273e+00, -1.4419e+00,  ...,  1.9011e+00,
             1.9157e+00,  1.9157e+00],
           [-1.4127e+00, -1.4273e+00, -1.4565e+00,  ...,  1.9011e+00,
             1.9157e+00,  1.9157e+00],
           [-1.4127e+00, -1.4127e+00, -1.4273e+00,  ...,  1.9011e+00,
             1.9157e+00,  1.9157e+00],
           ...,
           [ 6.0188e-01,  6.4567e-01,  6.8947e-01,  ...,  1.9011e+00,
             1.9011e+00,  1.9157e+00],
           [ 5.5808e-01,  6.1648e-01,  6.4567e-01,  ...,  1.9011e+00,
             1.9011e+00,  1.9157e+00],
           [ 5.2889e-01,  5.8728e-01,  6.1648e-01,  ...,  1.9011e+00,
             1.9011e+00,  1.9157e+00]],
  
          [[-1.6621e+00, -1.6771e+00, -1.6771e+00,  ...,  2.0599e+00,
             2.0749e+00,  2.0749e+00],
           [-1.6621e+00, -1.6771e+00, -1.6771e+00,  ...,  2.0599e+00,
           

## BatchSampler

In [10]:
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size = BATCH_SIZE, shuffle=False)

In [11]:
for batch in train_dataloader:
    print(batch[2])
    break

('male', 'male', 'female', 'male')


# Training

In [24]:
attributes_queries = {}

race_labels = ['black', 'white', 'asian', 'indian', 'other']
for label in race_labels:
    attributes_queries[label] = 'A photo of a person of ' + label + ' race.'

gender_labels = ['male', 'female']
for label in gender_labels:
    attributes_queries[label] = 'A photo of a person of ' + label + ' gender.'

print(attributes_queries)

gender_texts = [attributes_queries[lbl] for lbl in gender_labels]
gender_texts = clip.tokenize(gender_texts).to(device)

{'black': 'A photo of a person of black race.', 'white': 'A photo of a person of white race.', 'asian': 'A photo of a person of asian race.', 'indian': 'A photo of a person of indian race.', 'other': 'A photo of a person of other race.', 'male': 'A photo of a person of male gender.', 'female': 'A photo of a person of female gender.'}


In [25]:
#https://github.com/openai/CLIP/issues/57
def convert_models_to_fp32(model): 
    for p in model.parameters(): 
        p.data = p.data.float() 
        p.grad.data = p.grad.data.float() 

if device == "cpu":
    model.float()

loss_img = nn.CrossEntropyLoss()
loss_txt = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=5e-5,betas=(0.9,0.98),eps=1e-6,weight_decay=0.2)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_dataloader)*EPOCH)

In [26]:
def update_eq_odds_rates(rates, labels, logits, real_values):
    for i in range(len(real_values)):
        true_value = real_values[i]
        pred_idx = torch.argmax(logits[i])
        pred_value = labels[pred_idx]
        
        if true_value == labels[pred_idx]:
            rates[true_value]['tp'] += 1
        else:
            rates[true_value]['fn'] += 1
            rates[pred_value]['fp'] += 1

In [32]:
best_te_loss = 1e5
best_ep = -1

# Equalized odds vars
best_te_bias = -1
tpr = lambda tp, fn: tp/ (tp + fn)
fpr = lambda fp, fn: fp/ (fp + fn)

for epoch in range(EPOCH):
    print(f"running epoch {epoch}, best test loss {best_te_loss} after epoch {best_ep}")
    step = 0
    tr_loss = 0
    model.train()
    pbar = tqdm(train_dataloader, leave=False)
    for batch in pbar:
        step += 1
        optimizer.zero_grad()

        images = batch[-1]
        
        images = images.to(device)
        logits_per_image, _ = model(images, gender_texts)
        # print(logits_per_image.shape)
        # print(logits_per_image)
         # torch.arange(BATCH_SIZE).to(device)
        ground_truth = torch.zeros((BATCH_SIZE, len(gender_labels))).to(device) # torch.arange(BATCH_SIZE).to(device)
        
        for i in range(BATCH_SIZE):
            truth_idx = gender_labels.index(batch[2][i])
            ground_truth[i, truth_idx] = 1

        total_loss = loss_img(logits_per_image, ground_truth)
        total_loss.backward()
        tr_loss += total_loss.item()
        if device == "cpu":
            optimizer.step()
            scheduler.step()
        else:
            convert_models_to_fp32(model)
            optimizer.step()
            scheduler.step()
            clip.model.convert_weights(model)
        pbar.set_description(f"train batchCE: {total_loss.item()}", refresh=True)
    tr_loss /= step
    
    step = 0
    te_loss = 0
    rates = {'male': {'tp': 0, 'fp': 0, 'fn': 0}, 
             'female': {'tp': 0, 'fp': 0, 'fn': 0} }
    with torch.no_grad():
        model.eval()
        val_pbar = tqdm(validation_dataloader, leave=False)
        for batch in val_pbar:
            step += 1
            images = batch[-1]
            
            images = images.to(device)
            
            logits_per_image, logits_per_text = model(images, gender_texts)
            ground_truth = torch.zeros((BATCH_SIZE, len(gender_labels))).to(device) # torch.arange(BATCH_SIZE).to(device)
            
            for i in range(BATCH_SIZE):
                truth_idx = gender_labels.index(batch[2][i])
                ground_truth[i, truth_idx] = 1

            total_loss = loss_img(logits_per_image, ground_truth)
            te_loss += total_loss.item()
            
            update_eq_odds_rates(rates, gender_labels, logits_per_image, batch[2])
            
            val_pbar.set_description(f"test batchCE: {total_loss.item()}", refresh=True)
        te_loss /= step
        
    # Equalized odds calculation
    tpr_values = {label : 0 for label in rates.keys()}
    fpr_values = {label : 0 for label in rates.keys()}
    for label in rates.keys():
        label_rates = rates[label]
        tpr_values[label] = tpr(label_rates['tp'], label_rates['fn'])
        fpr_values[label] = fpr(label_rates['fp'], label_rates['fn'])
    
    equalized_odds = True
    for pair in combinations(tpr_values.keys(), 2):
        first_label = pair[0]
        second_label = pair[1]
        te_bias = [abs(tpr_values[first_label] - tpr_values[second_label]), abs(fpr_values[first_label] - fpr_values[second_label])]
        if max(te_bias[0], te_bias[1]) >= EQ_ODDS_THRESHOLD:
            equalized_odds = False
            break
    
    if te_loss < best_te_loss and equalized_odds: # maximize accuracy with fairness threshold
        best_te_loss = te_loss
        best_ep = epoch
        best_te_bias = te_bias
        torch.save(model.state_dict(), "best_model.pt")
    print(f"epoch {epoch}, tr_loss {tr_loss}, te_loss {te_loss}, te_bias {te_bias}")

torch.save(model.state_dict(), "last_model.pt")

running epoch 0, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 0, tr_loss 0.20831860953569412, te_loss 0.19161398887634276, te_bias [0.023492063492063564, 0.03448275862068967]
running epoch 1, best test loss 0.19161398887634276 after epoch 0


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 1, tr_loss 0.12252717256546021, te_loss 0.1913271963596344, te_bias [0.0006349206349206549, 0.12000000000000005]
running epoch 2, best test loss 0.1913271963596344 after epoch 1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 2, tr_loss 0.08054717004299164, te_loss 0.2788547143340111, te_bias [0.14603174603174607, 0.631578947368421]
running epoch 3, best test loss 0.1913271963596344 after epoch 1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 3, tr_loss 0.05970311480760574, te_loss 0.178642415702343, te_bias [0.0019047619047619646, 0.11111111111111116]
running epoch 4, best test loss 0.178642415702343 after epoch 3


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 4, tr_loss 0.027671914756298065, te_loss 0.22546054631471635, te_bias [0.03936507936507938, 0.1333333333333333]
running epoch 5, best test loss 0.178642415702343 after epoch 3


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 5, tr_loss 0.007327819645404816, te_loss 0.24901767820119858, te_bias [0.043809523809523854, 0.17241379310344823]
running epoch 6, best test loss 0.178642415702343 after epoch 3


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 6, tr_loss 0.002126490533351898, te_loss 0.2562493336200714, te_bias [0.043809523809523854, 0.17241379310344823]
running epoch 7, best test loss 0.178642415702343 after epoch 3


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 7, tr_loss 0.0018113685846328736, te_loss 0.258963919878006, te_bias [0.043809523809523854, 0.17241379310344823]
running epoch 8, best test loss 0.178642415702343 after epoch 3


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 8, tr_loss 0.001703640103340149, te_loss 0.2596202036738396, te_bias [0.043809523809523854, 0.17241379310344823]
running epoch 9, best test loss 0.178642415702343 after epoch 3


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 9, tr_loss 0.0016850371360778809, te_loss 0.2597071689367294, te_bias [0.043809523809523854, 0.17241379310344823]
