# Installing CLIP

In [109]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [110]:
!unzip gdrive/MyDrive/utkface.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: utkface/30_1_0_20170117143902329.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._30_1_0_20170117143902329.jpg.chip.jpg  
  inflating: utkface/41_0_0_20170116214404363.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._41_0_0_20170116214404363.jpg.chip.jpg  
  inflating: utkface/58_0_1_20170113193258196.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._58_0_1_20170113193258196.jpg.chip.jpg  
  inflating: utkface/36_1_1_20170116155035948.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._36_1_1_20170116155035948.jpg.chip.jpg  
  inflating: utkface/25_0_0_20170113181532013.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._25_0_0_20170113181532013.jpg.chip.jpg  
  inflating: utkface/66_1_0_20170110123144558.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._66_1_0_20170110123144558.jpg.chip.jpg  
  inflating: utkface/40_0_1_20170117134411786.jpg.chip.jpg  
  inflating: __MACOSX/utkface/._40_0_1_20170117134411786.jpg.chip.jpg  
  in

In [111]:
import os
import random as rd

import pandas as pd
from sklearn.model_selection import StratifiedKFold

RACE_MAPPER = {0:'white', 1:'black',2: 'asian',3: 'indian', 4:'other'}
GENDER_MAPPER = {0:'male',1:'female'}


def data_selection(ds_path: str = 'utkface/', k: int = 5):
  
  df = load_dataset(ds_path)
  
  print(df)

  df = map_values(df)

  print(df)
  
  # Stratified KFold
  skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=1)

  X = df['filepath']
  y = df['gender'] + df['race']
  r = rd.randint(0, k - 1)
  train_idx = []
  test_idx = []

  for i, (train_index, test_index) in enumerate(skf.split(X, y)):
      if i == r:
          train_idx=train_index
          test_idx=test_index
          break

  train_data = df.iloc[train_idx].reset_index(drop=True)
  test_data = df.iloc[test_idx].reset_index(drop=True)

  print(train_data)
  print(test_data)

  return train_data, test_data


def load_dataset(ds_path: str):
  # Loading filenames
  filenames = os.listdir(ds_path)
  
  try:
      filenames.remove('.DS_Store')
  except:
      pass
  
  # Building the dataframe
  df = pd.DataFrame(filenames, columns = ['filename'] )
  df['filepath'] = df.filename.apply(lambda x: ds_path + x )
  df['age'] = df.filename.apply(lambda x: int(x.split('_')[0]))
  df['gender'] = df.filename.apply(lambda x: int(x.split('_')[1]))
  df['race'] = df.filename.apply(lambda x: int(x.split('_')[-2]))
  
  return df

def map_values(df: pd.DataFrame):
  for i in range(len(df)):
      df['gender'][i]= GENDER_MAPPER[df['gender'][i]]
      df['race'][i]= RACE_MAPPER[df['race'][i]]
  return df
    



In [112]:
%pip install ftfy regex tqdm
%pip install git+https://github.com/openai/CLIP.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-5sksw79i
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-5sksw79i
  Resolved https://github.com/openai/CLIP.git to commit a9b1bf5920416aaeaec965c25dd9e8f98c864f16
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [113]:
from PIL import Image
import torch
from torch import nn, optim
import glob
import os
import pandas as pd
import json
import numpy as np
import clip
from torch.utils.data import Dataset, DataLoader, BatchSampler
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import random
from matplotlib.pyplot import imshow
import nltk, re, string, collections
from nltk.util import ngrams
import collections
from itertools import combinations

%matplotlib inline

BATCH_SIZE = 4
EPOCH = 30
EQ_ODDS_THRESHOLD = 0.15

# Preparing Model and Data

In [114]:
train_df, test_df = data_selection()
test_df.to_json(r'test_data_df.json')


                                    filename  \
0       6_0_0_20170110215531428.jpg.chip.jpg   
1      27_1_3_20170117191505796.jpg.chip.jpg   
2      35_0_0_20170105163316787.jpg.chip.jpg   
3      35_1_1_20170112205159460.jpg.chip.jpg   
4      26_1_3_20170117174543231.jpg.chip.jpg   
...                                      ...   
23702  13_0_0_20170110232526929.jpg.chip.jpg   
23703  27_1_0_20170116173136538.jpg.chip.jpg   
23704  36_1_0_20170109134525414.jpg.chip.jpg   
23705  31_1_1_20170105001003876.jpg.chip.jpg   
23706  48_0_3_20170119151033366.jpg.chip.jpg   

                                            filepath  age  gender  race  
0       utkface/6_0_0_20170110215531428.jpg.chip.jpg    6       0     0  
1      utkface/27_1_3_20170117191505796.jpg.chip.jpg   27       1     3  
2      utkface/35_0_0_20170105163316787.jpg.chip.jpg   35       0     0  
3      utkface/35_1_1_20170112205159460.jpg.chip.jpg   35       1     1  
4      utkface/26_1_3_20170117174543231.jpg.chip.jpg 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['gender'][i]= GENDER_MAPPER[df['gender'][i]]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['race'][i]= RACE_MAPPER[df['race'][i]]


                                    filename  \
0       6_0_0_20170110215531428.jpg.chip.jpg   
1      27_1_3_20170117191505796.jpg.chip.jpg   
2      35_0_0_20170105163316787.jpg.chip.jpg   
3      35_1_1_20170112205159460.jpg.chip.jpg   
4      26_1_3_20170117174543231.jpg.chip.jpg   
...                                      ...   
23702  13_0_0_20170110232526929.jpg.chip.jpg   
23703  27_1_0_20170116173136538.jpg.chip.jpg   
23704  36_1_0_20170109134525414.jpg.chip.jpg   
23705  31_1_1_20170105001003876.jpg.chip.jpg   
23706  48_0_3_20170119151033366.jpg.chip.jpg   

                                            filepath  age  gender    race  
0       utkface/6_0_0_20170110215531428.jpg.chip.jpg    6    male   white  
1      utkface/27_1_3_20170117191505796.jpg.chip.jpg   27  female  indian  
2      utkface/35_0_0_20170105163316787.jpg.chip.jpg   35    male   white  
3      utkface/35_1_1_20170112205159460.jpg.chip.jpg   35  female   black  
4      utkface/26_1_3_20170117174543231.jpg

## Splitting 20% for Validation

## Loading Pre-trained CLIP Model and Preprocessor

In [115]:
train_df_temp = train_df.sample(frac=0.8)
validation_df = train_df.drop(train_df_temp.index).reset_index(drop=True)
train_df = train_df_temp.reset_index(drop=True)

print(len(train_df))
train_df_males = train_df.loc[train_df['gender'] == 'male'].sample(1000)
train_df_females = train_df.loc[train_df['gender'] == 'female'].sample(1000)
train_df = pd.merge(train_df_males, train_df_females, how = 'outer')

valid_df_males = validation_df.loc[validation_df['gender'] == 'male'].sample(200)
valid_df_females = validation_df.loc[validation_df['gender'] == 'female'].sample(200)
validation_df = pd.merge(valid_df_males, valid_df_females, how = 'outer')

len(train_df), len(validation_df), len(test_df)

15172


(2000, 400, 4742)

In [116]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)

## UTKFaceDataset

In [117]:
class UTKFaceDataset(Dataset):
    def __init__(self, dataframe, preprocess):
        self.preprocess = preprocess
        self.filepath = dataframe["filepath"].tolist()
        self.filename = dataframe["filename"].tolist()
        self.gender = dataframe["gender"].tolist()
        self.race = dataframe["race"].tolist()
        self.age = dataframe["age"].tolist()
        self.preprocessed_cache = {}
        for path in self.filepath:
            self.preprocessed_cache[path] = self.preprocess(Image.open(path))

    def __len__(self):
        return len(self.filepath)

    def __getitem__(self, idx):
        filepath = self.filepath[idx]
        filename = self.filename[idx]
        gender = self.gender[idx]
        race = self.race[idx]
        age = self.age[idx]
        image = self.preprocessed_cache[filepath]
        return filepath, filename, gender, race, age, image

train_dataset = UTKFaceDataset(train_df, preprocess)
validation_dataset = UTKFaceDataset(validation_df, preprocess)
len(train_dataset), len(validation_dataset), train_dataset[0]

(2000,
 400,
 ('utkface/1_0_2_20161219155759221.jpg.chip.jpg',
  '1_0_2_20161219155759221.jpg.chip.jpg',
  'male',
  'asian',
  1,
  tensor([[[ 0.5143,  0.5581,  0.6311,  ..., -1.3397, -1.2083, -1.1353],
           [ 0.5435,  0.5873,  0.6603,  ..., -1.2959, -1.1937, -1.0915],
           [ 0.5873,  0.6311,  0.7041,  ..., -1.2521, -1.1207, -1.0477],
           ...,
           [ 0.6749,  0.6019,  0.5143,  ..., -1.2959, -1.2959, -1.2959],
           [ 0.7187,  0.6019,  0.5143,  ..., -1.2813, -1.2813, -1.2813],
           [ 0.7187,  0.6165,  0.5143,  ..., -1.2521, -1.2521, -1.2667]],
  
          [[ 0.1989,  0.2439,  0.3190,  ..., -0.2663, -0.0412,  0.1089],
           [ 0.2289,  0.2740,  0.3490,  ..., -0.2213,  0.0038,  0.1539],
           [ 0.2740,  0.3190,  0.3940,  ..., -0.1463,  0.0638,  0.2139],
           ...,
           [ 0.1539,  0.1389,  0.1389,  ...,  0.8292,  0.8292,  0.8292],
           [ 0.1239,  0.0789,  0.0638,  ...,  0.8442,  0.8442,  0.8442],
           [ 0.0939,  0.0488, 

## BatchSampler

In [118]:
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size = BATCH_SIZE, shuffle=False)

In [119]:
for batch in train_dataloader:
    print(batch[2])
    break

('female', 'male', 'male', 'female')


# Training

In [120]:
attributes_queries = {}

race_labels = ['black', 'white', 'asian', 'indian', 'other']
for label in race_labels:
    attributes_queries[label] = 'A photo of a person of ' + label + ' race.'

gender_labels = ['male', 'female']
for label in gender_labels:
    attributes_queries[label] = 'A photo of a person of ' + label + ' gender.'

print(attributes_queries)

gender_texts = [attributes_queries[lbl] for lbl in gender_labels]
gender_texts = clip.tokenize(gender_texts).to(device)

{'black': 'A photo of a person of black race.', 'white': 'A photo of a person of white race.', 'asian': 'A photo of a person of asian race.', 'indian': 'A photo of a person of indian race.', 'other': 'A photo of a person of other race.', 'male': 'A photo of a person of male gender.', 'female': 'A photo of a person of female gender.'}


In [121]:
#https://github.com/openai/CLIP/issues/57
def convert_models_to_fp32(model): 
    for p in model.parameters(): 
        p.data = p.data.float() 
        p.grad.data = p.grad.data.float() 

if device == "cpu":
    model.float()

loss_img = nn.CrossEntropyLoss()
loss_txt = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=5e-5,betas=(0.9,0.98),eps=1e-6,weight_decay=0.2)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_dataloader)*EPOCH)

In [122]:
def update_eq_odds_rates(rates, labels, logits, real_values):
    for i in range(len(real_values)):
        true_value = real_values[i]
        pred_idx = torch.argmax(logits[i])
        pred_value = labels[pred_idx]
        
        if true_value == labels[pred_idx]:
            rates[true_value]['tp'] += 1
        else:
            rates[true_value]['fn'] += 1
            rates[pred_value]['fp'] += 1

In [123]:
best_te_loss = 1e5
best_ep = -1

# Equalized odds vars
best_te_bias = -1
tpr = lambda tp, fn: tp/ (tp + fn)
fpr = lambda fp, fn: fp/ (fp + fn)

for epoch in range(EPOCH):
    print(f"running epoch {epoch}, best test loss {best_te_loss} after epoch {best_ep}")
    step = 0
    tr_loss = 0
    model.train()
    pbar = tqdm(train_dataloader, leave=False)
    for batch in pbar:
        step += 1
        optimizer.zero_grad()

        images = batch[-1]
        
        images = images.to(device)
        logits_per_image, _ = model(images, gender_texts)
        # print(logits_per_image.shape)
        # print(logits_per_image)
         # torch.arange(BATCH_SIZE).to(device)
        ground_truth = torch.zeros((BATCH_SIZE, len(gender_labels))).to(device) # torch.arange(BATCH_SIZE).to(device)
        
        for i in range(BATCH_SIZE):
            truth_idx = gender_labels.index(batch[2][i])
            ground_truth[i, truth_idx] = 1

        total_loss = loss_img(logits_per_image, ground_truth)
        total_loss.backward()
        tr_loss += total_loss.item()
        if device == "cpu":
            optimizer.step()
            scheduler.step()
        else:
            convert_models_to_fp32(model)
            optimizer.step()
            scheduler.step()
            clip.model.convert_weights(model)
        pbar.set_description(f"train batchCE: {total_loss.item()}", refresh=True)
    tr_loss /= step
    
    step = 0
    te_loss = 0
    rates = {'male': {'tp': 0, 'fp': 0, 'fn': 0}, 
             'female': {'tp': 0, 'fp': 0, 'fn': 0} }
    with torch.no_grad():
        model.eval()
        val_pbar = tqdm(validation_dataloader, leave=False)
        for batch in val_pbar:
            step += 1
            images = batch[-1]
            
            images = images.to(device)
            
            logits_per_image, logits_per_text = model(images, gender_texts)
            ground_truth = torch.zeros((BATCH_SIZE, len(gender_labels))).to(device) # torch.arange(BATCH_SIZE).to(device)
            
            for i in range(BATCH_SIZE):
                truth_idx = gender_labels.index(batch[2][i])
                ground_truth[i, truth_idx] = 1

            total_loss = loss_img(logits_per_image, ground_truth)
            te_loss += total_loss.item()
            
            update_eq_odds_rates(rates, gender_labels, logits_per_image, batch[2])
            
            val_pbar.set_description(f"test batchCE: {total_loss.item()}", refresh=True)
        te_loss /= step
        
    # Equalized odds calculation
    tpr_values = {label : 0 for label in rates.keys()}
    fpr_values = {label : 0 for label in rates.keys()}
    for label in rates.keys():
        label_rates = rates[label]
        tpr_values[label] = tpr(label_rates['tp'], label_rates['fn'])
        fpr_values[label] = fpr(label_rates['fp'], label_rates['fn'])
    
    equalized_odds = True
    for pair in combinations(tpr_values.keys(), 2):
        first_label = pair[0]
        second_label = pair[1]
        te_bias = [abs(tpr_values[first_label] - tpr_values[second_label]), abs(fpr_values[first_label] - fpr_values[second_label])]
        if max(te_bias[0], te_bias[1]) >= EQ_ODDS_THRESHOLD:
            equalized_odds = False
            break
    
    if te_loss < best_te_loss and equalized_odds: # maximize accuracy with fairness threshold
        best_te_loss = te_loss
        best_ep = epoch
        best_te_bias = te_bias
        best_model = model
    print(f"epoch {epoch}, tr_loss {tr_loss}, te_loss {te_loss}, te_bias {te_bias}")

torch.save(best_model.cpu().state_dict(), "best_model.pt")
torch.save(model.state_dict(), "last_model.pt")

running epoch 0, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 0, tr_loss 0.28928751730918884, te_loss 0.21649759292602538, te_bias [0.04499999999999993, 0.31034482758620685]
running epoch 1, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 1, tr_loss 0.2154399623274803, te_loss 0.34283639907836916, te_bias [0.275, 0.9649122807017543]
running epoch 2, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 2, tr_loss 0.16210210001468658, te_loss 0.22188000679016112, te_bias [0.10999999999999999, 0.6470588235294117]
running epoch 3, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 3, tr_loss 0.10422754526138306, te_loss 0.19533493041992187, te_bias [0.06499999999999995, 0.48148148148148145]
running epoch 4, best test loss 100000.0 after epoch -1


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 4, tr_loss 0.09851403450965882, te_loss 0.15490204900503157, te_bias [0.009999999999999898, 0.08333333333333331]
running epoch 5, best test loss 0.15490204900503157 after epoch 4


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 5, tr_loss 0.10205958944559097, te_loss 0.11324031472206116, te_bias [0.0050000000000000044, 0.05882352941176472]
running epoch 6, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 6, tr_loss 0.06921983033418655, te_loss 0.3588233375549316, te_bias [0.30000000000000004, 0.967741935483871]
running epoch 7, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 7, tr_loss 0.08038020944595337, te_loss 0.13941966831684113, te_bias [0.010000000000000009, 0.125]
running epoch 8, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 8, tr_loss 0.024816462099552156, te_loss 0.22847598075866699, te_bias [0.04499999999999993, 0.31034482758620685]
running epoch 9, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 9, tr_loss 0.038566496908664705, te_loss 0.15946656227111816, te_bias [0.019999999999999907, 0.16666666666666669]
running epoch 10, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 10, tr_loss 0.016788961291313173, te_loss 0.2151779380440712, te_bias [0.02499999999999991, 0.19999999999999996]
running epoch 11, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 11, tr_loss 0.03384245854616165, te_loss 0.3000289970636368, te_bias [0.025000000000000022, 0.11627906976744184]
running epoch 12, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 12, tr_loss 0.048627881109714506, te_loss 0.17113025546073912, te_bias [0.07999999999999996, 0.6153846153846154]
running epoch 13, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 13, tr_loss 0.012975196182727813, te_loss 0.35278669238090515, te_bias [0.16000000000000003, 0.7619047619047619]
running epoch 14, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 14, tr_loss 0.004072285830974579, te_loss 0.17120524525642394, te_bias [0.020000000000000018, 0.19999999999999996]
running epoch 15, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 15, tr_loss 0.0004314733147621155, te_loss 0.17561925768852235, te_bias [0.015000000000000013, 0.1578947368421053]
running epoch 16, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 16, tr_loss 0.0002740484476089478, te_loss 0.18030744284391403, te_bias [0.015000000000000013, 0.1578947368421053]
running epoch 17, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 17, tr_loss 0.00021650749444961548, te_loss 0.1852247902750969, te_bias [0.015000000000000013, 0.1578947368421053]
running epoch 18, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 18, tr_loss 0.00017478221654891967, te_loss 0.1892159751057625, te_bias [0.015000000000000013, 0.1578947368421053]
running epoch 19, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 19, tr_loss 0.0001435946226119995, te_loss 0.1927396434545517, te_bias [0.015000000000000013, 0.1578947368421053]
running epoch 20, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 20, tr_loss 0.000121768057346344, te_loss 0.19584856420755387, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 21, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 21, tr_loss 0.00010657626390457154, te_loss 0.1982831683754921, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 22, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 22, tr_loss 9.38020944595337e-05, te_loss 0.20057363152503968, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 23, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 23, tr_loss 8.592963218688965e-05, te_loss 0.20229069352149964, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 24, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 24, tr_loss 7.965081930160522e-05, te_loss 0.20335564464330674, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 25, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 25, tr_loss 7.556694746017457e-05, te_loss 0.20444774210453034, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 26, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 26, tr_loss 7.292991876602172e-05, te_loss 0.20481952428817748, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 27, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 27, tr_loss 7.199406623840331e-05, te_loss 0.2052006071805954, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 28, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 28, tr_loss 7.152092456817627e-05, te_loss 0.2052994403243065, te_bias [0.010000000000000009, 0.11111111111111116]
running epoch 29, best test loss 0.11324031472206116 after epoch 5


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

epoch 29, tr_loss 7.155638933181763e-05, te_loss 0.20517626851797105, te_bias [0.010000000000000009, 0.11111111111111116]
