In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/"

/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect


In [3]:
## Additional External Libraries (Deep Learning)
import time
import datetime
import torchvision
import torch
import random
from sklearn.model_selection import train_test_split
from skimage import io
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms as tfs
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader,SubsetRandomSampler
from PIL import Image
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
from torchsummary import summary
import torchvision.models as models

gpu_boole = torch.cuda.is_available()

In [4]:
class VGG16(nn.Module):
  def __init__(self, in_channels=3):
    super(VGG16, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.vgg16(pretrained=True)

    self.model.conv1 = nn.Conv2d(in_channels, 64,3)
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.classifier[6].in_features
    self.model.classifier[6] = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [5]:
class ResNet101(nn.Module):
  def __init__(self, in_channels=3):
    super(ResNet101, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.resnet101(pretrained=True)

    self.model.conv1 = nn.Conv2d(in_channels, 64,3)
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.fc.in_features
    self.model.fc = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [6]:
import os
from os.path import isfile, join

crop_path = "/content/drive/MyDrive/JHU_courses/ML_class/Final/Dataset/UTKFace/"

crop_list = [f for f in os.listdir(crop_path) if isfile(join(crop_path,f)) and f.endswith(".jpg")]

In [7]:
import pandas as pd 

age_list = []
gender_list = []
race_list = []

for crop_img_name in crop_list:
  
  img_name = crop_img_name.split("_")

  age_list.append(int(img_name[0]))
  gender_list.append(img_name[1])
  race_list.append(img_name[2])

df = pd.DataFrame({
    "img":crop_list,
    "age":age_list,
    "gender":gender_list,
    "race":race_list
})

df

Unnamed: 0,img,age,gender,race
0,56_0_3_20170119180916188.jpg.chip.jpg,56,0,3
1,56_0_3_20170119183551541.jpg.chip.jpg,56,0,3
2,56_0_2_20170119180737540.jpg.chip.jpg,56,0,2
3,56_0_3_20170119174930466.jpg.chip.jpg,56,0,3
4,56_0_3_20170119180731484.jpg.chip.jpg,56,0,3
...,...,...,...,...
23703,1_0_2_20161219202914180.jpg.chip.jpg,1,0,2
23704,1_1_0_20161219202919508.jpg.chip.jpg,1,1,0
23705,2_1_2_20161219202825380.jpg.chip.jpg,2,1,2
23706,5_0_2_20161219202809316.jpg.chip.jpg,5,0,2


### race0 data

In [8]:
race_num = "0"

race_num_list0 = df.index[df['race'] == race_num].tolist()

df_race0 = df.iloc[race_num_list0]

In [9]:
# age_20_50_index = df_race.index[(df_race['age'] < 50)&(df_race['age'] >= 20)]
# df_20_50_race = df.iloc[age_20_50_index]
df_20_50_race0 = df_race0

In [10]:
age_range_list = []

for age in df_20_50_race0["age"]:
  if(int(age)>=0 and int(age) < 10):
      # set 0-10 as class 0
      age_range_list.append(0)
  elif(int(age)>=10 and int(age) < 20):
      # set 10-20 as class 1
      age_range_list.append(1)
  elif(int(age)>=20 and int(age) < 30):
      # set 20-30 as class 2
      age_range_list.append(2)
  elif(int(age)>=30 and int(age) < 40):
      # set 30-40 as class 3
      age_range_list.append(3)
  elif(int(age)>=40 and int(age) < 50):
      # set 40-50 as class 4
      age_range_list.append(4)
  elif(int(age)>=50 and int(age) < 60):
      # set 50-60 as class 5
      age_range_list.append(5)
  elif(int(age) >= 60):
      # set 60> as class 5
      age_range_list.append(6)

df_20_50_race0["age_range"] = age_range_list
df_20_50_race0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,img,age,gender,race,age_range
7,56_1_0_20170119202536518.jpg.chip.jpg,56,1,0,5
27,58_0_0_20170119211727401.jpg.chip.jpg,58,0,0,5
58,59_0_0_20170119211444609.jpg.chip.jpg,59,0,0,5
59,60_0_0_20170119204301455.jpg.chip.jpg,60,0,0,6
60,60_0_0_20170119145422774.jpg.chip.jpg,60,0,0,6
...,...,...,...,...,...
23685,7_0_0_20161219201514284.jpg.chip.jpg,7,0,0,0
23695,1_0_0_20161219202455708.jpg.chip.jpg,1,0,0,0
23698,1_0_0_20161219203009924.jpg.chip.jpg,1,0,0,0
23699,1_1_0_20161219203021180.jpg.chip.jpg,1,1,0,0


In [11]:
df_20_50_race0["age_range"].value_counts()

2    1986
6    1811
3    1749
5    1368
4    1115
0    1106
1     943
Name: age_range, dtype: int64

In [12]:
randomlist = random.sample(range(len(df_20_50_race0)), 1000)
df_rand0 = df_20_50_race0.iloc[randomlist]
df_rand0

Unnamed: 0,img,age,gender,race,age_range
17737,80_0_0_20170111211457352.jpg.chip.jpg,80,0,0,6
2052,20_1_0_20170119153717575.jpg.chip.jpg,20,1,0,2
14913,66_1_0_20170110140921650.jpg.chip.jpg,66,1,0,6
20878,53_1_0_20170104184642790.jpg.chip.jpg,53,1,0,5
17184,54_0_0_20170111201637797.jpg.chip.jpg,54,0,0,5
...,...,...,...,...,...
11298,25_0_0_20170113181532013.jpg.chip.jpg,25,0,0,2
20949,64_0_0_20170104185632214.jpg.chip.jpg,64,0,0,6
4003,26_1_0_20170116235548284.jpg.chip.jpg,26,1,0,2
7777,55_0_0_20170117190131875.jpg.chip.jpg,55,0,0,5


### race1 data

In [13]:
race_num = "1"

race1_num_list = df.index[df['race'] == race_num].tolist()

df_race1 = df.iloc[race1_num_list]

In [14]:
# age_20_50_index = df_race.index[(df_race['age'] < 50)&(df_race['age'] >= 20)]
# df_20_50_race = df.iloc[age_20_50_index]
df_20_50_race1 = df_race1

In [15]:
age_range_list = []

for age in df_20_50_race1["age"]:
  if(int(age)>=0 and int(age) < 10):
      # set 0-10 as class 0
      age_range_list.append(0)
  elif(int(age)>=10 and int(age) < 20):
      # set 10-20 as class 1
      age_range_list.append(1)
  elif(int(age)>=20 and int(age) < 30):
      # set 20-30 as class 2
      age_range_list.append(2)
  elif(int(age)>=30 and int(age) < 40):
      # set 30-40 as class 3
      age_range_list.append(3)
  elif(int(age)>=40 and int(age) < 50):
      # set 40-50 as class 4
      age_range_list.append(4)
  elif(int(age)>=50 and int(age) < 60):
      # set 50-60 as class 5
      age_range_list.append(5)
  elif(int(age) >= 60):
      # set 60> as class 5
      age_range_list.append(6)

df_20_50_race1["age_range"] = age_range_list
df_20_50_race1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,img,age,gender,race,age_range
65,60_0_1_20170119204250215.jpg.chip.jpg,60,0,1,6
167,70_1_1_20170119205140215.jpg.chip.jpg,70,1,1,6
171,80_1_1_20170119210040345.jpg.chip.jpg,80,1,1,6
194,25_0_1_20170116003121821.jpg.chip.jpg,25,0,1,2
203,26_1_1_20170116022038426.jpg.chip.jpg,26,1,1,2
...,...,...,...,...,...
23282,1_1_1_20161219155940125.jpg.chip.jpg,1,1,1,0
23287,4_1_1_20161219160501453.jpg.chip.jpg,4,1,1,0
23291,4_1_1_20161219160210758.jpg.chip.jpg,4,1,1,0
23296,1_0_1_20161219160115237.jpg.chip.jpg,1,0,1,0


In [16]:
df_20_50_race1["age_range"].value_counts()

2    1858
3    1258
4     422
6     345
5     307
1     182
0     154
Name: age_range, dtype: int64

In [17]:
randomlist = random.sample(range(len(df_20_50_race1)), 1000)
df_rand1 = df_20_50_race1.iloc[randomlist]
df_rand1

Unnamed: 0,img,age,gender,race,age_range
5903,26_0_1_20170116182254548.jpg.chip.jpg,26,0,1,2
3003,35_1_1_20170117154240728.jpg.chip.jpg,35,1,1,3
4484,28_0_1_20170117020752716.jpg.chip.jpg,28,0,1,2
9604,26_1_1_20170117201147190.jpg.chip.jpg,26,1,1,2
14154,4_1_1_20170109194530016.jpg.chip.jpg,4,1,1,0
...,...,...,...,...,...
12415,35_0_1_20170113134244873.jpg.chip.jpg,35,0,1,3
4991,35_1_1_20170117154025584.jpg.chip.jpg,35,1,1,3
9813,35_1_1_20170117201515542.jpg.chip.jpg,35,1,1,3
4910,34_1_1_20170116231528550.jpg.chip.jpg,34,1,1,3


### race2 data

In [18]:
race_num = "2"

race2_num_list = df.index[df['race'] == race_num].tolist()

df_race2 = df.iloc[race2_num_list]

In [19]:
# age_20_50_index = df_race.index[(df_race['age'] < 50)&(df_race['age'] >= 20)]
# df_20_50_race = df.iloc[age_20_50_index]
df_20_50_race2 = df_race2

In [20]:
age_range_list = []

for age in df_20_50_race2["age"]:
  if(int(age)>=0 and int(age) < 10):
      # set 0-10 as class 0
      age_range_list.append(0)
  elif(int(age)>=10 and int(age) < 20):
      # set 10-20 as class 1
      age_range_list.append(1)
  elif(int(age)>=20 and int(age) < 30):
      # set 20-30 as class 2
      age_range_list.append(2)
  elif(int(age)>=30 and int(age) < 40):
      # set 30-40 as class 3
      age_range_list.append(3)
  elif(int(age)>=40 and int(age) < 50):
      # set 40-50 as class 4
      age_range_list.append(4)
  elif(int(age)>=50 and int(age) < 60):
      # set 50-60 as class 5
      age_range_list.append(5)
  elif(int(age) >= 60):
      # set 60> as class 5
      age_range_list.append(6)

df_20_50_race2["age_range"] = age_range_list
df_20_50_race2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,img,age,gender,race,age_range
2,56_0_2_20170119180737540.jpg.chip.jpg,56,0,2,5
128,65_0_2_20170119205814303.jpg.chip.jpg,65,0,2,6
179,80_1_2_20170119211723865.jpg.chip.jpg,80,1,2,6
185,35_0_2_20170119180256157.jpg.chip.jpg,35,0,2,3
189,28_0_2_20170119180256157.jpg.chip.jpg,28,0,2,2
...,...,...,...,...,...
23702,1_0_2_20161219202906108.jpg.chip.jpg,1,0,2,0
23703,1_0_2_20161219202914180.jpg.chip.jpg,1,0,2,0
23705,2_1_2_20161219202825380.jpg.chip.jpg,2,1,2,0
23706,5_0_2_20161219202809316.jpg.chip.jpg,5,0,2,0


In [21]:
df_20_50_race1["age_range"].value_counts()

2    1858
3    1258
4     422
6     345
5     307
1     182
0     154
Name: age_range, dtype: int64

In [22]:
randomlist = random.sample(range(len(df_20_50_race2)), 1000)
df_rand2 = df_20_50_race2.iloc[randomlist]
df_rand2

Unnamed: 0,img,age,gender,race,age_range
10967,32_0_2_20170116184944206.jpg.chip.jpg,32,0,2,3
10762,29_0_2_20170116161810177.jpg.chip.jpg,29,0,2,2
20411,27_1_2_20170104021735596.jpg.chip.jpg,27,1,2,2
7126,32_0_2_20170117182411900.jpg.chip.jpg,32,0,2,3
11691,22_1_2_20170116162944219.jpg.chip.jpg,22,1,2,2
...,...,...,...,...,...
13594,83_0_2_20170112224652531.jpg.chip.jpg,83,0,2,6
11782,22_1_2_20170116172706771.jpg.chip.jpg,22,1,2,2
10254,26_0_2_20170116175904702.jpg.chip.jpg,26,0,2,2
10164,26_0_2_20170116163925116.jpg.chip.jpg,26,0,2,2


### race3 data

In [23]:
race_num = "3"

race3_num_list = df.index[df['race'] == race_num].tolist()

df_race3 = df.iloc[race3_num_list]

In [24]:
# age_20_50_index = df_race.index[(df_race['age'] < 50)&(df_race['age'] >= 20)]
# df_20_50_race = df.iloc[age_20_50_index]
df_20_50_race3 = df_race3

In [25]:
age_range_list = []

for age in df_20_50_race3["age"]:
  if(int(age)>=0 and int(age) < 10):
      # set 0-10 as class 0
      age_range_list.append(0)
  elif(int(age)>=10 and int(age) < 20):
      # set 10-20 as class 1
      age_range_list.append(1)
  elif(int(age)>=20 and int(age) < 30):
      # set 20-30 as class 2
      age_range_list.append(2)
  elif(int(age)>=30 and int(age) < 40):
      # set 30-40 as class 3
      age_range_list.append(3)
  elif(int(age)>=40 and int(age) < 50):
      # set 40-50 as class 4
      age_range_list.append(4)
  elif(int(age)>=50 and int(age) < 60):
      # set 50-60 as class 5
      age_range_list.append(5)
  elif(int(age) >= 60):
      # set 60> as class 5
      age_range_list.append(6)

df_20_50_race3["age_range"] = age_range_list
df_20_50_race3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,img,age,gender,race,age_range
0,56_0_3_20170119180916188.jpg.chip.jpg,56,0,3,5
1,56_0_3_20170119183551541.jpg.chip.jpg,56,0,3,5
3,56_0_3_20170119174930466.jpg.chip.jpg,56,0,3,5
4,56_0_3_20170119180731484.jpg.chip.jpg,56,0,3,5
5,56_0_3_20170119152358239.jpg.chip.jpg,56,0,3,5
...,...,...,...,...,...
22809,1_0_3_20161220223247506.jpg.chip.jpg,1,0,3,0
22810,1_0_3_20161220223250813.jpg.chip.jpg,1,0,3,0
22811,5_0_3_20161220223303987.jpg.chip.jpg,5,0,3,0
22813,4_1_3_20161220223310227.jpg.chip.jpg,4,1,3,0


In [26]:
df_20_50_race3["age_range"].value_counts()

2    1499
3     722
0     489
5     452
4     431
6     233
1     149
Name: age_range, dtype: int64

In [27]:
randomlist = random.sample(range(len(df_20_50_race3)), 1000)
df_rand3 = df_20_50_race3.iloc[randomlist]
df_rand3

Unnamed: 0,img,age,gender,race,age_range
22388,5_1_3_20161219225406048.jpg.chip.jpg,5,1,3,0
1617,45_0_3_20170119212016521.jpg.chip.jpg,45,0,3,4
7415,36_0_3_20170117183310510.jpg.chip.jpg,36,0,3,3
19584,26_0_3_20170104215715094.jpg.chip.jpg,26,0,3,2
116,63_0_3_20170119211952608.jpg.chip.jpg,63,0,3,6
...,...,...,...,...,...
14781,57_1_3_20170109132601018.jpg.chip.jpg,57,1,3,5
19746,25_1_3_20170104222350215.jpg.chip.jpg,25,1,3,2
18127,21_1_3_20170104231633619.jpg.chip.jpg,21,1,3,2
204,51_0_3_20170119175402515.jpg.chip.jpg,51,0,3,5


## Dataloader

In [28]:
img_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.PILToTensor(),
])


class Crop_Face_Data(Dataset):
    def __init__(self, 
                 X_train,
                 y_train,
                 data_dir,
                 transforms = img_transform, 
                 use_memory = False):
      #if load all data into memeory 
      self.use_memory = use_memory
      self.transform = transforms
      self.data = []
      self.labels = []
      for i,img_name in enumerate(X_train):
        img_path = join(data_dir, img_name)
        if self.use_memory:
          self.data.append(Image.open(img_path))
        else:
          self.data.append(img_path)
        self.labels.append(y_train.tolist()[i])

    def __len__(self):
      return len(self.labels)

    def __getitem__(self, idx):
      if self.use_memory:
        img = self.data[idx]
      else:
        img = Image.open(self.data[idx])
      if self.transform:
        img = self.transform(img)
      return img, float(self.labels[idx])

race0_testing_dataset = Crop_Face_Data(X_train = df_rand0["img"], 
                                       y_train = df_rand0["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race0_testing_dataset_dataloader = DataLoader(race0_testing_dataset, batch_size=1, shuffle=True)

race1_testing_dataset = Crop_Face_Data(X_train = df_rand1["img"], 
                                       y_train = df_rand1["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race1_testing_dataset_dataloader = DataLoader(race1_testing_dataset, batch_size=1, shuffle=True)

race2_testing_dataset = Crop_Face_Data(X_train = df_rand2["img"], 
                                       y_train = df_rand2["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race2_testing_dataset_dataloader = DataLoader(race2_testing_dataset, batch_size=1, shuffle=True)

race3_testing_dataset = Crop_Face_Data(X_train = df_rand3["img"], 
                                       y_train = df_rand3["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race3_testing_dataset_dataloader = DataLoader(race3_testing_dataset, batch_size=1, shuffle=True)

In [29]:
race0_testing_dataset = Crop_Face_Data(X_train = df_20_50_race0["img"], 
                                       y_train = df_20_50_race0["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race0_testing_dataset_dataloader = DataLoader(race0_testing_dataset, batch_size=1, shuffle=True)

race1_testing_dataset = Crop_Face_Data(X_train = df_20_50_race1["img"], 
                                       y_train = df_20_50_race1["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race1_testing_dataset_dataloader = DataLoader(race1_testing_dataset, batch_size=1, shuffle=True)

race2_testing_dataset = Crop_Face_Data(X_train = df_20_50_race2["img"], 
                                       y_train = df_20_50_race2["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race2_testing_dataset_dataloader = DataLoader(race2_testing_dataset, batch_size=1, shuffle=True)

race3_testing_dataset = Crop_Face_Data(X_train = df_20_50_race3["img"], 
                                       y_train = df_20_50_race3["age_range"], 
                                       data_dir = crop_path, 
                                       transforms=img_transform, 
                                       use_memory = True)
race3_testing_dataset_dataloader = DataLoader(race3_testing_dataset, batch_size=1, shuffle=True)

In [30]:
def testing(test_dataloader,model,model_name,optimizer,loss,race_x_on_race_x):

    net = model.cuda()

    optimizer = optimizer(net.parameters(), lr = 0.001)
    loss_metric = loss()

    with torch.no_grad():

        correct = 0
        total = 0
        loss_sum = 0

        for i, (x,y) in enumerate(test_dataloader):

          x = x.float()
          y = y.float()

          if gpu_boole:
            x = x.cuda()
            y = y.cuda()

          outputs = net.forward(x)
          loss = loss_metric(outputs,y.to(torch.int64))

          total += y.size(0)
          _, predicted = torch.max(outputs.data, 1)
          correct += (predicted.float() == y.float()).sum()

          loss_sum += loss

        test_acc, test_loss = correct / total, loss_sum.cpu().data.numpy().item()/total

        print("-"*20)
        print(f'{race_x_on_race_x} {model_name} test accuracy: %f %%' % (100.0 * test_acc))
        print(f'{race_x_on_race_x} {model_name} test loss: %f' % (test_loss))
        print("-"*20)

# Race0

In [31]:
class VGG16(nn.Module):
  def __init__(self, in_channels=3):
    super(VGG16, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.vgg16(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.classifier[6].in_features
    self.model.classifier[6] = nn.Sequential(
        nn.Linear(num_ftrs, 1024),
        nn.ReLU(),
        nn.Linear(1024, 512),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(512, 7))

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [32]:
class ResNet101(nn.Module):
  def __init__(self, in_channels=3):
    super(ResNet101, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.resnet18(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    #i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.fc.in_features
    self.model.fc = nn.Sequential(
        nn.Linear(num_ftrs,512),
        nn.LeakyReLU(),
        nn.Dropout(0.8),
        nn.Linear(512,256),
        nn.LeakyReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 7))
    #self.model.fc = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [33]:
VGG_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_0_checkpts/VGG16_race_0_age_best.pt"
resnet101_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_0_checkpts/resnet101_race_0_age_best.pt"

VGG_0 = VGG16()
ResNet101_0 = ResNet101()

VGG_0.load_state_dict(torch.load(VGG_PATH)["model_state_dict"])
ResNet101_0.load_state_dict(torch.load(resnet101_PATH)["model_state_dict"])

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

<All keys matched successfully>

In [34]:
optimizer = optim.Adam
loss_f = nn.NLLLoss

### VGG

In [35]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = VGG_0,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_0")

--------------------
race_0_on_race_0 VGG test accuracy: 16.660053 %
race_0_on_race_0 VGG test loss: 43.268620
--------------------


In [36]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = VGG_0,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_1")

--------------------
race_0_on_race_1 VGG test accuracy: 8.440124 %
race_0_on_race_1 VGG test loss: 49.500100
--------------------


In [37]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = VGG_0,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_2")

--------------------
race_0_on_race_2 VGG test accuracy: 6.377403 %
race_0_on_race_2 VGG test loss: 54.235745
--------------------


In [38]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = VGG_0,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_3")

--------------------
race_0_on_race_3 VGG test accuracy: 6.389937 %
race_0_on_race_3 VGG test loss: 49.083546
--------------------


### ResNet101

In [39]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = ResNet101_0,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_0")

--------------------
race_0_on_race_0 ResNet101 test accuracy: 17.642389 %
race_0_on_race_0 ResNet101 test loss: 2.785083
--------------------


In [40]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = ResNet101_0,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_1")

--------------------
race_0_on_race_1 ResNet101 test accuracy: 28.303137 %
race_0_on_race_1 ResNet101 test loss: 1.878405
--------------------


In [41]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = ResNet101_0,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_2")

--------------------
race_0_on_race_2 ResNet101 test accuracy: 17.384975 %
race_0_on_race_2 ResNet101 test loss: 2.592159
--------------------


In [42]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = ResNet101_0,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_0_on_race_3")

--------------------
race_0_on_race_3 ResNet101 test accuracy: 19.345911 %
race_0_on_race_3 ResNet101 test loss: 2.297959
--------------------


# Race1

In [43]:
class VGG16(nn.Module):
  def __init__(self, in_channels=3):
    super(VGG16, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.vgg16(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.classifier[6].in_features
    self.model.classifier[6] = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [44]:
class ResNet101(nn.Module):
  def __init__(self, in_channels=3):
    super(ResNet101, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.resnet101(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    #i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.fc.in_features
    self.model.fc = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [45]:
VGG_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_1_checkpts/VGG16_race_1_age_best.pt"
resnet101_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_1_checkpts/resnet101_race_1_age_best_harry.pt"

VGG_1 = VGG16()
ResNet101_1 = ResNet101()

VGG_1.load_state_dict(torch.load(VGG_PATH)["model_state_dict"])
ResNet101_1.load_state_dict(torch.load(resnet101_PATH)["model_state_dict"])

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

<All keys matched successfully>

In [46]:
optimizer = optim.Adam
loss_f = nn.NLLLoss

### VGG

In [47]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = VGG_1,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_0")

--------------------
race_1_on_race_0 VGG test accuracy: 37.924191 %
race_1_on_race_0 VGG test loss: 5.899930
--------------------


In [48]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = VGG_1,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_1")

--------------------
race_1_on_race_1 VGG test accuracy: 80.424210 %
race_1_on_race_1 VGG test loss: 1.992794
--------------------


In [49]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = VGG_1,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_2")

--------------------
race_1_on_race_2 VGG test accuracy: 53.756554 %
race_1_on_race_2 VGG test loss: 4.674223
--------------------


In [50]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = VGG_1,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_3")

--------------------
race_1_on_race_3 VGG test accuracy: 44.628929 %
race_1_on_race_3 VGG test loss: 5.532811
--------------------


### ResNet101

In [51]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = ResNet101_1,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_0")

--------------------
race_1_on_race_0 ResNet101 test accuracy: 19.706291 %
race_1_on_race_0 ResNet101 test loss: 4.734182
--------------------


In [52]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = ResNet101_1,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_1")

--------------------
race_1_on_race_1 ResNet101 test accuracy: 41.051701 %
race_1_on_race_1 ResNet101 test loss: 2.675161
--------------------


In [53]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = ResNet101_1,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_2")

In [54]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = ResNet101_1,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_1_on_race_3")

--------------------
race_1_on_race_3 ResNet101 test accuracy: 37.710690 %
race_1_on_race_3 ResNet101 test loss: 3.334293
--------------------


# Race2

In [55]:
class VGG16(nn.Module):
  def __init__(self, in_channels=3):
    super(VGG16, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.vgg16(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.classifier[6].in_features
    self.model.classifier[6] = nn.Sequential(
        nn.Linear(num_ftrs, 1024),
        nn.ReLU(),
        nn.Linear(1024, 512),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(512, 7))

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [56]:
class ResNet101(nn.Module):
  def __init__(self, in_channels=3):
    super(ResNet101, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.resnet18(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    #i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.fc.in_features
    self.model.fc = nn.Sequential(
        nn.Linear(num_ftrs,512),
        nn.LeakyReLU(),
        nn.Dropout(0.8),
        nn.Linear(512,256),
        nn.LeakyReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 7))
    #self.model.fc = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [57]:
VGG_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_2_checkpts/VGG16_race_2_age_best.pt"
resnet101_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_2_checkpts/resnet101_race_2_age_best.pt"

VGG_2 = VGG16()
ResNet101_2 = ResNet101()

VGG_2.load_state_dict(torch.load(VGG_PATH)["model_state_dict"])
ResNet101_2.load_state_dict(torch.load(resnet101_PATH)["model_state_dict"])

<All keys matched successfully>

In [58]:
optimizer = optim.Adam
loss_f = nn.NLLLoss

### VGG

In [59]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = VGG_2,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_0")

--------------------
race_2_on_race_0 VGG test accuracy: 18.545347 %
race_2_on_race_0 VGG test loss: 9.284242
--------------------


In [60]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = VGG_2,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_1")

--------------------
race_2_on_race_1 VGG test accuracy: 20.592134 %
race_2_on_race_1 VGG test loss: 6.436265
--------------------


In [61]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = VGG_2,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_2")

--------------------
race_2_on_race_2 VGG test accuracy: 36.488064 %
race_2_on_race_2 VGG test loss: 7.977941
--------------------


In [62]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = VGG_2,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_3")

--------------------
race_2_on_race_3 VGG test accuracy: 26.364780 %
race_2_on_race_3 VGG test loss: 7.466142
--------------------


### ResNet101

In [63]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = ResNet101_2,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_0")

--------------------
race_2_on_race_0 ResNet101 test accuracy: 18.386585 %
race_2_on_race_0 ResNet101 test loss: 2.308289
--------------------


In [64]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = ResNet101_2,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_1")

--------------------
race_2_on_race_1 ResNet101 test accuracy: 35.307114 %
race_2_on_race_1 ResNet101 test loss: 1.868388
--------------------


In [65]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = ResNet101_2,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_2")

--------------------
race_2_on_race_2 ResNet101 test accuracy: 36.924870 %
race_2_on_race_2 ResNet101 test loss: 1.595714
--------------------


In [66]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = ResNet101_2,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_2_on_race_3")

--------------------
race_2_on_race_3 ResNet101 test accuracy: 34.138363 %
race_2_on_race_3 ResNet101 test loss: 1.910360
--------------------


# Race3

In [67]:
class VGG16(nn.Module):
  def __init__(self, in_channels=3):
    super(VGG16, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.vgg16(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.classifier[6].in_features
    self.model.classifier[6] = nn.Sequential(
        nn.Linear(num_ftrs, 1024),
        nn.ReLU(),
        nn.Linear(1024, 512),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(512, 7))

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [68]:
class ResNet101(nn.Module):
  def __init__(self, in_channels=3):
    super(ResNet101, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.resnet101(pretrained=True)

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64,3)

    #i=0
    #for param in self.model.parameters():
    #    if i>39 and i<343:
    #        param.requires_grad = False
    #    i+=1 
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.fc.in_features
    self.model.fc = nn.Linear(num_ftrs, 7)

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x,dim=1)

In [69]:
VGG_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_3_checkpts/VGG16_race_3_age_best.pt"
resnet101_PATH = "/content/drive/MyDrive/JHU_courses/ML_class/Final/New_Final_Cross_Race_Effect/race_3_checkpts/resnet101_race_3_age_best.pt"

VGG_3 = VGG16()
ResNet101_3 = ResNet101()

VGG_3.load_state_dict(torch.load(VGG_PATH)["model_state_dict"])
ResNet101_3.load_state_dict(torch.load(resnet101_PATH)["model_state_dict"])

<All keys matched successfully>

In [70]:
optimizer = optim.Adam
loss_f = nn.NLLLoss

### VGG

In [71]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = VGG_3,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_0")

--------------------
race_3_on_race_0 VGG test accuracy: 21.661045 %
race_3_on_race_0 VGG test loss: 682.150377
--------------------


In [72]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = VGG_3,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_1")

--------------------
race_3_on_race_1 VGG test accuracy: 38.908531 %
race_3_on_race_1 VGG test loss: 272.384501
--------------------


In [73]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = VGG_3,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_2")

--------------------
race_3_on_race_2 VGG test accuracy: 37.652882 %
race_3_on_race_2 VGG test loss: 565.200713
--------------------


In [74]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = VGG_3,
        model_name = "VGG",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_3")

--------------------
race_3_on_race_3 VGG test accuracy: 36.251572 %
race_3_on_race_3 VGG test loss: 415.128585
--------------------


### ResNet101

In [75]:
testing(test_dataloader = race0_testing_dataset_dataloader,
        model = ResNet101_3,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_0")

--------------------
race_3_on_race_0 ResNet101 test accuracy: 19.706291 %
race_3_on_race_0 ResNet101 test loss: 4.511626
--------------------


In [76]:
testing(test_dataloader = race1_testing_dataset_dataloader,
        model = ResNet101_3,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_1")

--------------------
race_3_on_race_1 ResNet101 test accuracy: 41.051701 %
race_3_on_race_1 ResNet101 test loss: 3.185000
--------------------


In [77]:
testing(test_dataloader = race2_testing_dataset_dataloader,
        model = ResNet101_3,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_2")

--------------------
race_3_on_race_2 ResNet101 test accuracy: 38.701225 %
race_3_on_race_2 ResNet101 test loss: 3.003424
--------------------


In [78]:
testing(test_dataloader = race3_testing_dataset_dataloader,
        model = ResNet101_3,
        model_name = "ResNet101",
        optimizer = optimizer,
        loss = loss_f,
        race_x_on_race_x = "race_3_on_race_3")

--------------------
race_3_on_race_3 ResNet101 test accuracy: 37.710690 %
race_3_on_race_3 ResNet101 test loss: 2.964359
--------------------
