# PyTorch ResNet + UTKFace

Partially based [on this](https://www.kaggle.com/code/gxkok21/resnet50-with-pytorch/notebook)

In [None]:
import numpy as np 
import pandas as pd
import os
import copy
import glob
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision

# import skimage
from skimage.io import imread

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Folder that holds the UTKFace image files.
dataset_folder_name = '../data/UTKFace/Images'

# Split ratio and image size constants used by the notebook.
TRAIN_TEST_SPLIT = 0.7
IM_WIDTH = IM_HEIGHT = 200

# Integer code -> readable label, for the race and gender fields.
dataset_dict = {
    'race_id': dict(enumerate(['white', 'black', 'asian', 'indian', 'others'])),
    'gender_id': dict(enumerate(['male', 'female'])),
}

# Reverse lookups: readable label -> integer code.
dataset_dict['gender_alias'] = {
    label: code for code, label in dataset_dict['gender_id'].items()
}
dataset_dict['race_alias'] = {
    label: code for code, label in dataset_dict['race_id'].items()
}

In [None]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Build a DataFrame describing every image found directly inside ``dataset_path``.

    File names are expected to look like ``<age>_<gender>_<race>_<suffix>.<ext>``,
    where gender and race are the integer codes listed in ``dataset_dict``.
    Files whose names do not follow that pattern are skipped.

    Args:
        dataset_path: Directory that is searched (non-recursively) for images.
        ext: File extension to match, without the leading dot.

    Returns:
        A DataFrame with the columns ``age``, ``gender``, ``race`` and ``file``
        and a contiguous 0..n-1 index, so row labels and row positions coincide.
        The frame is empty (but keeps its columns) when nothing matches.
    """
    columns = ['age', 'gender', 'race', 'file']

    def parse_info_from_file(path):
        """
        Parse (age, gender, race) from one file name; return None if it is malformed.
        """
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            age, gender, race, _ = stem.split('_')
            return (
                int(age),
                dataset_dict['gender_id'][int(gender)],
                dataset_dict['race_id'][int(race)],
            )
        except (ValueError, KeyError):
            # ValueError: wrong number of '_'-separated fields or a non-numeric field.
            # KeyError: a gender/race code that is not listed in dataset_dict.
            return None

    records = []
    for file in glob.glob(os.path.join(dataset_path, "*.%s" % ext)):
        info = parse_info_from_file(file)
        if info is not None:
            records.append(info + (file,))

    # Passing the column names explicitly keeps the schema intact even when
    # no file matched, instead of failing on a column-count mismatch.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Index every image in the dataset folder, then preview the first rows.
df = parse_dataset(dataset_path=dataset_folder_name)
df.head(n=5)

In [None]:
# Share of each gender label in the dataset.
df["gender"].value_counts().plot.pie()

In [None]:
# Share of each race label in the dataset.
df["race"].value_counts().plot.pie()

In [None]:
# Split row *positions* rather than index labels: the Dataset looks rows up
# with .iloc, and the labels in df.index are not guaranteed to be contiguous.
# NOTE(review): TRAIN_TEST_SPLIT is taken to be the training fraction; the
# split previously hard-coded test_size=0.25 and ignored the constant.
train_indices, test_indices = train_test_split(
    list(range(df.shape[0])),
    train_size=TRAIN_TEST_SPLIT,
)

In [None]:
# Number of rows (images) in the DataFrame.
len(df.index)

In [None]:
class Dataset(torch.utils.data.Dataset):
    """
    Map-style dataset that loads the images listed in a DataFrame.

    Each item is a dict holding the (optionally transformed) image under
    ``"image"`` and the integer gender code under ``"gender"``.
    """

    def __init__(self, df, img_dir, transform=None):
        """
        Args:
            df: DataFrame with a ``file`` column (image path) and a ``gender``
                column whose values are keys of ``dataset_dict['gender_alias']``.
            img_dir: Directory whose ``.jpg`` files are listed in ``self.images``.
            transform: Optional callable applied to the image after loading.
        """
        self.df = df
        self.img_dir = img_dir
        # Sorted so the listing does not depend on the file system's ordering.
        self.images = sorted(
            os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith(".jpg")
        )
        self.transform = transform

    def __getitem__(self, idx):
        """
        Return the sample stored at row position ``idx`` of ``self.df``.

        Positions past the end are clamped to the last row. That lenient
        behaviour is kept on purpose: callers may hand in index labels that
        exceed the number of rows.
        """
        last_position = self.df.shape[0] - 1
        if idx > last_position:
            idx = last_position

        # Fetch the row once and read both fields from it.
        row = self.df.iloc[idx]
        img = imread(row['file'])

        if self.transform:
            img = self.transform(img)

        return {
            "image": img,
            "gender": dataset_dict['gender_alias'][row["gender"]],
        }

    def __len__(self):
        """
        Return the number of rows in ``self.df``; fall back to the number of
        listed image files when ``self.df`` has no ``shape`` attribute.
        """
        try:
            return self.df.shape[0]
        except AttributeError:
            return len(self.images)

In [None]:
# Preprocessing applied to every image array before it reaches the network.
transform_pipe = torchvision.transforms.Compose([
    # numpy array -> PIL image
    torchvision.transforms.ToPILImage(),
    # Scale to 224 x 224, the input size most vision models expect
    torchvision.transforms.Resize((224, 224)),
    # PIL image -> tensor with values in [0, 1]
    torchvision.transforms.ToTensor(),
    # Per-channel standardisation: (value - mean) / std
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])

In [None]:
# One Dataset over the whole DataFrame; the train/test separation is done by
# the samplers handed to the DataLoaders, not here.
train_data = Dataset(
    df=df,
    # Reuse the folder that parse_dataset scanned instead of repeating the path.
    img_dir=dataset_folder_name,
    transform=transform_pipe
)

In [None]:
# Training batches: 64 samples each, drawn in random order from train_indices only.
train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=64,
    sampler=train_sampler,
)

In [None]:
# Test batches: same underlying dataset object, restricted to test_indices
# (also visited in random order).
test_sampler = torch.utils.data.SubsetRandomSampler(test_indices)
test_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=64,
    sampler=test_sampler,
)

In [None]:
# Phase name -> loader, so the training loop can pick a loader by phase.
dataloaders = dict(train=train_loader, test=test_loader)

In [None]:
# ResNet-50 WITH pre-trained ImageNet weights. Calling resnet50() without a
# weights argument does not load any, so request them explicitly.
if hasattr(torchvision.models, "ResNet50_Weights"):
    # torchvision >= 0.13: weights are selected through the weights enum.
    model = torchvision.models.resnet50(
        weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1
    )
else:
    # Older torchvision releases only know the boolean flag.
    model = torchvision.models.resnet50(pretrained=True)

In [None]:
# Swap the final fully connected layer for a head that suits this problem:
# 2048 features -> 1 unit, squashed to (0, 1) by a sigmoid.
model.fc = torch.nn.Sequential(
    torch.nn.Linear(2048, 1),
    torch.nn.Sigmoid(),
)

In [None]:
# Smoke test: push the first sample through the network as a batch of one.
first_image = train_data[0]["image"]
out = model(first_image.view(1, 3, 224, 224))
out

In [None]:
# Use the GPU only when PyTorch can actually see one, so the .cuda() calls in
# the cells below are skipped on CPU-only machines instead of failing.
USE_GPU = torch.cuda.is_available()
# Number of passes over the training split.
EPOCHS = 5

In [None]:
# Train the model and evaluate it on the test split after every epoch.
if USE_GPU:
    # Move the model first so the optimizer is built from the GPU parameters.
    model = model.cuda()

optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()  # binary cross-entropy on the sigmoid output

# Best test accuracy seen so far, and the weights that produced it.
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(EPOCHS):
    for phase in ("train", "test"):
        is_training = phase == "train"
        if is_training:
            model.train()
        else:
            model.eval()

        # Running totals for this phase.
        samples = 0
        loss_sum = 0
        correct_sum = 0
        for step, batch in enumerate(dataloaders[phase]):
            X, genders = batch["image"], batch["gender"]
            if USE_GPU:
                X, genders = X.cuda(), genders.cuda()

            optimizer.zero_grad()

            # Gradients are only tracked while training.
            with torch.set_grad_enabled(is_training):
                y = model(X)
                # Column vector of float targets, matching the model output shape.
                targets = genders.view(-1, 1).float()
                loss = criterion(y, targets)

                if is_training:
                    loss.backward()
                    optimizer.step()

                # loss is the batch mean, so multiply by the batch size before summing.
                batch_size = X.shape[0]
                loss_sum += loss.item() * batch_size
                samples += batch_size
                num_corrects = torch.sum((y >= 0.5).float() == targets)
                correct_sum += num_corrects

                # Report running statistics every 50 training batches.
                if is_training and step % 50 == 49:
                    print("{}:{} - loss: {}, acc: {}".format(
                        epoch + 1,
                        step + 1,
                        float(loss_sum) / float(samples),
                        float(correct_sum) / float(samples)
                    ))

        # End-of-phase statistics.
        epoch_acc = float(correct_sum) / float(samples)
        epoch_loss = float(loss_sum) / float(samples)
        print("epoch: {} - {} loss: {}, {} acc: {}".format(epoch + 1, phase, epoch_loss, phase, epoch_acc))

        # Keep (and persist) the weights whenever test accuracy improves.
        if phase == "test" and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, "resnet50.pth")

In [None]:
# Reconstruct the model from saved weights: same ResNet-50 body and the same
# 1-unit sigmoid head that was trained above.
model1 = torchvision.models.resnet50()
model1.fc = torch.nn.Sequential(
    torch.nn.Linear(
        in_features=2048,
        out_features=1
    ),
    torch.nn.Sigmoid()
)
# model1 lives on the CPU at this point, so map the checkpoint tensors there;
# without map_location a checkpoint saved from the GPU cannot be restored on a
# machine that has no CUDA device.
model1.load_state_dict(torch.load("resnet50.pth", map_location="cpu"))

In [None]:
# Make predictions on the test split with the restored model.
model1.eval()
if USE_GPU:
    model1 = model1.cuda()

ids_all = []
labels_all = []
predictions = []

# Inference only, so gradient tracking is switched off for the whole loop.
with torch.no_grad():
    # test_loader is the only test-split loader this notebook defines.
    for batch in test_loader:
        X = batch["image"]
        if USE_GPU:
            X = X.cuda()

        # Samples built by Dataset carry no "id" entry; collect ids only when
        # a loader happens to provide them.
        ids_all.extend(batch.get("id", []))

        # The loader visits samples in random order, so keep the true labels
        # next to the predictions to be able to score them afterwards.
        labels_all.append(batch["gender"].numpy())

        y_pred = model1(X)
        predictions.append((y_pred >= 0.5).float().cpu().numpy())

print("Done making predictions!")