In [1]:
import numpy as np
import pandas as pd
import os
import copy
import glob
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from skimage.io import imread
from sklearn.model_selection import train_test_split

  from .collection import imread_collection_wrapper


In [2]:
# make sure to enable GPU acceleration!
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
BATCH_SIZE = 64


dataset_folder_name = '../data/UTKFace/Images'

TRAIN_TEST_SPLIT = 0.7
IM_WIDTH = IM_HEIGHT = 200

dataset_dict = {
    'race_id': {
        0: 'white', 
        1: 'black', 
        2: 'asian', 
        3: 'indian', 
        4: 'others'
    },
    'gender_id': {
        0: 'male',
        1: 'female'
    }
}

dataset_dict['gender_alias'] = dict((g, i) for i, g in dataset_dict['gender_id'].items())
dataset_dict['race_alias'] = dict((g, i) for i, g in dataset_dict['race_id'].items())


def parse_dataset(dataset_path, ext='jpg'):
    """
    Used to extract information about our dataset. It iterates over all images and return a DataFrame with
    the data (age, gender and sex) of all files.
    """
    def parse_info_from_file(path):
        """
        Parse information from a single file
        """
        try:
            filename = os.path.split(path)[1]
            filename = os.path.splitext(filename)[0]
            age, gender, race, _ = filename.split('_')

            return int(age), dataset_dict['gender_id'][int(gender)], dataset_dict['race_id'][int(race)]
        except Exception as ex:
            return None, None, None

    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))

    records = []
    for file in files:
        info = parse_info_from_file(file)
        records.append(info)

    df = pd.DataFrame(records)
    df['file'] = files
    df.columns = ['age', 'gender', 'race', 'file']
    df = df.dropna()

    return df


class Dataset(torch.utils.data.Dataset):
    """
    This is our custom dataset class which will load the images, perform transforms on them,
    and load their corresponding labels.
    """
    
    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.images = [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith(".jpg")]
        self.transform = transform
        
    def __getitem__(self, idx):
        #print(f'#{idx}...', end='')
        if idx >= self.df.shape[0]:
            idx = self.df.shape[0]-1
        img_path = self.df.iloc[idx]['file']
#         print("img_path:", img_path)
        #print('OK')
        img = imread(img_path)
        
        if self.transform:
            img = self.transform(img)
        
        sample = {
            "image": img,
        }
        sample["gender"] = dataset_dict['gender_alias'][self.df.iloc[idx]["gender"]]
#        sample["id"] = self.df.loc[idx, "id"]
        return sample
    
    def __len__(self):
        try:
            return self.df.shape[0]
        except AttributeError:
            return len(self.images)

In [4]:
df = parse_dataset(dataset_folder_name)

train_indices, test_indices = train_test_split(df.index, test_size=0.25)

transform_pipe = transforms.Compose([
    transforms.ToPILImage(), # Convert np array to PILImage

    # Resize image to 224 x 224 as required by most vision models
    transforms.Resize(
        size=(224, 224)
    ),

    # Convert PIL image to tensor with image values in [0, 1]
    transforms.ToTensor(),

    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

dataset = Dataset(
    df=df,
    img_dir="../data/UTKFace/Images/",
    transform=transform_pipe
)

In [5]:
# The training dataset loader will randomly sample from the train samples
train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    sampler=torch.utils.data.SubsetRandomSampler(
        train_indices
    ),
#     shuffle=True,
    num_workers=8,
)

# The testing dataset loader will randomly sample from the test samples
test_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=64,
    sampler=torch.utils.data.SubsetRandomSampler(
        test_indices
    ),
#     shuffle=True,
    num_workers=8,
)

dataloaders = {
    "train": train_loader,
    "test": test_loader
}

In [6]:
# instance = dataloaders['train'].dataset[2]
# img, gender = instance['image'], instance['gender']
# img = img[0]
# plt.figure(figsize=(10,10))
# plt.imshow(img)
# plt.gray()
# plt.show()
# gender = dataset_dict['gender_id'][gender]
# print(f'Gender: {gender}')

In [7]:
model = torchvision.models.resnet50()
USE_GPU = True
EPOCHS = 5

# Replace final fully connected layer to suite problem
model.fc = torch.nn.Sequential(
    torch.nn.Linear(
        in_features=2048,
        out_features=1
    ),
    torch.nn.Softmax(dim=1)
)

# Model training
if device=='cuda':
    model = model.cuda() # Should be called before instantiating optimizer
    
def count_parameters(model):
    total = 0
    for p in model.parameters():
        layer_count = np.prod(p.shape)
        total += layer_count
        
    return total

print(f'The model has {count_parameters(model)} parameters')

The model has 23510081 parameters


In [8]:
# we will save checkpoints to the checkpoints folder. Create it.
!mkdir -p checkpoints
def save_checkpoint(optimizer, model, epoch, filename):
    checkpoint_dict = {
        'optimizer': optimizer.state_dict(),
        'model': model.state_dict(),
        'epoch': epoch
    }
    torch.save(checkpoint_dict, filename)


def load_checkpoint(optimizer, model, filename):
    checkpoint_dict = torch.load(filename)
    epoch = checkpoint_dict['epoch']
    model.load_state_dict(checkpoint_dict['model'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint_dict['optimizer'])
    return epoch

In [9]:
criterion = nn.BCELoss() # Binary classification Male-Female
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, nesterov=True)

def train_for_epoch():
    model.train()
    
    train_losses = []
    samples = 0
    loss_sum = 0
    correct_sum = 0
    for i, batch in enumerate(dataloaders['train']):
        # move training data into GPU
        X = batch["image"]
        genders = batch["gender"]
        if device=='cuda':
            X = X.cuda()
            genders = genders.cuda()
        # clear previous gradients
        optimizer.zero_grad()
        # forward propagation
        y = model(X)
        # calculate loss for the batch
        loss = criterion(y, genders.view(-1, 1).float())
        # backpropagation to compute gradients
        loss.backward()
        # update model weights
        optimizer.step()
        train_losses.append(loss.item())
        
        # We need to multiple by batch size as loss is the mean loss of the samples in the batch
        loss_sum += loss.item() * X.shape[0]
        samples += X.shape[0]
        num_corrects = torch.sum((y >= 0.5).float() == genders.view(-1, 1).float())
        correct_sum += num_corrects

        # Print batch statistics every 50 batches
        #if j % 50 == 49 and phase == "train":
        print(f"\tB{i + 1} - loss: {float(loss_sum) / float(samples)}, acc: {float(correct_sum) / float(samples)}")
        
    # calculate average training loss of the epoch
    train_loss = np.mean(train_losses)
    return train_loss


def validate():
    model.eval()
    
    valid_losses = []
    valid_set = []
    y_pred = []
    
    # we don't need gradients for valiation (save memory)
    with torch.no_grad():
        for batch in dataloaders['test']:
            # move training data into GPU
            X = batch["image"].to(device)
            genders = batch["gender"].to(device)
            valid_set.extend(genders)
            
            y = model(X)
            loss = criterion(y, genders.view(-1, 1).float())
            valid_losses.append(loss.item())
            y_pred.extend(y.argmax(dim=1).cpu().numpy())
            
    valid_loss = np.mean(valid_losses)
    
    # collect predictions into y_pred and ground truth into y_true
    y_pred = np.array(y_pred, dtype=np.float32)
    y_true = np.array(valid_set, dtype=np.float32)
    
    accuracy = np.mean(y_true == y_pred)
    return valid_loss, accuracy


def train(first_epoch, num_epochs):
    train_losses, valid_losses = [], []
    for epoch in range(first_epoch, first_epoch+num_epochs):
        train_loss = train_for_epoch()
        
        valid_loss, valid_acc = validate()
        
        print(f'Epoch({epoch:03d}) train loss: {train_loss}'
              f'val loss: {valid_loss}'
              f'val acc: {valid_acc*100:.4f}%')
        
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        
        # save a checkpoint
        checkpoint_filename = f'checkpoints/utk-{epoch:03d}.pkl'
        save_checkpoint(optimizer, model, epoch, checkpoint_filename)
    
    return train_losses, valid_losses

In [None]:
train_losses, valid_losses = train(1, 3)

	B1 - loss: 17.269392013549805, acc: 0.375
