In [52]:
%matplotlib inline
from pathlib import Path

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from torchvision import datasets, models, transforms
from torch.optim import lr_scheduler
import PIL

from fastai import *
from fastai.vision import *
import fastai; fastai.__version__

import torch
import torch.nn as nn
import torch.optim as optim
import time
import copy

In [53]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
device

device(type='cuda', index=0)

In [54]:
pdata = Path('D://hung//UTKFace//')
fns = list(pdata.glob('*.jpg'))
fns = [fn for fn in fns if len(str(fn).split('/')[-1].split('_'))==4 and '__' not in str(fn)]

In [55]:
# Number of image files that passed the file-name filter.
len(fns)

23705

In [56]:
def show_dct(dct, n=3):
    """Return a new dict holding the first `n` items of `dct` (iteration order).

    `n` is used as a slice bound on the item list, so it follows normal
    slicing rules (e.g. `0` gives an empty dict).
    """
    leading_items = list(dct.items())[:n]
    return {key: value for key, value in leading_items}

In [57]:
# Index -> path is the filtered file list itself; also build the reverse lookup.
i2fn = fns
fn2i = dict(zip(i2fn, range(len(i2fn))))
# Not the last expression of the cell, so this preview is computed but not displayed.
show_dct(fn2i)
# Bare file names with the directory part stripped.
bs_fns = list(map(os.path.basename, fns))
bs_fns[:3]

['100_0_0_20170112213500903.jpg.chip.jpg',
 '100_0_0_20170112215240346.jpg.chip.jpg',
 '100_1_0_20170110183726390.jpg.chip.jpg']

In [58]:
# Take the first three `_`-separated fields of every file name and regroup them
# column-wise into three parallel tuples (still raw strings at this point).
i2age, i2gender, i2race = zip(*(name.split('_')[:3] for name in bs_fns))

In [59]:
# Decode the raw string fields into typed / readable labels.
i2age = list(map(int, i2age))

# Gender code -> short label.
o2gender = {'0': 'm', '1': 'f'}
i2gender = [o2gender[code] for code in i2gender]

# Race code (position in the tuple) -> name.
o2race = dict(enumerate(('White', 'Black', 'Asian', 'Indian', 'Others')))
i2race = [o2race[int(code)] for code in i2race]

# Sample count for every (race, gender) combination, in sorted order.
sorted(Counter(zip(i2race, i2gender)).items())

[(('Asian', 'f'), 1859),
 (('Asian', 'm'), 1575),
 (('Black', 'f'), 2208),
 (('Black', 'm'), 2318),
 (('Indian', 'f'), 1714),
 (('Indian', 'm'), 2261),
 (('Others', 'f'), 932),
 (('Others', 'm'), 760),
 (('White', 'f'), 4601),
 (('White', 'm'), 5477)]

In [60]:
# One row per image: its path plus the three decoded labels.
_label_columns = {
    'img_name': i2fn,
    'age': i2age,
    'gender': i2gender,
    'race': i2race,
}
df = pd.DataFrame(_label_columns)
# Optional: persist the table with df.to_csv('img2targets.csv', index=False)

In [61]:
# Seed for the random split, so that the same rows land in the same bucket on every
# run (otherwise the reported metrics are not reproducible).
SPLIT_SEED = 42
_split_rng = np.random.RandomState(SPLIT_SEED)

# Despite its name, `is_train` is a 3-way code rather than a boolean: each row gets
# 0, 1 or 2 with probability 0.1 / 0.1 / 0.8. The dataset cell further down selects
# bucket 1 for training and bucket 0 for validation.
df['is_train'] = _split_rng.choice(3, size=len(df), p=[0.1, 0.1, 0.8])
df.head()

Unnamed: 0,img_name,age,gender,race,is_train
0,D:\hung\UTKFace\100_0_0_20170112213500903.jpg....,100,m,White,2
1,D:\hung\UTKFace\100_0_0_20170112215240346.jpg....,100,m,White,2
2,D:\hung\UTKFace\100_1_0_20170110183726390.jpg....,100,f,White,1
3,D:\hung\UTKFace\100_1_0_20170112213001988.jpg....,100,f,White,2
4,D:\hung\UTKFace\100_1_0_20170112213303693.jpg....,100,f,White,1


In [62]:
# Deterministic preprocessing used for every dataset built by `get_dataset`
# (no random augmentation is applied): resize, centre-crop to 224, convert to a
# tensor, then normalise each channel with the mean / std below.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
d_transforms = transforms.Compose(_preprocess_steps)


In [63]:
class MultitaskDataset(torch.utils.data.dataset.Dataset):
    """`Dataset` of images paired with a single classification target: gender.

    Despite the class name, only the gender label is handled here.

    Args:
        fns: sequence of image paths (anything `PIL.Image.open` accepts).
        labels_gender: sequence of gender labels, one per entry of `fns`.
        classes_gender: ordered sequence of the distinct gender labels; a label's
            position in it becomes its integer class index.
        transform: callable applied to the RGB `PIL.Image` to produce the sample.

    Raises:
        KeyError: if a label in `labels_gender` is missing from `classes_gender`.
        ValueError: if `fns` and `labels_gender` have different lengths.
    """
    def __init__(self,
                 fns,
                 labels_gender,
                 classes_gender, transform):

        self.x = np.array(fns)
        self.classes_gender = classes_gender
        self.class2idx_gender = {v:k for k,v in enumerate(self.classes_gender)}
        self.y_gender = np.array([self.class2idx_gender[o] for o in labels_gender], dtype=np.int64)
        self.c_gender = len(classes_gender)

        # A silent mismatch would only surface later as an IndexError (or wrong
        # labels) in the middle of training, so fail early instead.
        if len(self.x) != len(self.y_gender):
            raise ValueError(
                f'got {len(self.x)} file names but {len(self.y_gender)} gender labels')

        self.transform = transform

    def __len__(self): return len(self.x)

    def __getitem__(self,i:int):
        """Return `(transformed_image, label)`; the label is an int64 tensor of shape (1,)."""
        # Force 3-channel RGB so grayscale / RGBA / CMYK files cannot break a
        # 3-channel transform; `convert` returns a loaded copy, so the file handle
        # can be released as soon as the `with` block exits.
        with PIL.Image.open(self.x[i]) as img:
            rgb = img.convert('RGB')
        return self.transform(rgb), torch.tensor([self.y_gender[i]])

    def __repr__(self): return f'{type(self).__name__} of len {len(self)}'

In [64]:
def get_dataset(df):
    """Wrap the rows of `df` in a `MultitaskDataset` that applies `d_transforms`.

    Reads the `img_name` and `gender` columns. The class list is the sorted set
    of gender values present in *this* frame, so class indices are only
    comparable between two datasets when both frames contain the same labels.
    """
    gender_classes = sorted(set(df.gender))
    return MultitaskDataset(
        df.img_name,
        labels_gender=df.gender,
        classes_gender=gender_classes,
        transform=d_transforms,
    )

In [65]:
# `is_train` holds a random 3-way code (0 / 1 / 2 drawn with p = 0.1 / 0.1 / 0.8):
# bucket 1 becomes the training set and bucket 0 the validation set.
# NOTE(review): bucket 2 (the 0.8-probability one) is not used in the visible
# cells — confirm that training on the small bucket is intended.
train_ds = get_dataset(df.loc[df['is_train'] == 1])
valid_ds = get_dataset(df.loc[df['is_train'] == 0])

In [66]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 8

dataset_sizes = {'train': len(train_ds), 'val': len(valid_ds)}

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
# Validation only aggregates per-sample loss / accuracy, so shuffling adds nothing;
# a fixed order keeps evaluation deterministic.
valid_loader = torch.utils.data.DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

dataloaders = {'train': train_loader, 'val': valid_loader}
dataset_sizes

{'train': 2372, 'val': 2365}

In [67]:
def singletask_loss(input, target):
    """Cross-entropy between the logits `input` and column 0 of `target`.

    `target` is indexed as `target[:, 0]`, i.e. it must be 2-D with the class
    index in its first column; the indices are cast to int64 before the call.
    Returns whatever `torch.nn.functional.cross_entropy` returns with its
    default settings.
    """
    class_idx = target[:, 0].long()
    return torch.nn.functional.cross_entropy(input, class_idx)

## Model

In [68]:
# Number of distinct gender labels = size of the new output layer.
N_gender = len(set(df.gender))

# Created here and passed to `train_model`, whose loop computes its loss through
# `singletask_loss`.
criterion = nn.CrossEntropyLoss()

# Start from a pretrained ResNet-18 and freeze every existing parameter.
model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
    param.requires_grad_(False)

# Swap in a fresh final layer; it is created after the freeze above, so its
# parameters are the only ones that still require gradients.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, N_gender)
model_ft = model_ft.to(device)

optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [69]:
def _run_epoch(model, loader, optimizer=None):
    """Make one pass over `loader`: train when `optimizer` is given, else evaluate.

    Uses the module-level `device` and `singletask_loss`. Gradient tracking is
    switched off for evaluation passes, so no autograd graph is built.

    Returns:
        (mean_loss, accuracy): Python floats averaged over the samples actually
        seen; `(0.0, 0.0)` when the loader yields nothing.
    """
    training = optimizer is not None
    if training:
        model.train()   # Set model to training mode
    else:
        model.eval()    # Set model to evaluate mode

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if training:
                # zero the parameter gradients
                optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = singletask_loss(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # The loss is a batch mean, so weight it by the batch size before summing.
            batch_size = inputs.size(0)
            loss_sum += loss.item() * batch_size
            n_correct += (preds == labels[:, 0]).sum().item()
            n_seen += batch_size

    if n_seen == 0:
        return 0.0, 0.0
    return loss_sum / n_seen, n_correct / n_seen


def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model` for `num_epochs` epochs and return it with its best weights.

    Each epoch runs a training pass over `dataloaders['train']`, steps the
    scheduler once, then runs an evaluation pass over `dataloaders['val']`.
    The weights with the highest validation accuracy are restored at the end.

    Args:
        model: the network to train; updated in place.
        criterion: accepted for interface compatibility but not used — the loss
            is computed by the module-level `singletask_loss`.
        optimizer: optimizer over the parameters to update.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        The same `model` object, loaded with the best-validation-accuracy weights.

    Relies on the module-level names `dataloaders`, `device` and `singletask_loss`.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training phase ...
        train_loss, train_acc = _run_epoch(model, dataloaders['train'], optimizer)
        scheduler.step()
        print('{} Loss: {:.4f} Acc: {:.4f}'.format('train', train_loss, train_acc))

        # ... followed by a validation phase.
        val_loss, val_acc = _run_epoch(model, dataloaders['val'])
        print('{} Loss: {:.4f} Acc: {:.4f}'.format('val', val_loss, val_acc))

        # Snapshot the weights whenever validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model



In [70]:
# Train for 10 epochs; `train_model` returns the same model with the weights of the
# best-validation-accuracy epoch loaded.
# NOTE: `criterion` is passed along, but the loop inside `train_model` computes its
# loss with `singletask_loss`.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Loss: 0.6532 Acc: 0.6492
val Loss: 0.5347 Acc: 0.7290

Epoch 1/9
----------
train Loss: 0.5401 Acc: 0.7357
val Loss: 0.4756 Acc: 0.7734

Epoch 2/9
----------
train Loss: 0.5249 Acc: 0.7445
val Loss: 0.7038 Acc: 0.6655

Epoch 3/9
----------
train Loss: 0.5245 Acc: 0.7555
val Loss: 0.5024 Acc: 0.7628

Epoch 4/9
----------
train Loss: 0.5697 Acc: 0.7306
val Loss: 0.5349 Acc: 0.7607

Epoch 5/9
----------
train Loss: 0.5740 Acc: 0.7416
val Loss: 0.4734 Acc: 0.7801

Epoch 6/9
----------
train Loss: 0.5406 Acc: 0.7517
val Loss: 0.4981 Acc: 0.7738

Epoch 7/9
----------
train Loss: 0.4425 Acc: 0.7955
val Loss: 0.4739 Acc: 0.7814

Epoch 8/9
----------
train Loss: 0.4479 Acc: 0.7833
val Loss: 0.4776 Acc: 0.7818

Epoch 9/9
----------
train Loss: 0.4420 Acc: 0.7960
val Loss: 0.4623 Acc: 0.7886

Training complete in 2m 8s
Best val Acc: 0.788584


In [None]:
def get_data(sz, bs):
    """Build a fastai `ImageDataBunch` from `train_ds` / `valid_ds` and call `.normalize(imagenet_stats)` on it.

    NOTE(review): this cell has no execution count and `p` is not defined in the
    visible cells — confirm where it comes from.
    NOTE(review): `sz` is forwarded as `val_bs` (the validation batch size) —
    confirm this is intended.
    NOTE(review): `d_transforms` already ends with `transforms.Normalize`, so
    `.normalize(imagenet_stats)` presumably normalises a second time — verify.
    """
    return ImageDataBunch.create(train_ds, valid_ds, path=p, bs=bs, val_bs=sz).normalize(imagenet_stats)

data = get_data(sz=200, bs=8)    

In [None]:
# Create a fastai learner on a ResNet-50 backbone from the `data` bunch.
# NOTE(review): this cell has no execution count, and `accuracy_gender`,
# `accuracy_race`, `l1loss_age` and `multitask_loss` are not defined in the
# visible cells — it will raise NameError unless they are defined elsewhere.
learn = create_cnn(data, models.resnet50, 
                    metrics=[accuracy_gender,
                            accuracy_race,
                            l1loss_age],
                    loss_func=multitask_loss)

In [None]:
# Run the learning-rate finder starting from 1e-6 (depends on `learn` from the
# previous cell). The training call below is left disabled.
# learn.fit_one_cycle(100)
learn.lr_find(start_lr=1e-6)

## Convert fastai to plain PyTorch