In [2]:
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
from torch.optim.lr_scheduler import StepLR
import torch.utils.data
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import pretrainedmodels
import pretrainedmodels.utils
from model import get_model, get_resnet
from dataset import FaceDataset, FaceDataset_UTK
from train import train, validate

Set `data_dir` to the directory that contains the extracted 'appa-real-release' dataset.

Download and extract the dataset from the command line:

wget http://158.109.8.102/AppaRealAge/appa-real-release.zip

unzip appa-real-release.zip

In [26]:
# Network to train, built by the project's `get_model` helper.
model = get_model()
# Root folders of the APPA-REAL and UTK datasets.
data_dir = 'appa-real-release'
data_dir_utk = 'data_UTK'
# First epoch of the training loops; overwritten when a checkpoint is resumed.
start_epoch = 0
# Folder that receives the "best so far" checkpoints.
checkpoint_dir = Path('checkpoint')
checkpoint_dir.mkdir(parents=True, exist_ok=True)
# Checkpoint to resume from; the setup cell loads it only if the file exists.
resume_path = "checkpoint/epoch011_0.24262_4.8524.pth"
# TensorBoard log root; None disables scalar logging in the training loops.
tensorboard_dir = None
# Free-form option tags: joined into the TensorBoard run name and printed after training.
opts = []
# When True the training loops save `model.module.state_dict()` instead of `model.state_dict()`.
multi_gpu = False


# Hyper-parameters.
img_size = 224          # passed to the datasets as `img_size`
age_stddev = 1.0        # passed to the training datasets as `age_stddev`
batch_size = 20
learning_rate = 1e-3/15  # Adam learning rate
step_size = 20          # StepLR: epochs between decays
decay_rate = 0.2        # StepLR: multiplicative decay factor (gamma)
num_epochs = 100
# Float tensor holding the values 0..100, handed to `train` / `validate` as `classes`.
classes = torch.arange(0, 101).type(torch.FloatTensor)

In [27]:
import os
# Sanity check: show what is present in the dataset folder configured above.
os.listdir(data_dir)

['.badfiles.un~',
 '.clean_asdf.sh.un~',
 '.parse_labels.m.un~',
 '.README.txt.un~',
 'gt_avg_test.csv',
 'gt_avg_train.csv',
 'gt_avg_valid.csv',
 'gt_test.csv',
 'gt_train.csv',
 'gt_valid.csv',
 'README.txt',
 'test',
 'train',
 'valid',
 'gt_avg_train_res.csv',
 'gt_avg_valid_res.csv',
 'gt_avg_test_res.csv']

In [28]:
# Training split: augmented and shuffled; drop_last discards a trailing partial batch.
train_dataset = FaceDataset(data_dir, "train", img_size=img_size, augment=True, age_stddev=age_stddev)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Validation split: evaluated in a fixed order and on every sample, so the
# reported MAE is reproducible and never silently computed on a subset.
val_dataset = FaceDataset(data_dir, "valid", img_size=img_size, augment=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

# Move the model to its device *before* building the optimizer, as recommended
# by the torch.optim documentation.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.L1Loss().to(device)

if resume_path:
    if Path(resume_path).is_file():
        print("=> loading checkpoint '{}'".format(resume_path))
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        checkpoint = torch.load(resume_path, map_location="cpu")
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume_path, checkpoint['epoch']))
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    else:
        print("=> no checkpoint found at '{}'".format(resume_path))

# last_epoch = start_epoch - 1 makes the LR schedule continue from the resumed epoch
# (it is -1, i.e. a fresh schedule, when nothing was resumed).
scheduler = StepLR(optimizer, step_size=step_size, gamma=decay_rate, last_epoch=start_epoch - 1)
best_val_mae = 10000.0

# Both writers stay None unless TensorBoard logging is enabled.
train_writer = None
val_writer = None

if tensorboard_dir is not None:
    opts_prefix = "_".join(opts)
    train_writer = SummaryWriter(log_dir=tensorboard_dir + "/" + opts_prefix + "_train")
    val_writer = SummaryWriter(log_dir=tensorboard_dir + "/" + opts_prefix + "_val")

=> loading checkpoint 'checkpoint/epoch011_0.24262_4.8524.pth'
=> loaded checkpoint 'checkpoint/epoch011_0.24262_4.8524.pth' (epoch 12)


In [None]:
# Epoch loop on the currently configured loaders: train, validate, optionally log,
# checkpoint whenever the validation MAE improves, then advance the LR schedule.
for epoch in range(start_epoch, num_epochs):
    # one optimisation pass over the training split
    train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, device, classes)

    # evaluation pass over the validation split
    val_loss, val_acc, val_mae = validate(val_loader, model, criterion, epoch, device, classes)

    if tensorboard_dir is not None:
        for tag, value in (("loss", train_loss), ("acc", train_acc)):
            train_writer.add_scalar(tag, value, epoch)
        for tag, value in (("loss", val_loss), ("acc", val_acc), ("mae", val_mae)):
            val_writer.add_scalar(tag, value, epoch)

    # keep a checkpoint only for a new best validation MAE
    improved = val_mae < best_val_mae
    if not improved:
        print(f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")
    else:
        print(f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
        # unwrap the DataParallel container when training on several GPUs
        model_state_dict = model.module.state_dict() if multi_gpu else model.state_dict()
        checkpoint_name = "epoch{:03d}_{:.5f}_{:.4f}.pth".format(epoch, val_loss, val_mae)
        payload = {
            'epoch': epoch + 1,  # epoch to resume from
            'arch': "resnet",
            'state_dict': model_state_dict,
            'optimizer_state_dict': optimizer.state_dict()
        }
        torch.save(payload, str(checkpoint_dir.joinpath(checkpoint_name)))
        best_val_mae = val_mae

    # step the learning-rate schedule once per epoch
    scheduler.step()

print("=> training finished")
print(f"additional opts: {opts}")
print(f"best val mae: {best_val_mae:.3f}")

### UTK Dataset 

In [None]:
# UTK training split: no augmentation, shuffled, trailing partial batch dropped.
train_dataset = FaceDataset_UTK(data_dir_utk, "train", img_size=img_size, augment=False, age_stddev=age_stddev)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# UTK validation split: fixed order and every sample included, so the MAE
# covers the whole split.
val_dataset = FaceDataset_UTK(data_dir_utk, "valid", img_size=img_size, augment=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

# Start checkpoint selection afresh for this dataset: a best MAE carried over from
# another dataset is not comparable and would suppress checkpoint saving here.
best_val_mae = 10000.0

In [None]:
# Same epoch loop as before, now driven by the UTK loaders defined in the previous cell.
for epoch in range(start_epoch, num_epochs):
    # training pass
    train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, device, classes)

    # validation pass
    val_loss, val_acc, val_mae = validate(val_loader, model, criterion, epoch, device, classes)

    if tensorboard_dir is not None:
        for tag, value in (("loss", train_loss), ("acc", train_acc)):
            train_writer.add_scalar(tag, value, epoch)
        for tag, value in (("loss", val_loss), ("acc", val_acc), ("mae", val_mae)):
            val_writer.add_scalar(tag, value, epoch)

    # save a checkpoint only when the validation MAE reaches a new minimum
    improved = val_mae < best_val_mae
    if not improved:
        print(f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")
    else:
        print(f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
        model_state_dict = model.module.state_dict() if multi_gpu else model.state_dict()
        checkpoint_name = "epoch{:03d}_{:.5f}_{:.4f}.pth".format(epoch, val_loss, val_mae)
        payload = {
            'epoch': epoch + 1,  # epoch to resume from
            'arch': "resnet",
            'state_dict': model_state_dict,
            'optimizer_state_dict': optimizer.state_dict()
        }
        torch.save(payload, str(checkpoint_dir.joinpath(checkpoint_name)))
        best_val_mae = val_mae

    # advance the learning-rate schedule
    scheduler.step()

print("=> training finished")
print(f"additional opts: {opts}")
print(f"best val mae: {best_val_mae:.3f}")

### Create residual CSV files

In [19]:
import torch.nn.functional as F
import pandas as pd 
import numpy as np

In [20]:
# Un-augmented datasets for all three splits, read in file order with no sample
# dropped, so that predictions line up one-to-one with the dataset rows.
eval_loader_kwargs = dict(batch_size=batch_size, shuffle=False, drop_last=False)

train_dataset = FaceDataset(data_dir, "train", img_size=img_size, augment=False, age_stddev=age_stddev)
val_dataset = FaceDataset(data_dir, "valid", img_size=img_size, augment=False)
test_dataset = FaceDataset(data_dir, "test", img_size=img_size, augment=False)

train_loader = DataLoader(train_dataset, **eval_loader_kwargs)
val_loader = DataLoader(val_dataset, **eval_loader_kwargs)
test_loader = DataLoader(test_dataset, **eval_loader_kwargs)

In [21]:
def residuals(model, loader, device, classes=None):
    """Return the per-sample residuals ``real - prediction`` over ``loader``.

    The prediction for a sample is the expectation of ``classes`` under the
    softmax of the model's output row.

    Args:
        model: torch module mapping an input batch to one score per class.
        loader: iterable yielding ``(x, y, real)`` triples; ``y`` is ignored.
        device: device on which the model is evaluated.
        classes: optional 1-D tensor with the value of each class. Defaults to
            the float values 0..100.

    Returns:
        A list with one numpy scalar per sample, in loader order.

    Notes:
        The model is moved to ``device`` and evaluated in eval mode with gradients
        disabled; its previous top-level training flag is re-applied afterwards
        via ``model.train(flag)``.
    """
    if classes is None:
        classes = torch.arange(0, 101).type(torch.FloatTensor)
    classes = classes.to(device)

    model.to(device)
    was_training = model.training
    # Inference must not use training-mode behaviour (e.g. dropout, batch-norm updates).
    model.eval()

    collected = []
    try:
        with torch.no_grad():
            for x, _, real in loader:
                x = x.to(device)
                real = real.to(device)
                # expected class value under the predicted distribution
                expected = F.softmax(model(x), dim=1) @ classes
                collected.extend((real - expected).cpu().numpy())
    finally:
        model.train(was_training)
    return collected

In [22]:
# Residuals of the current model on each split, rounded to two decimals.
res_train, res_val, res_test = [
    np.round(residuals(model, split_loader, device), 2)
    for split_loader in (train_loader, val_loader, test_loader)
]

In [23]:
# Scratch expression: displays the path "<data_dir>/train_residuals.csv".
# NOTE(review): the next cell writes "train_residuals.csv" to the working directory,
# not to this path — confirm which location is intended.
data_dir + "/train_residuals.csv"

'appa-real-release/train_residuals.csv'

In [24]:
# Persist each split's residuals as a single-column CSV in the working directory.
for csv_name, split_residuals in (
    ("train_residuals.csv", res_train),
    ("val_residuals.csv", res_val),
    ("test_residuals.csv", res_test),
):
    residual_frame = pd.DataFrame(split_residuals, columns=["residual"])
    residual_frame.to_csv(csv_name, index=False)

In [25]:
# Attach the residual column to each ground-truth table and write the
# "gt_avg_<split>_res.csv" files next to the originals.
# Paths are derived from `data_dir`, so relocating the dataset only requires
# changing that one setting.
dataset_root = Path(data_dir)
res_columns = ['file_name', 'num_ratings', 'residual', 'apparent_age_std', 'real_age', 'apparent_age_avg']

df_train = pd.read_csv(dataset_root / "gt_avg_train.csv")
residual_train = pd.read_csv("train_residuals.csv")
df_val = pd.read_csv(dataset_root / "gt_avg_valid.csv")
residual_val = pd.read_csv("val_residuals.csv")
df_test = pd.read_csv(dataset_root / "gt_avg_test.csv")
residual_test = pd.read_csv("test_residuals.csv")

for split_name, labels, split_residuals in (
    ("train", df_train, residual_train),
    ("valid", df_val, residual_val),
    ("test", df_test, residual_test),
):
    # The join is purely positional (row i of the labels with row i of the residuals).
    # A length mismatch would silently produce NaN or misaligned residuals, so fail loudly.
    if len(labels) != len(split_residuals):
        raise ValueError(
            f"{split_name}: {len(labels)} label rows but {len(split_residuals)} residual rows; "
            "cannot align residuals with labels by position"
        )
    labels.join(split_residuals)[res_columns].to_csv(dataset_root / f"gt_avg_{split_name}_res.csv")

### Residual DEX

In [29]:
import copy

In [30]:
# Rebind `classes` to the float values -50..50; the training loop below passes
# this tensor to `train` / `validate` in place of the 0..100 tensor used earlier.
classes = torch.arange(-50, 51).type(torch.FloatTensor)
# NOTE(review): `model_2` is referenced by the test cells further down, but this
# commented-out line is its only definition in the notebook.
#model_2 = copy.deepcopy(model)

In [31]:
# Residual-stage datasets, requested from FaceDataset with is_res=True.
# NOTE(review): presumably is_res switches the dataset to the gt_avg_*_res.csv labels
# written above — confirm in dataset.py.
train_dataset = FaceDataset(data_dir, "train", img_size=img_size, augment=False, age_stddev=age_stddev, is_res=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Validation: fixed order and every sample included, so the MAE is reproducible
# and covers the whole split.
val_dataset = FaceDataset(data_dir, "valid", img_size=img_size, augment=False, is_res=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

# Restart checkpoint selection for the residual task.
best_val_mae = 10000.0
# `train_writer` is deliberately left untouched: the training loop decides whether
# to log from `tensorboard_dir`, so resetting the writer to None here would make
# `train_writer.add_scalar` fail whenever TensorBoard logging is enabled.

In [32]:
# Epoch loop for the residual stage: same procedure as the earlier loops, using the
# residual loaders and the `classes` tensor currently in scope.
for epoch in range(start_epoch, num_epochs):
    # training pass
    train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, device, classes)

    # validation pass
    val_loss, val_acc, val_mae = validate(val_loader, model, criterion, epoch, device, classes)

    if tensorboard_dir is not None:
        for tag, value in (("loss", train_loss), ("acc", train_acc)):
            train_writer.add_scalar(tag, value, epoch)
        for tag, value in (("loss", val_loss), ("acc", val_acc), ("mae", val_mae)):
            val_writer.add_scalar(tag, value, epoch)

    # checkpoint on a new best validation MAE
    improved = val_mae < best_val_mae
    if not improved:
        print(f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")
    else:
        print(f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
        model_state_dict = model.module.state_dict() if multi_gpu else model.state_dict()
        checkpoint_name = "epoch{:03d}_{:.5f}_{:.4f}.pth".format(epoch, val_loss, val_mae)
        payload = {
            'epoch': epoch + 1,  # epoch to resume from
            'arch': "resnet",
            'state_dict': model_state_dict,
            'optimizer_state_dict': optimizer.state_dict()
        }
        torch.save(payload, str(checkpoint_dir.joinpath(checkpoint_name)))
        best_val_mae = val_mae

    # advance the learning-rate schedule
    scheduler.step()

print("=> training finished")
print(f"additional opts: {opts}")
print(f"best val mae: {best_val_mae:.3f}")

  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 012] best val mae was improved from 10000.000 to 5.032


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 013] best val mae was improved from 5.032 to 4.890


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 014] best val mae was improved from 4.890 to 4.854


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 015] best val mae was improved from 4.854 to 4.771


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 016] best val mae was not improved from 4.771 (4.819)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 017] best val mae was not improved from 4.771 (4.798)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 018] best val mae was not improved from 4.771 (4.853)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 019] best val mae was not improved from 4.771 (4.846)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 020] best val mae was not improved from 4.771 (4.845)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 021] best val mae was improved from 4.771 to 4.765


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 022] best val mae was not improved from 4.765 (4.784)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

=> [epoch 023] best val mae was not improved from 4.765 (4.790)


  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

KeyboardInterrupt: 

### Test Residual DEX

In [None]:
# Held-out test split: no augmentation, fixed order, every sample kept.
test_dataset = FaceDataset(
    data_dir,
    "test",
    img_size=img_size,
    augment=False,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)

In [None]:
# Settings for the final evaluation.
data_dir = 'appa-real-release'
# Checkpoint whose weights the next cell loads into `model`.
resume_path = "checkpoint/epoch024_0.23000_4.5999.pth" 
# Checkpoint whose weights the next cell loads into `model_2`.
resume_path_2 = "checkpoint/epoch021_0.22150_4.4299.pth" 
# Float values 0..100, placed on the evaluation device.
classes = torch.arange(0, 101).type(torch.FloatTensor).to(device)

In [None]:
def _load_checkpoint_into(net, path):
    """Load the 'state_dict' entry of the checkpoint at ``path`` into ``net``.

    Returns the loaded checkpoint dict, or None when ``path`` is empty or does not
    point to an existing file (a message is printed in the latter case).
    """
    if not path:
        return None
    if not Path(path).is_file():
        print("=> no checkpoint found at '{}'".format(path))
        return None
    print("=> loading checkpoint '{}'".format(path))
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    loaded = torch.load(path, map_location="cpu")
    net.load_state_dict(loaded['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(path, loaded['epoch']))
    return loaded


# `model_2` has no active definition elsewhere in the notebook (the deepcopy in the
# Residual DEX section is commented out), so build it here when it is missing.
# An already existing `model_2` is kept as is.
if "model_2" not in globals():
    model_2 = get_model().to(device)

_loaded = _load_checkpoint_into(model, resume_path)
if _loaded is not None:
    checkpoint = _loaded
    start_epoch = checkpoint['epoch']

_loaded = _load_checkpoint_into(model_2, resume_path_2)
if _loaded is not None:
    checkpoint_2 = _loaded
    start_epoch = checkpoint_2['epoch']

In [None]:
# Evaluate both models on the test split and display the resulting MAE.
# NOTE(review): this call passes seven positional arguments (two models, None in the
# criterion position), while the training loops call `validate` with six — confirm
# that the `validate` in train.py accepts this form.
age_bins = torch.arange(0, 101).type(torch.FloatTensor).to(device)
test_metrics = validate(test_loader, model, model_2, None, 0, device, age_bins)
mae = test_metrics[2]
mae