In [6]:
import os
from pathlib import Path

import pretrainedmodels
import pretrainedmodels.utils
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.utils.data
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from dataset import FaceDataset
from model import get_model, get_vgg
from train import train, validate

Set `data_dir` to the path of the extracted `appa-real-release` dataset directory.

Download and unpack the dataset from the command line:

wget http://158.109.8.102/AppaRealAge/appa-real-release.zip

unzip appa-real-release.zip

In [7]:
# ---- Hyperparameters ---------------------------------------------------------
img_size = 56         # forwarded to FaceDataset as img_size
age_stddev = 1.0      # forwarded to the training FaceDataset only
batch_size = 32       # DataLoader batch size for both splits
num_epochs = 15       # the training loop runs epochs start_epoch .. num_epochs - 1
learning_rate = 1e-3  # initial Adam learning rate
# StepLR multiplies the learning rate by decay_rate every step_size epochs.
# NOTE: step_size (20) is larger than num_epochs (15), so no decay happens
# within a single run with these values.
step_size = 20
decay_rate = 0.2

# ---- Paths and run bookkeeping -----------------------------------------------
data_dir = 'appa-real-release'  # dataset root; must contain the train/ and valid/ splits
checkpoint_dir = Path('checkpoint')
checkpoint_dir.mkdir(parents=True, exist_ok=True)  # safe to re-run: existing dir is fine
resume_path = None      # checkpoint file to resume from; None starts from scratch
tensorboard_dir = None  # TensorBoard log root; None disables logging
start_epoch = 0         # overwritten from the checkpoint when resuming
opts = []               # joined with "_" to prefix the TensorBoard run names
multi_gpu = False       # read by the checkpointing code in the training loop

# ---- Model -------------------------------------------------------------------
model = get_model()

In [8]:
# Sanity check: list the dataset root so the split folders and label CSVs can be
# eyeballed before the datasets are built. (`os` is imported in the import cell.)
os.listdir(data_dir)

['.badfiles.un~',
 '.clean_asdf.sh.un~',
 '.parse_labels.m.un~',
 '.README.txt.un~',
 'gt_avg_test.csv',
 'gt_avg_train.csv',
 'gt_avg_valid.csv',
 'gt_test.csv',
 'gt_train.csv',
 'gt_valid.csv',
 'README.txt',
 'test',
 'train',
 'valid']

In [9]:
# ---- Data --------------------------------------------------------------------
# Training split: augmentation enabled, shuffled, last incomplete batch dropped.
train_dataset = FaceDataset(data_dir, "train", img_size=img_size, augment=True, age_stddev=age_stddev)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Validation split: no augmentation, fixed order, every sample kept.
val_dataset = FaceDataset(data_dir, "valid", img_size=img_size, augment=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

# ---- Model, loss, optimizer --------------------------------------------------
# Move the model to its device *before* building the optimizer, so the optimizer
# is constructed over the parameters as they will be trained.
device = "cpu"
model = model.to(device)
criterion = nn.L1Loss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# ---- Optional resume ---------------------------------------------------------
# A checkpoint written by the training loop holds 'epoch' (the next epoch to
# run), 'state_dict' and 'optimizer_state_dict'.
# NOTE: best_val_mae is not part of the checkpoint, so it restarts from the
# sentinel below even when resuming.
if resume_path:
    if Path(resume_path).is_file():
        print("=> loading checkpoint '{}'".format(resume_path))
        # NOTE(review): torch.load unpickles the file -- only resume from
        # checkpoints you trust.
        checkpoint = torch.load(resume_path, map_location="cpu")
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Report success only once model AND optimizer state are restored.
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume_path, checkpoint['epoch']))
    else:
        # Best effort: a missing file falls back to training from scratch.
        print("=> no checkpoint found at '{}'".format(resume_path))

# ---- LR schedule and bookkeeping ---------------------------------------------
# last_epoch is -1 on a fresh run (start_epoch == 0) and start_epoch - 1 when
# resuming, so the schedule continues from where the checkpoint left off.
scheduler = StepLR(optimizer, step_size=step_size, gamma=decay_rate, last_epoch=start_epoch - 1)
best_val_mae = 10000.0  # sentinel: any real validation MAE is lower

# ---- TensorBoard writers -----------------------------------------------------
# Both names are always bound so later cells can refer to either writer whether
# or not logging is enabled.
train_writer = None
val_writer = None

if tensorboard_dir is not None:
    opts_prefix = "_".join(opts)
    # f-strings give the same path text as string concatenation, but also accept
    # a pathlib.Path for tensorboard_dir.
    train_writer = SummaryWriter(log_dir=f"{tensorboard_dir}/{opts_prefix}_train")
    val_writer = SummaryWriter(log_dir=f"{tensorboard_dir}/{opts_prefix}_val")

In [10]:
# Main loop: train and validate once per epoch, log to TensorBoard when enabled,
# save a checkpoint whenever the validation MAE improves, then step the LR
# schedule. The try/finally makes sure TensorBoard writers are closed (and their
# buffered events flushed) even if the run is interrupted part-way.
try:
    for epoch in range(start_epoch, num_epochs):
        # train
        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, device)

        # validate
        val_loss, val_acc, val_mae = validate(val_loader, model, criterion, epoch, device)

        if tensorboard_dir is not None:
            train_writer.add_scalar("loss", train_loss, epoch)
            train_writer.add_scalar("acc", train_acc, epoch)
            val_writer.add_scalar("loss", val_loss, epoch)
            val_writer.add_scalar("acc", val_acc, epoch)
            val_writer.add_scalar("mae", val_mae, epoch)

        # checkpoint
        if val_mae < best_val_mae:
            print(f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
            # Unwrap based on what the model actually is rather than on the
            # multi_gpu flag: with the flag set but the model never wrapped in
            # DataParallel, `model.module` would raise AttributeError here.
            is_wrapped = isinstance(model, nn.DataParallel)
            model_state_dict = model.module.state_dict() if is_wrapped else model.state_dict()
            torch.save(
                {
                    'epoch': epoch + 1,  # the epoch a resumed run should start from
                    'arch': "resnet",
                    'state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict()
                },
                str(checkpoint_dir.joinpath("epoch{:03d}_{:.5f}_{:.4f}.pth".format(epoch, val_loss, val_mae)))
            )
            best_val_mae = val_mae
        else:
            print(f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")

        # adjust learning rate
        scheduler.step()
finally:
    # Same guard as the logging branch above: the writers are only used when
    # TensorBoard logging is enabled.
    if tensorboard_dir is not None:
        train_writer.close()
        val_writer.close()

print("=> training finished")
print(f"additional opts: {opts}")
print(f"best val mae: {best_val_mae:.3f}")

100%|██████████| 124/124 [03:29<00:00,  1.69s/it, stage=train, epoch=0, loss=0.368, acc=0, correct=0, sample_num=32]
100%|██████████| 47/47 [00:26<00:00,  1.80it/s, stage=val, epoch=0, loss=0.345, acc=0.00467, correct=0, sample_num=28]


=> [epoch 000] best val mae was improved from 10000.000 to 11.007


100%|██████████| 124/124 [03:33<00:00,  1.72s/it, stage=train, epoch=1, loss=0.355, acc=0.000756, correct=0, sample_num=32]
100%|██████████| 47/47 [00:24<00:00,  1.94it/s, stage=val, epoch=1, loss=0.33, acc=0, correct=0, sample_num=28] 


=> [epoch 001] best val mae was improved from 11.007 to 10.526


100%|██████████| 124/124 [03:26<00:00,  1.67s/it, stage=train, epoch=2, loss=0.351, acc=0.000504, correct=0, sample_num=32]
100%|██████████| 47/47 [00:24<00:00,  1.95it/s, stage=val, epoch=2, loss=0.327, acc=0.002, correct=0, sample_num=28]  


=> [epoch 002] best val mae was improved from 10.526 to 10.445


100%|██████████| 124/124 [03:25<00:00,  1.66s/it, stage=train, epoch=3, loss=0.338, acc=0.00302, correct=0, sample_num=32]
100%|██████████| 47/47 [00:24<00:00,  1.93it/s, stage=val, epoch=3, loss=0.314, acc=0.00267, correct=1, sample_num=28]


=> [epoch 003] best val mae was improved from 10.445 to 10.028


100%|██████████| 124/124 [03:46<00:00,  1.83s/it, stage=train, epoch=4, loss=0.331, acc=0.00277, correct=0, sample_num=32]
100%|██████████| 47/47 [00:26<00:00,  1.81it/s, stage=val, epoch=4, loss=0.31, acc=0.00333, correct=1, sample_num=28] 


=> [epoch 004] best val mae was improved from 10.028 to 9.895


  2%|▏         | 3/124 [00:06<04:07,  2.04s/it, stage=train, epoch=5, loss=0.317, acc=0, correct=0, sample_num=32]