In [1]:
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
from torch.optim.lr_scheduler import StepLR
import torch.utils.data
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import pretrainedmodels
import pretrainedmodels.utils
from model import get_model, get_vgg
from dataset import FaceDataset
from train import train, validate

In [2]:
model = get_vgg()
data_dir = 'appa-real-release'
start_epoch = 0
checkpoint_dir = Path('checkpoint')
checkpoint_dir.mkdir(parents=True, exist_ok=True)
resume_path = None
tensorboard_dir = "t_log"
opts = []
multi_gpu = False


img_size = 224
age_stddev = 1.0
batch_size = 128
learning_rate = 1e-3
step_size = 20
decay_rate = 0.2
num_epochs = 80 #80

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to C:\Users\Moustapha/.cache\torch\hub\checkpoints\vgg16_bn-6c64b313.pth
100%|██████████| 528M/528M [18:53<00:00, 488kB/s] 


In [3]:
train_dataset = FaceDataset(data_dir, "train", img_size=img_size, augment=True, age_stddev=age_stddev)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

val_dataset = FaceDataset(data_dir, "valid", img_size=img_size, augment=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)


optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
device = "cpu"
model = model.to(device)
criterion = nn.L1Loss().to(device)

if resume_path:
    if Path(resume_path).is_file():
        print("=> loading checkpoint '{}'".format(resume_path))
        checkpoint = torch.load(resume_path, map_location="cpu")
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume_path, checkpoint['epoch']))
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    else:
        print("=> no checkpoint found at '{}'".format(resume_path))
        

scheduler = StepLR(optimizer, step_size=step_size, gamma=decay_rate, last_epoch=start_epoch - 1)
best_val_mae = 10000.0
train_writer = None

if tensorboard_dir is not None:
    opts_prefix = "_".join(opts)
    train_writer = SummaryWriter(log_dir=tensorboard_dir + "/" + opts_prefix + "_train")
    val_writer = SummaryWriter(log_dir=tensorboard_dir + "/" + opts_prefix + "_val")

In [4]:
for epoch in range(start_epoch, num_epochs):
    # train
    train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, device)

    # validate
    val_loss, val_acc, val_mae = validate(val_loader, model, criterion, epoch, device)

    if tensorboard_dir is not None:
        train_writer.add_scalar("loss", train_loss, epoch)
        train_writer.add_scalar("acc", train_acc, epoch)
        val_writer.add_scalar("loss", val_loss, epoch)
        val_writer.add_scalar("acc", val_acc, epoch)
        val_writer.add_scalar("mae", val_mae, epoch)

    # checkpoint
    if val_mae < best_val_mae:
        print(f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
        model_state_dict = model.module.state_dict() if multi_gpu else model.state_dict()
        torch.save(
            {
                'epoch': epoch + 1,
                'arch': "vgg16",
                'state_dict': model_state_dict,
                'optimizer_state_dict': optimizer.state_dict()
            },
            str(checkpoint_dir.joinpath("epoch{:03d}_{:.5f}_{:.4f}.pth".format(epoch, val_loss, val_mae)))
        )
        best_val_mae = val_mae
    else:
        print(f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")

    # adjust learning rate
    scheduler.step()

print("=> training finished")
print(f"additional opts: {opts}")
print(f"best val mae: {best_val_mae:.3f}")

100%|██████████| 31/31 [1:00:01<00:00, 116.17s/it, stage=train, epoch=0, loss=0.0983, acc=0, correct=0, sample_num=128]
100%|██████████| 12/12 [04:17<00:00, 21.46s/it, stage=val, epoch=0, loss=0.0919, acc=0.00333, correct=1, sample_num=92] 


=> [epoch 000] best val mae was improved from 10000.000 to 11.524


 19%|█▉        | 6/31 [10:07<42:16, 101.45s/it, stage=train, epoch=1, loss=0.0891, acc=0, correct=0, sample_num=128]

In [7]:
import numpy as np
import torch.nn.functional as F 
x = np.load("bla.npy")
x = torch.from_numpy(x)
outputs = model(x)
classes = torch.arange(0, 101).type(torch.FloatTensor)
outputs = F.softmax(outputs, dim=1)@classes
outputs

tensor([33.], grad_fn=<MvBackward0>)