In [1]:
#Import essential libraries
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import time
import numpy as np
import tensorflow as tf

from tensorflow.keras import Input, Model, losses, metrics, optimizers

from data.make_dataset import build_dataset
from model.build_model import LeviHassnerBackbone, MultiTaskHead
from utils.train_multitask import train_model_multitask

In [2]:
#Training configurations
EPOCHS = 10

# Each run logs into its own "<unix timestamp>_celeba_pretrain" folder so
# successive runs never overwrite each other's logs.
log_folder = f"{round(time.time())}_celeba_pretrain"
LOG_DIR = os.path.join(
    "/export/home/aneezahm001/nndl_2/age_gender/logs/celeba",
    log_folder,
)
LOG_FILE = os.path.join(LOG_DIR, "training.log.tsv")

#Model configurations
PRETRAIN_PATH = None
INPUT_SHAPE = (227, 227, 3)
DROPOUT = 0.5
# NOTE(review): this is the string "None", not the None object -- confirm
# that LeviHassnerBackbone really expects the string form.
INITIALIZER = "None"
SAVE_DIR = "/export/home/aneezahm001/nndl_2/checkpoints/celeba_age_gender"

#Optimizer configurations
OPTIMIZER_NAME = "sgd"  # forwarded as the `name` argument of the optimizer
OPTIMIZER_ALPHA = 1e-2  # learning rate
OPTIMIZER_GAMMA = 5e-4  # weight decay; handed to the backbone as `weight_decay`
OPTIMIZER_MOMENTUM = 0.  # SGD momentum

In [3]:
#Build training and validation datasets
# The directory holds the train-*/val-* TFRecord shards listed in the output below.
train_ds, val_ds = build_dataset(
    "/export/home/aneezahm001/nndl_2/celeba-4/output",
    batch_size=128,
)

TFRecord files for training: 
 ['/export/home/aneezahm001/nndl_2/celeba-4/output/train-00006-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00000-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00003-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00004-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00002-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00009-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00008-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00005-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00007-of-00010', '/export/home/aneezahm001/nndl_2/celeba-4/output/train-00001-of-00010']
TFRecord files for training: 
 ['/export/home/aneezahm001/nndl_2/celeba-4/output/val-00001-of-00002', '/export/home/aneezahm001/nndl_2/celeba-4/output/val-00000-of-00002']


In [4]:
#Define multi-task levi-hassner model
# Backbone without its own classification head; the multi-task head below
# is attached to its output instead.
backbone = LeviHassnerBackbone(
    weight_decay=OPTIMIZER_GAMMA,
    dropout_prob=DROPOUT,
    include_head=False,
    initializer=INITIALIZER,
)

# Two outputs: a 6-way softmax named "age" and a single sigmoid unit named "gender".
head = MultiTaskHead(
    num_classes_1=6,
    num_classes_2=1,
    activation_1="softmax",
    activation_2="sigmoid",
    name_1="age",
    name_2="gender",
)

# Create exactly one symbolic input. The previous version built a second,
# unused Input layer first and bound both to the name `input`, which also
# shadowed the Python builtin. The shape comes from the config cell so it
# is defined in a single place.
inputs = Input(shape=INPUT_SHAPE)
latent = backbone(inputs)
output = head(latent)

model = Model(inputs=inputs, outputs=output)

In [5]:
#Build model
# Derive the batched input shape from INPUT_SHAPE instead of repeating the
# literal, so the config cell stays the single source of truth.
model.build(input_shape=(None, *INPUT_SHAPE))
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 227, 227, 3)]     0         
_________________________________________________________________
levi_hassner_backbone (LeviH (None, 512)               8855200   
_________________________________________________________________
multi_task_head (MultiTaskHe [(None, 6), (None, 1)]    3591      
Total params: 8,858,791
Trainable params: 8,858,711
Non-trainable params: 80
_________________________________________________________________


In [6]:
#Initialize optimizer, criterion, and accuracy metrics for Multi-Task learning
# SGD configured from the constants in the config cell.
optimizer = optimizers.SGD(
    learning_rate=OPTIMIZER_ALPHA,
    momentum=OPTIMIZER_MOMENTUM,
    name=OPTIMIZER_NAME,
)

# Index 0 is used for task 1 and index 1 for task 2 by the training cell.
# NOTE(review): presumably task 1 = "age" (6-way softmax) and task 2 =
# "gender" (sigmoid), matching the head definition -- confirm.
task_1_criterion = losses.SparseCategoricalCrossentropy()
task_2_criterion = losses.BinaryCrossentropy()
criterions = [task_1_criterion, task_2_criterion]

task_1_accuracy = metrics.SparseCategoricalAccuracy()
task_2_accuracy = metrics.BinaryAccuracy()
eval_metrics = [task_1_accuracy, task_2_accuracy]

In [8]:
#Train model using custom training loop
# Unpack the per-task loss and metric objects so the call below reads by name.
loss_task_1, loss_task_2 = criterions
accuracy_task_1, accuracy_task_2 = eval_metrics

# The returned model replaces `model`; later cells operate on that result.
model = train_model_multitask(
    model,
    train_ds,
    val_ds,
    optimizer=optimizer,
    epochs=EPOCHS,
    loss_fn_1=loss_task_1,
    loss_fn_2=loss_task_2,
    acc_metric_1=accuracy_task_1,
    acc_metric_2=accuracy_task_2,
    log_file=LOG_FILE,
    tensorboard_dir=LOG_DIR,
)

Start of epoch 1
Combined training loss at step 500: 1.6619 
Combined training loss at step 1000: 1.4662 
Training accuracy on task 1 over epoch 1: 0.4843 
Training accuracy on task 2 over epoch 1: 0.8347 

Validation accuracy on task 1 over epoch 1: 0.5010
Validation accuracy on task 2 over epoch 1: 0.9236
Time taken for epoch 1: 457.35s

Start of epoch 2
Combined training loss at step 500: 1.1495 
Combined training loss at step 1000: 1.2942 
Training accuracy on task 1 over epoch 2: 0.5088 
Training accuracy on task 2 over epoch 2: 0.9456 

Validation accuracy on task 1 over epoch 2: 0.5009
Validation accuracy on task 2 over epoch 2: 0.9297
Time taken for epoch 2: 453.94s

Start of epoch 3
Combined training loss at step 500: 1.1150 
Combined training loss at step 1000: 1.1479 
Training accuracy on task 1 over epoch 3: 0.5265 
Training accuracy on task 2 over epoch 3: 0.9592 

Validation accuracy on task 1 over epoch 3: 0.4140
Validation accuracy on task 2 over epoch 3: 0.9556
Time ta

In [9]:
#Save the weights of the model backbone
# Find the backbone by type instead of relying on it sitting at
# model.layers[1]; a positional index silently picks the wrong layer if the
# model's layer order ever changes.
trained_backbone = next(
    layer for layer in model.layers if isinstance(layer, LeviHassnerBackbone)
)
# NOTE(review): despite its name, SAVE_DIR is passed as the save_weights
# filepath; presumably it acts as a checkpoint prefix -- confirm.
trained_backbone.save_weights(SAVE_DIR)

In [10]:
%tensorboard --logdir LOG_DIR

UsageError: Line magic function `%tensorboard` not found.
