#### Imports

In [1]:
# import the project directory here to find the emtl package
import os, sys
# absolute path of the parent folder, where the emtl package is expected to live
project_dir = os.path.abspath('..')

# re-running this cell without a kernel restart must not register the folder twice
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)

In [2]:
import torch
from torchvision import models as M
from torchvision import datasets as D
from torchvision import transforms as T

# EMTL Library Imports
from emtl import Task, MultiHeadedDatasetTask, Trainer
from emtl.algorithms import SequentialTraining

#### Dataset & Head Class

In [3]:
# CelebA `target_type` can be 'attr', 'identity', 'bbox' or 'landmarks'. It can also be
# a list, in which case the dataset outputs a tuple with all specified target types.
# The targets represent:
# - attr (Tensor shape=(40,) dtype=int): binary (0, 1) labels for attributes
# - identity (int): label for each person (data points with the same identity are the same person)
# - bbox (Tensor shape=(4,) dtype=int): bounding box (x, y, width, height)
# - landmarks (Tensor shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
#   righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
def _attr_bbox_to_float(target):
    """Cast the (attr, bbox) target pair to float tensors.

    Args:
        target: Indexable pair produced by CelebA for target_type=['attr', 'bbox'].

    Returns:
        Tuple ``(attr.float(), bbox.float())``.
    """
    return target[0].float(), target[1].float()


def _load_celeba(split: str):
    """Build the CelebA dataset for ``split`` with attribute and bounding-box targets.

    Both splits share every setting except the split name, so they are created
    through this single helper instead of two copy-pasted constructor calls.

    Args:
        split: Split name forwarded to ``D.CelebA`` (this notebook uses 'train' and 'test').

    Returns:
        The ``D.CelebA`` dataset; images go through ``T.ToTensor()`` and targets
        through ``_attr_bbox_to_float``.
    """
    return D.CelebA(root='../data', split=split, target_type=['attr', 'bbox'], download=True,
                    transform=T.ToTensor(), target_transform=_attr_bbox_to_float)


trainset = _load_celeba('train')
testset = _load_celeba('test')

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class Head(torch.nn.Module):
    """Task-specific head: a single linear layer on top of backbone features.

    Args:
        out_neurons: Number of outputs produced by the head.
        in_features: Width of the incoming feature vector. Defaults to 2048,
            the value that was previously hard-coded.
    """

    def __init__(self, out_neurons: int, in_features: int = 2048) -> None:
        super().__init__()
        # attribute name kept as `model` so existing state_dict keys stay valid
        self.model = torch.nn.Linear(in_features, out_neurons)

    def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """Project features to the head outputs.

        Args:
            x: Feature tensor whose last dimension equals ``in_features``.
            **kwargs: Accepted and ignored, so callers may pass extra keyword
                arguments without breaking the head.

        Returns:
            Tensor with the last dimension replaced by ``out_neurons``.
        """
        return self.model(x)

#### Metric Functions

In [5]:
def relative_error(pred, true, eps: float = 1e-8):
    """Element-wise relative error ``|pred - true| / |true|``.

    The denominator is the magnitude of ``true`` clamped to at least ``eps``.
    A target of exactly 0 therefore gives a large but finite value instead of
    inf/nan, and negative targets no longer give a negative "error".

    Args:
        pred: Predicted value(s); a tensor or a plain number.
        true: Reference value(s), broadcastable against ``pred``. Tensors are
            expected to be floating point.
        eps: Lower bound applied to ``|true|`` before dividing.

    Returns:
        Relative error of the same kind (tensor or number) as the inputs.
    """
    denom = abs(true)
    if isinstance(denom, torch.Tensor):
        denom = denom.clamp_min(eps)
    else:
        denom = max(denom, eps)
    return abs(pred - true) / denom

def multilabel_binary_classification_accuracy(pred, true):
    """Fraction of labels whose thresholded prediction matches the target.

    ``pred`` is interpreted as probabilities: a label counts as positive when
    its value is strictly greater than 0.5. On [0, 1] this matches rounding,
    but unlike ``round().bool()`` it does not turn out-of-range values such as
    -3.0 into positives. Pass probabilities (e.g. ``torch.sigmoid(logits)``)
    rather than raw logits.

    Args:
        pred: Tensor of predicted probabilities.
        true: Tensor of targets, same shape as ``pred``; non-zero means positive.

    Returns:
        0-dim float tensor with the mean accuracy over all elements.
    """
    predicted_positive = pred > 0.5
    return (predicted_positive == true.bool()).float().mean()

#### Create Tasks (w/ Heads)

In [6]:
# Task specs are formatted as: name, model, criterion, metrics
celeba_features_task_spec = (
    'Features',
    Head(out_neurons=40),
    # BCEWithLogitsLoss applies the sigmoid itself, so the head outputs raw logits
    torch.nn.BCEWithLogitsLoss(reduction='mean'),
    # The accuracy helper treats its input as probabilities (decision boundary at 0.5),
    # so logits are squashed with a sigmoid first; otherwise negative logits are miscounted.
    # NOTE(review): assumes emtl passes the raw head output to metric functions - confirm.
    {'Accuracy': lambda pred, true : multilabel_binary_classification_accuracy(torch.sigmoid(pred), true).item()}
)

celeba_bbox_task_spec = (
    'BBox',
    Head(out_neurons=4),  # four regression outputs, one per bounding-box value
    torch.nn.MSELoss(reduction='mean'),
    {
        # mean of (1 - relative error) over every element, returned as a Python float
        'Relative Accuracy': lambda pred, true: torch.mean(1 - relative_error(pred, true)).item(),
    },
)

# One dataset, two heads: both task specs are trained on the same CelebA samples.
# For a quick smoke test, replace the two datasets below with small
# torch.utils.data.Subset slices of `testset` (e.g. range(100) and range(100, 104)).
celeba_multitask = MultiHeadedDatasetTask(
    name='CelebA',
    config='../configs/tasks/CelebA.ini',
    # data
    trainset=trainset,
    testset=testset,
    dataloader_params={'batch_size': 64},
    # heads, losses and metrics
    tasks_specs=[celeba_features_task_spec, celeba_bbox_task_spec],
    # optimisation: the classes themselves are passed, not instances
    optimizer_fn=torch.optim.Adam,
    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
)

#### Create & Launch Trainer (w/ Backbone)

In [7]:
# ResNet50 backbone: drop the original last two children (avgpool and fc), then append
# a global average pool and a flatten so each image becomes one flat feature vector
# (presumably 2048 wide, matching the heads' input size - confirm).
backbone = torch.nn.Sequential(
    *list(M.resnet50(replace_stride_with_dilation=[False, True, True]).children())[:-2],
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
)

# the trainer ties the shared backbone to the task list and the training algorithm
trainer = Trainer(
    backbone=backbone,
    tasks=[celeba_multitask],
    algorithm=SequentialTraining(epochs=50),
    config='../config.ini',
    mlflow_database='sqlite:///../mlflow.db',
)

# start training
trainer.launch()

CelebA:   0%|          | 0/50 [00:00<?, ?it/s]