In [1]:
import torch.optim as optim
import torch.utils.data.sampler as sampler

from auto_lambda import AutoLambda
from create_network import *
from create_dataset import *
from utils import *

In [6]:
# Run configuration read by the cells below.
opt = dict(
    # Not read by any of the cells shown in this notebook.
    mode="none",
    port="none",

    # What to train
    task="depth",        # seg, depth, normal
    dataset="nyuv2",     # nyuv2, cityscapes
    network="split",     # split, mtan

    # How to train
    total_epochs=1,
    batch_size=4,
    gpu=0,               # gpu ID
    seed=42,
)

# Root directory of each supported dataset, looked up with opt["dataset"].
dataset_paths = dict(
    nyuv2="dataset/nyuv2",
    cityscapes="dataset/cityscapes",
)

# Network constructors, looked up with opt["network"].
model_classes = dict(split=MTLDeepLabv3, mtan=MTANDeepLabv3)

In [7]:
# Seed torch, NumPy and Python's `random` from the single configured seed.
seed_value = opt["seed"]
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(seed_value)

# Use the default CUDA device when one is available, otherwise the CPU.
# opt["gpu"] is not consulted here.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [8]:
# Output directory for the files this notebook writes under "logging/".
os.makedirs("logging", exist_ok=True)

# Task flags for the configured task/dataset pair.
train_tasks = create_task_flags(opt["task"], opt["dataset"])
print(f"Training Task: {opt['dataset'].title()} - {opt['task'].title()} in Single Task Learning Mode with {opt['network'].upper()}")

# Build the selected network and move it to the chosen device.
network_cls = model_classes[opt["network"]]
model = network_cls(train_tasks).to(device)

# Count only the parameters that will receive gradients.
num_params = 0
for param in model.parameters():
    if param.requires_grad:
        num_params += param.numel()
print(f"Model: {opt['network'].title()} | Number of Trainable Parameters: {num_params/1e6:.2f}M")

# SGD with momentum, annealed along a cosine curve over the whole run.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt["total_epochs"])

Training Task: Nyuv2 - Depth in Single Task Learning Mode with SPLIT
Model: Split | Number of Trainable Parameters: 39.63M


In [9]:
# Hand the model and its target path under logging/ to torch_save.
# NOTE(review): torch_save is not defined in the cells shown; presumably it
# comes from one of the star imports -- confirm it is not a typo for torch.save.
model_checkpoint_path = 'logging/depth_model.pt'
torch_save(model, model_checkpoint_path)

In [None]:
# Reject a dataset name that has no configured path before doing any work.
if opt["dataset"] not in dataset_paths:
    raise ValueError(f"Unsupported dataset: {opt['dataset']}")

dataset_path = dataset_paths[opt["dataset"]]

# Build the augmented training split and the test split of the chosen dataset.
if opt["dataset"] == 'nyuv2':
    train_set, test_set = (
        NYUv2(root=dataset_path, train=True, augmentation=True),
        NYUv2(root=dataset_path, train=False),
    )
elif opt["dataset"] == 'cityscapes':
    train_set, test_set = (
        CityScapes(root=dataset_path, train=True, augmentation=True),
        CityScapes(root=dataset_path, train=False),
    )

# Shuffle training batches (loaded with 4 workers); keep test order fixed.
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=opt["batch_size"], shuffle=True, num_workers=4
)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=opt["batch_size"], shuffle=False
)

In [4]:
# Train and evaluate the single-task network configured in `opt`.
#
# Each epoch runs one optimisation pass over train_loader, one gradient-free
# pass over test_loader, steps the LR scheduler, prints a summary line and
# saves the accumulated train/test metrics to logging/.

# Batch counts per epoch. The loops below iterate the loaders directly, so
# these are kept only as notebook globals in case other cells read them.
train_batch = len(train_loader)
test_batch = len(test_loader)
train_metric = TaskMetric(train_tasks, train_tasks, opt["batch_size"], opt["total_epochs"], opt["dataset"])
test_metric = TaskMetric(train_tasks, train_tasks, opt["batch_size"], opt["total_epochs"], opt["dataset"])

# Training loop
for index in range(opt["total_epochs"]):

    # Training pass.
    model.train()
    # Iterate the loader directly: iterators have no `.next()` method in
    # Python 3, and current PyTorch DataLoader iterators do not provide one.
    for train_data, train_target in train_loader:
        train_data = train_data.to(device)
        train_target = {task_id: train_target[task_id].to(device) for task_id in train_tasks.keys()}

        train_pred = model(train_data)
        optimizer.zero_grad()

        # One loss per task flag; predictions are matched to tasks by position.
        train_loss = [compute_loss(train_pred[i], train_target[task_id], task_id) for i, task_id in enumerate(train_tasks)]
        # Only the first loss is back-propagated.
        train_loss[0].backward()
        optimizer.step()

        train_metric.update_metric(train_pred, train_target, train_loss)

    train_str = train_metric.compute_metric()
    train_metric.reset()

    # Evaluation pass on the test split, with gradients disabled.
    model.eval()
    with torch.no_grad():
        for test_data, test_target in test_loader:
            test_data = test_data.to(device)
            test_target = {task_id: test_target[task_id].to(device) for task_id in train_tasks.keys()}

            test_pred = model(test_data)
            test_loss = [compute_loss(test_pred[i], test_target[task_id], task_id) for i, task_id in enumerate(train_tasks)]

            test_metric.update_metric(test_pred, test_target, test_loss)

    test_str = test_metric.compute_metric()
    test_metric.reset()

    scheduler.step()

    print(f"Epoch {index:04d} | TRAIN:{train_str} || TEST:{test_str} | Best: {opt['task'].title()} {test_metric.get_best_performance(opt['task']):.4f}")

    # Save the metric histories every epoch. `opt` is a plain dict, so its
    # entries must be read by key; attribute access raises AttributeError.
    task_dict = {"train_loss": train_metric.metric, "test_loss": test_metric.metric}
    np.save(f"logging/stl_{opt['network']}_{opt['dataset']}_{opt['task']}_{opt['seed']}.npy", task_dict)

Dataset: Nyuv2 | Training Task: Seg + Depth + Normal | Primary Task: Seg + Depth + Normal in Multi-task / Auxiliary Learning Mode with SPLIT
Applying Multi-task Methods | Weighting-based: Equal + Gradient-based: NONE
Model: Split | Number of Trainable Parameters: 71.89M
