## Import required packages and functions

In [1]:
import os
import torch
from datetime import datetime
import argparse
from torch.utils.tensorboard import SummaryWriter

from DatasetClass import CarlaMotionSeg
from train import train, run_val
from utils_train import get_dataloaders, setup_logger

## Define Command Line Arguments + Define Setup Function (can be left unchanged)

In [2]:
def parse():
    """Build the argument parser that configures a training run.

    Returns:
        argparse.ArgumentParser: the (unparsed) parser, so the caller decides
        which argument list to parse. Options cover learning rate, batch size,
        epoch count, loss type and its focal-loss parameters, scheduler
        patience/factor, checkpoint path and the dataset fraction to use.
    """
    parser = argparse.ArgumentParser()
    # "%(default)s" is filled in by argparse with the actual default, so the
    # help text can no longer drift away from the value declared here.
    parser.add_argument("--lr", default=1.25e-5, type=float, help='Learning rate - default: %(default)s')
    parser.add_argument("--batch_size", default=2, type=int, help='Default=2')
    parser.add_argument("--epochs", default=50, type=int, help='Default=50')
    parser.add_argument("--loss_type", default='focal', type=str, help='Loss types available - focal, bce')
    parser.add_argument("--patience", default=3, type=int, help='Default=3')
    parser.add_argument("--lr_scheduler_factor", default=0.5, type=float, help="Learning rate multiplier - default: %(default)s")
    parser.add_argument("--alpha", default=0.25, type=float, help='Focal loss alpha - default: 0.25')
    parser.add_argument("--gamma", default=2.0, type=float, help='Focal loss gamma - default: 2')
    # The string '0' is the default placeholder value for the checkpoint path.
    parser.add_argument("--load_chkpt", '-chkpt', default='0', type=str, help="Loading entire checkpoint path for inference/continue training")
    parser.add_argument("--dataset_fraction", default=0.002, type=float, help="fraction of dataset to be used")
    return parser

def train_setup(args):
    """Prepare logging, data loaders and the TensorBoard writer for a run.

    Args:
        args: namespace that must already carry ``root`` and ``device`` in
            addition to the parsed command-line options. This function sets
            ``args.root_tb``, ``args.now`` and ``args.writer`` on it.

    Returns:
        tuple: ``(args, logger, train_loader, val_loader)``. The third loader
        returned by ``get_dataloaders`` is not handed back to the caller.
    """
    project_root = args.root

    # Directory layout below the project root.
    # (Alternative dataset location: "datasets/Extended_MOD_Masks/")
    dataset_dir = os.path.join(project_root, "datasets/Carla_Annotation/Carla_Export/")
    log_dir = os.path.join(project_root, "logs/")
    tensorboard_dir = os.path.join(project_root, "runs/")
    args.root_tb = tensorboard_dir

    # Run identifier: timestamp followed by batch size, learning rate and epochs.
    args.now = datetime.now()
    run_tag = args.now.strftime(f"%d-%m-%Y_%H-%M_{args.batch_size}_{args.lr}_{args.epochs}")

    # Logger first, so the configuration is recorded before any data is touched.
    args, logger = setup_logger(args, log_dir, run_tag)
    logger.info(f"running with lr={args.lr}, batch_size={args.batch_size}, epochs={args.epochs}, loss_type = {args.loss_type}, patience={args.patience}, lr_scheduler_factor={args.lr_scheduler_factor} alpha={args.alpha}, gamma={args.gamma}")
    logger.info(f"running on '{args.device}'")

    # Build the dataset and split it into loaders; the third loader is unused here.
    train_loader, val_loader, _ = get_dataloaders(CarlaMotionSeg(dataset_dir), args)

    # TensorBoard events for this run go into their own sub-directory.
    args.writer = SummaryWriter(os.path.join(tensorboard_dir, run_tag))

    return args, logger, train_loader, val_loader

## Specify the root and run training

In [3]:
# Parse an explicitly empty argument list so the kernel's own command line is
# ignored and every option takes its declared default.
args = parse().parse_args([])
args.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Project root containing "datasets/", "logs/" and "runs/". Set the
# MOTION_SEG_ROOT environment variable to run on another machine; the
# original cluster path remains the default.
args.root = os.environ.get("MOTION_SEG_ROOT", "/storage/remote/atcremers40/motion_seg/")

args, logger, train_loader, val_loader = train_setup(args)
# NOTE(review): the fourth positional argument to train() is passed as None;
# its meaning is defined in train.py, which is not shown here.
train(args, train_loader, val_loader, None, logger)

[INFO] running with lr=1.25e-05, batch_size=2, epochs=50, loss_type = focal, patience=3, lr_scheduler_factor=0.5 alpha=0.25, gamma=2.0
[INFO] running on 'cuda:0'
dirs loaded:
['0000', '0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009', '0010', '0011', '0012', '0013', '0014', '0015', '0016', '0017', '0018', '0019', '0020', '0021', '0022', '0023', '0024', '0025']
[INFO] loaded model of type: <class 'ModelClass.UNET'>
train network ...
[INFO] Epoch [1/50] with lr 1.25e-05, train loss: 147278.39844, val loss: 91302.375, IoU: 1.0, ETA: 0.06 hrs
[INFO] Epoch [2/50] with lr 1.25e-05, train loss: 140562.03125, val loss: 92290.27344, IoU: 0.0, ETA: 0.06 hrs


KeyboardInterrupt: 

In [4]:
# Load the TensorBoard notebook extension so the %tensorboard magic can be used.
%load_ext tensorboard

In [8]:
%tensorboard --logdir args.root_tb