In [1]:
import torch
import numpy as np
import utils
import trainer
from torch.utils.tensorboard import SummaryWriter

In [13]:
def mixup_data(data, label, alpha=0.2, device=None):
    """Blend every sample in a batch with a randomly chosen partner (mixup).

    Args:
        data: Batched tensor; its first dimension is the one that is permuted.
        label: Tensor indexed along the same first dimension as ``data``.
        alpha: Parameter of the symmetric Beta(alpha, alpha) distribution the
            mixing coefficient is drawn from. A non-positive value disables
            mixing (the coefficient is fixed at 1, so ``mixed_data == data``).
        device: Device the permutation indices are moved to. ``None`` (the
            default) means ``data.device``, which is always a valid choice
            for indexing ``data``.

    Returns:
        Tuple ``(mixed_data, label_2, lamb)`` where ``mixed_data`` is
        ``lamb * data + (1 - lamb) * data[perm]``, ``label_2`` is
        ``label[perm]`` and ``lamb`` is the mixing coefficient as a float.
    """
    # np.random.beta rejects alpha <= 0; treat that as "mixup disabled".
    lamb = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0

    # Index tensors must live on the CPU or on the same device as the tensor
    # they index, so follow the batch's device instead of assuming CUDA.
    index_device = data.device if device is None else device
    idx = torch.randperm(data.shape[0]).to(index_device)

    data_2 = data[idx]
    label_2 = label[idx]
    mixed_data = lamb * data + (1 - lamb) * data_2
    return mixed_data, label_2, lamb


In [15]:
def mixup_loss(lamb, pred, label_1, label_2, loss_fn):
    """Combine the losses against both label sets of a mixed-up batch.

    Args:
        lamb: Mixing coefficient applied to the ``label_1`` term; the
            ``label_2`` term is weighted by ``1 - lamb``.
        pred: Predictions passed as the first argument to ``loss_fn``.
        label_1: First set of targets.
        label_2: Second set of targets.
        loss_fn: Callable invoked as ``loss_fn(pred, target)``.

    Returns:
        ``lamb * loss_fn(pred, label_1) + (1 - lamb) * loss_fn(pred, label_2)``.
    """
    first_term = lamb * loss_fn(pred, label_1)
    second_term = (1 - lamb) * loss_fn(pred, label_2)
    return first_term + second_term

In [None]:
def train(args, model, optimizer, scheduler=None, model_name='model',
          log_dir="runs/resnet_pre_mixup"):
    """Train ``model`` on VOC with mixup, logging progress to TensorBoard.

    Args:
        args: Configuration object. This function reads ``batch_size``,
            ``test_batch_size``, ``inp_size``, ``device``, ``epochs``,
            ``log_every`` and ``val_every`` from it and also passes it to
            ``trainer.save_this_epoch``.
        model: Network to train; moved to ``args.device``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Optional learning-rate scheduler stepped once per epoch.
        model_name: Name passed to ``trainer.save_model`` for checkpoints.
        log_dir: Directory the TensorBoard event files are written to.

    Returns:
        The ``(ap, map)`` pair returned by ``utils.eval_dataset_map`` on the
        test split after the last epoch.

    Note:
        The model is left in eval mode when this function returns.
    """
    writer = SummaryWriter(log_dir)
    # try/finally so the event file is closed even if training raises.
    try:
        train_loader = utils.get_data_loader(
            'voc', train=True, batch_size=args.batch_size, split='trainval', inp_size=args.inp_size)
        test_loader = utils.get_data_loader(
            'voc', train=False, batch_size=args.test_batch_size, split='test', inp_size=args.inp_size)

        # Ensure model is in correct mode and on right device
        model.train()
        model = model.to(args.device)
        loss_fn = torch.nn.BCEWithLogitsLoss()

        cnt = 0  # global iteration counter used as the TensorBoard step
        for epoch in range(args.epochs):
            # NOTE(review): the per-sample weights yielded by the loader are
            # not used by the mixup loss -- confirm this is intended.
            for batch_idx, (data, target, _wgt) in enumerate(train_loader):
                data, target = data.to(args.device), target.to(args.device)
                # Create the permutation on the batch's own device so that
                # training also works when args.device is not CUDA.
                mixed_up_data, target_2, lamb = mixup_data(data, target, device=data.device)

                optimizer.zero_grad()
                output = model(mixed_up_data)
                loss = mixup_loss(lamb, output, target, target_2, loss_fn)
                loss.backward()
                optimizer.step()

                # Log info
                if cnt % args.log_every == 0:
                    writer.add_scalar("training loss", loss.item(), cnt)
                    print('Train Epoch: {} [{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, cnt, 100. * batch_idx / len(train_loader), loss.item()))
                    # Histogram of gradients. Frozen or unused parameters
                    # have no gradient, so skip them instead of crashing.
                    for name, params in model.named_parameters():
                        if params.grad is None:
                            continue
                        writer.add_histogram(name, params.grad.detach().cpu().numpy(), cnt)

                # Validation iteration
                if cnt % args.val_every == 0:
                    model.eval()
                    ap, mean_ap = utils.eval_dataset_map(model, args.device, test_loader)
                    writer.add_scalar("MAP", mean_ap, cnt)
                    model.train()
                cnt += 1
                torch.cuda.empty_cache()

            # Step the schedule and log the learning rate once per epoch.
            if scheduler is not None:
                scheduler.step()
                curr_lr = scheduler.get_last_lr()
                writer.add_scalar("Learning Rate", curr_lr[0], epoch)

            # save model
            if trainer.save_this_epoch(args, epoch):
                trainer.save_model(epoch, model_name, model)

        # Final evaluation. Switch to eval mode first, mirroring the in-loop
        # validation; the loop above always leaves the model in train mode.
        test_loader = utils.get_data_loader(
            'voc', train=False, batch_size=args.test_batch_size, split='test', inp_size=args.inp_size)
        model.eval()
        ap, mean_ap = utils.eval_dataset_map(model, args.device, test_loader)
        writer.flush()
    finally:
        writer.close()
    return ap, mean_ap