In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import argparse
from torch.autograd import Variable
import torch.utils.data as data
#CHANGE
from data import v2, v1, AnnotationTransform, VOCDetection, detection_collate, VOC_CLASSES
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import numpy as np
import time
from commonData import commonDataset
from logger import Logger

In [2]:
def str2bool(v):
    """Interpret a string flag as a boolean.

    Returns True when ``v``, compared case-insensitively, is one of
    "yes", "true", "t" or "1"; every other string yields False.
    """
    truthy_spellings = {"yes", "true", "t", "1"}
    normalized = v.lower()
    return normalized in truthy_spellings

#CHANGE
# Machine-specific locations and run switches. Everything in this cell is
# re-exported under lower-case names by the next cell, which is what the rest
# of the notebook reads.

# COCO image directory and instance-annotation file; both are handed to
# commonDataset (via `cocoimg` / `annFile`).
cocoimgPath = "/new_data/gpu/utkrsh/coco/images/train2014/"
annFilePath = "/new_data/gpu/utkrsh/coco/annotations/instances_train2014.json"
# To continue a previous run, point RESUME at a saved checkpoint, e.g.:
# RESUME = "./weights/ssd300_0712_COCO14_2000_run2_BCELoss.pth" # change to saved model file path
# With RESUME = None the setup cell loads the pretrained VGG base network and
# Xavier-initialises the remaining heads instead.
RESUME = None
# First value of the training loop counter (`range(start_iter, max_iter)`).
START_ITER = 1
# Request GPU execution.
CUDA = True
# Root of the VOCdevkit tree, passed to commonDataset as `voc_root`.
VOCroot = "/users/gpu/utkrsh/data/VOCdevkit/"
# Directory given to the project's Logger; train() sends scalar and histogram
# summaries through `logger` when `logTensorboard` is True.
logFolder = "./.logs/run1_singleGPU_BCELoss/"
logger = Logger(logFolder)
logTensorboard = True

In [3]:
# Flat replacement for the argparse namespace of the original script: each
# name below is read as a module-level global by the setup and training cells.

# Selects the prior-box config (`v2` vs `v1`) and names the final checkpoint.
version ='v2'
# File name of the pretrained VGG weights, loaded from `save_folder`.
basenet ='vgg16_reducedfc.pth'
# NOTE(review): not referenced in the visible cells — MultiBoxLoss is built
# with a literal 0.5 instead.
jaccard_threshold = 0.5
# Mini-batch size given to the DataLoader.
batch_size = 16
# Checkpoint path to resume from, or None to start from the base network.
resume = RESUME
# DataLoader worker processes.
num_workers = 4
# NOTE(review): not referenced in the visible cells — the training loop is
# bounded by `max_iter`, which the setup cell defines.
iterations = 120000
start_iter = START_ITER
cuda = CUDA
# Base learning rate for SGD.
lr = 1e-3
# momentum / weight_decay / gamma are assigned again (to the same values) in
# the setup cell before the optimizer is created.
momentum = 0.9
weight_decay = 5e-4
gamma = 0.1
# NOTE(review): not referenced in the visible cells.
log_iters = False
# Visdom plotting switches; both off for this run.
visdom = False
send_images_to_visdom = False
# Directory for the base network file and for saved checkpoints.
save_folder = 'weights/'
# Dataset locations forwarded to commonDataset.
cocoimg = cocoimgPath
annFile = annFilePath
voc_root = VOCroot

In [None]:
# --- Device selection -------------------------------------------------------
# Resolve the GPU request once. The original only checked availability when
# choosing the default tensor type, then called `.cuda()` unconditionally on
# `cuda`, which crashes on a machine without CUDA. Rebinding `cuda` here keeps
# every later `if cuda:` (model move, loss construction, batch transfer)
# consistent with the default tensor type.
cuda = cuda and torch.cuda.is_available()
if cuda:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

# Prior-box configuration matching the requested version string.
cfg = v2 if version == 'v2' else v1

# Create the checkpoint directory (including parents) if it is missing.
os.makedirs(save_folder, exist_ok=True)

# --- Training constants -----------------------------------------------------
train_sets = [('2007', 'trainval')]
# train_sets = 'train'
ssd_dim = 300  # only support 300 now
means = (104, 117, 123)  # only support voc now
num_classes = len(VOC_CLASSES) + 1
accum_batch_size = 32
# NOTE(review): iter_size is computed but not used in the visible cells.
iter_size = accum_batch_size / batch_size
max_iter = 30000
weight_decay = 0.0005
# Iterations at which the learning rate is decayed. All of them lie beyond
# max_iter, so no decay step is reached with the values in this cell.
stepvalues = (80000, 100000, 120000)
gamma = 0.1
momentum = 0.9

# --- Optional visdom client -------------------------------------------------
if visdom:
    # Note: this import rebinds the global `visdom` from the boolean flag to
    # the module object; later `if visdom:` checks still evaluate truthy.
    import visdom
    viz = visdom.Visdom()

# --- Model ------------------------------------------------------------------
ssd_net = build_ssd('train', ssd_dim, num_classes)
net = ssd_net

# Multi-GPU wrapping is deliberately switched off for this single-GPU run.
if False:
    net = torch.nn.DataParallel(ssd_net)
    cudnn.benchmark = True

if resume:
    print('Resuming training, loading {}...'.format(resume))
    ssd_net.load_weights(resume)
else:
    # Fresh run: only the VGG backbone starts from pretrained weights.
    vgg_weights = torch.load(save_folder + basenet)
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

if cuda:
    net = net.cuda()


def xavier(param):
    """Fill ``param`` in place with Xavier/Glorot-uniform values.

    Uses ``torch.nn.init.xavier_uniform_`` when the installed PyTorch provides
    it and falls back to the legacy ``xavier_uniform`` name otherwise, so the
    helper works on both old and current releases without a deprecation
    warning.

    Args:
        param: tensor to initialise (modified in place).
    """
    initializer = getattr(init, 'xavier_uniform_', None)
    if initializer is None:
        # Older PyTorch releases only ship the un-suffixed name.
        initializer = init.xavier_uniform
    initializer(param)


def weights_init(m):
    """Initialise a module if it is a 2-D convolution; ignore anything else.

    Intended for use with ``Module.apply``. For ``nn.Conv2d`` layers the
    weight is filled via ``xavier`` and the bias is zeroed. Convolutions
    created with ``bias=False`` have no bias tensor, so that step is skipped
    for them instead of raising AttributeError.

    Args:
        m: the module currently visited by ``apply``.
    """
    if not isinstance(m, nn.Conv2d):
        return
    xavier(m.weight.data)
    if m.bias is not None:
        m.bias.data.zero_()


if not resume:
    print('Initializing weights...')
    # The pretrained file only covers the VGG backbone, so every other head
    # gets Xavier initialisation. Order is kept as extras -> loc -> conf -> dmn.
    # `dmn` is the head flagged #CHANGE in the original (presumably the domain
    # branch behind the "dom" loss — confirm in ssd.py).
    for _new_head in (ssd_net.extras, ssd_net.loc, ssd_net.conf, ssd_net.dmn):
        _new_head.apply(weights_init)

# Plain SGD over all network parameters.
optimizer = optim.SGD(
    net.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)
# Positional arguments are passed through unchanged; see MultiBoxLoss in
# layers/modules for what each position means.
criterion = MultiBoxLoss(
    num_classes, 0.5, True, 0, True, 3, 0.5, False, cuda
)

Loading base network...
Initializing weights...


# Scratch / sanity-check statements: build the combined dataset and a loader
# at module level and pull a single batch. train() constructs its own dataset
# and loader, so nothing below is required for training itself.
dataset = commonDataset(voc_root, train_sets, ssd_dim, means,
                cocoimg, annFile)

data_loader = data.DataLoader(dataset, batch_size, num_workers=num_workers,
                              shuffle=True, collate_fn=detection_collate, pin_memory=True)
batch_iter = iter(data_loader)
  

# Fetch one collated batch for interactive inspection.
img, targets = next(batch_iter)

# Bare expression: displays the module structure as the cell output.
net

# IPython magic that sets the environment variable for this kernel.
# NOTE(review): this runs after the model has already been moved to the GPU;
# confirm the setting still takes effect at this point.
%env CUDA_LAUNCH_BLOCKING=1

In [None]:
def train():
    """Run the SSD training loop on the combined VOC + COCO dataset.

    Reads its configuration from module-level globals (``net``, ``ssd_net``,
    ``optimizer``, ``criterion``, ``batch_size``, ``start_iter``,
    ``max_iter``, ``stepvalues``, ``save_folder``, ...). Each iteration runs a
    forward pass, sums the localisation, confidence and domain losses returned
    by ``criterion``, and takes one SGD step.

    Side effects:
        * prints timing and losses every 10 iterations;
        * when ``logTensorboard`` is set, logs scalar losses plus parameter
          and gradient histograms through ``logger`` every 10 iterations;
        * writes a checkpoint into ``save_folder`` every 2000 iterations and
          a final ``<version>.pth`` when the loop ends.

    Changes from the previous revision: periodic checkpoints go to
    ``save_folder`` instead of a hard-coded ``'weights/'`` prefix, and
    parameters whose ``.grad`` is None are skipped when logging gradient
    histograms instead of raising AttributeError.

    NOTE(review): the loop uses the legacy Variable API (``volatile=True``,
    ``loss.data[0]``); it is left untouched here because the notebook appears
    to target an old PyTorch release.
    """
    def to_np(x):
        """Return the tensor behind a Variable as a NumPy array on the CPU."""
        return x.data.cpu().numpy()

    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')

    #CHANGE
    dataset = commonDataset(voc_root, train_sets, ssd_dim, means,
                            cocoimg, annFile)
    #dataset = VOCDetection(voc_root, train_sets, SSDAugmentation(
    #    ssd_dim, means), AnnotationTransform())

    epoch_size = len(dataset) // batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if visdom:
        # initialize visdom loss plots (per iteration and per epoch)
        lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title='Current SSD Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
        epoch_lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(
                xlabel='Epoch',
                ylabel='Loss',
                title='Epoch SSD Training Loss',
                legend=['Loc Loss', 'Conf Loss', 'Loss']
            )
        )
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size, num_workers=num_workers,
                                  shuffle=True, collate_fn=detection_collate, pin_memory=True)
    for iteration in range(start_iter, max_iter):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # (re)create the batch iterator at the start and at epoch boundaries
            batch_iterator = iter(data_loader)
        if iteration in stepvalues:
            # scheduled learning-rate decay
            step_index += 1
            adjust_learning_rate(optimizer, gamma, step_index)
            if visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss,
                        loc_loss + conf_loss]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append'
                )
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        # load train data
        images, targets = next(batch_iterator)

        if cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        #CHANGE: the criterion also returns a third ("domain") loss term
        loss_l, loss_c, loss_d = criterion(out, targets)
        loss = loss_l + loss_c + loss_d

        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print("iter : "+repr(iteration)+" || loc: %.4f || conf: %.4f || dom: %.4f || loss: %.4f ||\n" %
                       (loss_l.data[0], loss_c.data[0], loss_d.data[0], loss.data[0]) )
            if visdom and send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
            if logTensorboard:
                info = {
                    'loc_loss' : loss_l.data[0],
                    'conf_loss' : loss_c.data[0],
                    'domain_loss' : loss_d.data[0],
                    'loss' : loss.data[0]
                }

                for tag, value in info.items():
                    logger.scalar_summary(tag, value, iteration)

                for tag, value in net.named_parameters():
                    tag = tag.replace('.','/')
                    logger.histo_summary(tag, to_np(value), iteration)
                    # Parameters that received no gradient have grad == None.
                    if value.grad is not None:
                        logger.histo_summary(tag+'/grad', to_np(value.grad), iteration)

        if iteration % 2000 == 0:
            print('Saving state, iter:', iteration)
            checkpoint_name = 'ssd300_0712_COCO14_' + repr(iteration) + '.pth'
            torch.save(ssd_net.state_dict(),
                       os.path.join(save_folder, checkpoint_name))
    torch.save(ssd_net.state_dict(), os.path.join(save_folder, version + '.pth'))


def adjust_learning_rate(optimizer, gamma, step):
    """Set every param group's learning rate to ``lr * gamma ** step``.

    ``lr`` is the module-level base learning rate. The result is stored under
    a different local name on purpose: assigning to a local called ``lr``
    would make the name local to this function and the read on the right-hand
    side would raise UnboundLocalError.

    Adapted from the PyTorch ImageNet example:
    https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        gamma: multiplicative decay factor applied per step.
        step: number of decay steps taken so far.
    """
    decayed_lr = lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = decayed_lr


# Entry point carried over from the script version of this code. Executing
# this cell starts training (the captured output below is from that run).
if __name__ == '__main__':
    train()

Loading Dataset...
loading annotations into memory...
Done (t=9.65s)
creating index...
index created!
Training SSD on VOC07_and_COCO14_train


  return tensor.masked_fill_(mask, value)


Timer: 0.2019 sec.
iter : 10 || loc: 3.2641 || conf: 14.4058 || dom: 0.0020 || loss: 17.6719 ||

Timer: 0.1986 sec.
iter : 20 || loc: 3.0107 || conf: 12.8385 || dom: 0.0024 || loss: 15.8516 ||

Timer: 0.1993 sec.
iter : 30 || loc: 3.1137 || conf: 11.0757 || dom: 0.0032 || loss: 14.1926 ||

Timer: 0.1968 sec.
iter : 40 || loc: 3.3932 || conf: 9.2630 || dom: 0.0017 || loss: 12.6579 ||

Timer: 0.1989 sec.
iter : 50 || loc: 2.8676 || conf: 7.7530 || dom: 0.0028 || loss: 10.6233 ||

Timer: 0.2013 sec.
iter : 60 || loc: 3.1021 || conf: 8.4156 || dom: 0.0023 || loss: 11.5201 ||

Timer: 0.1910 sec.
iter : 70 || loc: 2.9545 || conf: 7.1044 || dom: 0.0032 || loss: 10.0621 ||

Timer: 0.2003 sec.
iter : 80 || loc: 2.9028 || conf: 6.3668 || dom: 0.0023 || loss: 9.2719 ||

Timer: 0.1960 sec.
iter : 90 || loc: 3.2444 || conf: 5.7655 || dom: 0.0027 || loss: 9.0126 ||

Timer: 0.2078 sec.
iter : 100 || loc: 2.9452 || conf: 5.7137 || dom: 0.0023 || loss: 8.6613 ||

Timer: 0.1951 sec.
iter : 110 || loc: 3

Timer: 0.2033 sec.
iter : 870 || loc: 2.0996 || conf: 5.0295 || dom: 0.0017 || loss: 7.1308 ||

Timer: 0.2040 sec.
iter : 880 || loc: 2.2734 || conf: 4.4064 || dom: 0.0024 || loss: 6.6822 ||

Timer: 0.2080 sec.
iter : 890 || loc: 2.4595 || conf: 4.7166 || dom: 0.0016 || loss: 7.1777 ||

Timer: 0.2191 sec.
iter : 900 || loc: 2.5670 || conf: 4.7494 || dom: 0.0025 || loss: 7.3189 ||

Timer: 0.2047 sec.
iter : 910 || loc: 2.0092 || conf: 4.7841 || dom: 0.0018 || loss: 6.7951 ||

Timer: 0.2116 sec.
iter : 920 || loc: 1.9685 || conf: 4.0702 || dom: 0.0018 || loss: 6.0405 ||

Timer: 0.1994 sec.
iter : 930 || loc: 1.8564 || conf: 4.9184 || dom: 0.0031 || loss: 6.7779 ||

Timer: 0.2035 sec.
iter : 940 || loc: 1.9761 || conf: 5.0237 || dom: 0.0018 || loss: 7.0016 ||

Timer: 0.2057 sec.
iter : 950 || loc: 2.2445 || conf: 4.2190 || dom: 0.0034 || loss: 6.4669 ||

Timer: 0.2565 sec.
iter : 960 || loc: 2.3403 || conf: 4.6682 || dom: 0.0019 || loss: 7.0104 ||

Timer: 0.2084 sec.
iter : 970 || loc: 2.

Timer: 0.2030 sec.
iter : 1720 || loc: 2.3592 || conf: 4.6082 || dom: 0.0025 || loss: 6.9699 ||

Timer: 0.1964 sec.
iter : 1730 || loc: 1.6881 || conf: 4.9300 || dom: 0.0025 || loss: 6.6206 ||

Timer: 0.1947 sec.
iter : 1740 || loc: 2.2603 || conf: 4.6846 || dom: 0.0020 || loss: 6.9470 ||

Timer: 0.1963 sec.
iter : 1750 || loc: 2.1926 || conf: 3.8357 || dom: 0.0019 || loss: 6.0302 ||

Timer: 0.1943 sec.
iter : 1760 || loc: 2.0160 || conf: 4.5423 || dom: 0.0023 || loss: 6.5606 ||

Timer: 0.2031 sec.
iter : 1770 || loc: 2.4671 || conf: 4.5771 || dom: 0.0028 || loss: 7.0470 ||

Timer: 0.2052 sec.
iter : 1780 || loc: 1.5742 || conf: 4.2502 || dom: 0.0021 || loss: 5.8266 ||

Timer: 0.2066 sec.
iter : 1790 || loc: 1.8776 || conf: 4.8358 || dom: 0.0024 || loss: 6.7158 ||

Timer: 0.2002 sec.
iter : 1800 || loc: 1.9287 || conf: 4.5160 || dom: 0.0020 || loss: 6.4466 ||

Timer: 0.1971 sec.
iter : 1810 || loc: 2.0531 || conf: 4.1307 || dom: 0.0021 || loss: 6.1859 ||

Timer: 0.2027 sec.
iter : 1820