In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import argparse
from torch.autograd import Variable
import torch.utils.data as data
#CHANGE
from data import v2, v1, AnnotationTransform, VOCDetection, detection_collate, VOC_CLASSES
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import numpy as np
import time
from commonData import commonDataset

def str2bool(v):
    """Interpret a string flag as a boolean.

    Returns True when the lower-cased value is one of "yes", "true", "t" or
    "1"; every other string yields False.
    """
    truthy_values = ("yes", "true", "t", "1")
    normalized = v.lower()
    return normalized in truthy_values

# Machine-specific settings -- adjust these for your environment.
# The three data locations can be overridden through environment variables so
# the notebook can run on another machine without editing this cell; the
# defaults are the original hard-coded paths.
cocoimgPath = os.environ.get(
    "COCO_IMG_PATH", "/new_data/gpu/utkrsh/coco/images/train2014/")
annFilePath = os.environ.get(
    "COCO_ANN_FILE", "/new_data/gpu/utkrsh/coco/annotations/instances_train2014.json")
VOCroot = os.environ.get("VOC_ROOT", "/users/gpu/utkrsh/data/VOCdevkit/")

# Checkpoint to resume from (change to the saved model file path). A falsy
# value makes the setup cell load the base network instead.
RESUME = "./weights/ssd300_0712_COCO14_2000_run2_BCELoss.pth"
# First iteration of the training loop; keep it in step with the checkpoint
# named in RESUME.
START_ITER = 2001
# Request GPU execution.
CUDA = True

In [2]:
# --- values forwarded from the machine-specific cell above ---
resume = RESUME
start_iter = START_ITER
cuda = CUDA
cocoimg = cocoimgPath
annFile = annFilePath
voc_root = VOCroot

# --- model selection and weight files ---
version = 'v2'
basenet = 'vgg16_reducedfc.pth'
save_folder = 'weights/'
jaccard_threshold = 0.5

# --- data loading ---
batch_size = 16
num_workers = 4

# --- optimiser hyper-parameters and schedule ---
lr = 1e-3
momentum = 0.9
weight_decay = 5e-4
gamma = 0.1
iterations = 120000

# --- logging / visualisation switches ---
log_iters = False
visdom = False
send_images_to_visdom = False

In [3]:
# Fall back to CPU when CUDA was requested but is unavailable, so that every
# later `if cuda:` check (model placement, loss construction, batch transfer)
# agrees with the default tensor type selected here.
cuda = bool(cuda) and torch.cuda.is_available()
if cuda:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

# Configuration object exported by the project's `data` package.
cfg = v2 if version == 'v2' else v1

# makedirs creates missing parent directories and tolerates an existing folder.
os.makedirs(save_folder, exist_ok=True)

train_sets = [('2007', 'trainval')]
ssd_dim = 300  # only support 300 now
means = (104, 117, 123)  # only support voc now
num_classes = len(VOC_CLASSES) + 1
accum_batch_size = 32
# NOTE(review): true division yields a float (2.0 with the current settings);
# the value is not referenced in the visible cells.
iter_size = accum_batch_size / batch_size
# Single source of truth: the iteration budget comes from the config cell, as
# do weight_decay, gamma and momentum (previously re-assigned here to the same
# values).
max_iter = iterations
stepvalues = (80000, 100000, 120000)

if visdom:
    # Import under an alias so the boolean `visdom` flag is not rebound to the
    # module object.
    import visdom as visdom_lib
    viz = visdom_lib.Visdom()

ssd_net = build_ssd('train', ssd_dim, num_classes)
net = ssd_net

# Multi-GPU wrapping is switched off in this notebook; flip the flag to enable.
use_data_parallel = False
if use_data_parallel:
    net = torch.nn.DataParallel(ssd_net)
    cudnn.benchmark = True

if resume:
    print('Resuming training, loading {}...'.format(resume))
    ssd_net.load_weights(resume)
else:
    vgg_weights = torch.load(os.path.join(save_folder, basenet))
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

if cuda:
    net = net.cuda()


def xavier(param):
    """Fill ``param`` in place with Xavier (Glorot) uniform values.

    Prefers ``init.xavier_uniform_`` when the installed PyTorch provides it and
    falls back to the legacy ``init.xavier_uniform`` name otherwise, so the
    notebook works on both old and new releases without a deprecation warning.
    Returns None.
    """
    initializer = getattr(init, 'xavier_uniform_', None) or init.xavier_uniform
    initializer(param)


def weights_init(m):
    """Initialise a convolution layer; leave every other module untouched.

    Intended for ``Module.apply``: for each ``nn.Conv2d`` the weight tensor is
    handed to ``xavier`` and the bias, when the layer has one, is zeroed.
    """
    if not isinstance(m, nn.Conv2d):
        return
    xavier(m.weight.data)
    # Conv2d(..., bias=False) stores bias as None; skip it instead of crashing.
    if m.bias is not None:
        m.bias.data.zero_()


if not resume:
    print('Initializing weights...')
    # Initialise the newly added layers' weights with the xavier method. The
    # sub-modules are visited in the same order as before; `dmn` is the one
    # marked as a change in the original notebook.
    for added_part in ('extras', 'loc', 'conf', 'dmn'):
        getattr(ssd_net, added_part).apply(weights_init)

# SGD over all network parameters, using the hyper-parameters from the config.
optimizer = optim.SGD(
    net.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)
# The positional arguments are passed through unchanged; their meaning is set
# by the project's MultiBoxLoss signature, which is not visible here -- TODO
# confirm before replacing any literal with a config name.
criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False, cuda)

Resuming training, loading ./weights/ssd300_0712_COCO14_2000_run2_BCELoss.pth...
Loading weights into state dict...
Finished!


# Build the dataset from the VOC and COCO locations and pull one batch from a
# shuffled loader at module level.
dataset = commonDataset(voc_root, train_sets, ssd_dim, means, cocoimg, annFile)

data_loader = data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=detection_collate,
    pin_memory=True,
)
batch_iter = iter(data_loader)

img, targets = next(batch_iter)

In [4]:
# Display the network's module tree (a cell's last expression is its output).
net

SSD (
  (vgg): ModuleList (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU (inplace)
    (4): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU (inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU (inplace)
    (9): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU (inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU (inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU (inplace)
    (16): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding

In [5]:
# NOTE(review): this runs after earlier cells have already moved the model to
# the GPU; presumably the variable must be in the environment before CUDA is
# initialised to take effect -- confirm, and move it to the top of the notebook
# (or drop it) if it is only a debugging aid.
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [6]:
def _new_loss_plot(xlabel, title):
    """Create an empty three-series visdom line plot.

    Returns whatever ``viz.line`` returns; ``train`` passes it back as ``win=``
    when appending points.
    """
    return viz.line(
        X=torch.zeros((1,)).cpu(),
        Y=torch.zeros((1, 3)).cpu(),
        opts=dict(
            xlabel=xlabel,
            ylabel='Loss',
            title=title,
            legend=['Loc Loss', 'Conf Loss', 'Loss']
        )
    )


def train():
    """Run the SSD training loop from ``start_iter`` up to ``max_iter``.

    All configuration is read from the notebook's module-level names (``net``,
    ``ssd_net``, ``optimizer``, ``criterion``, ``batch_size``, ``stepvalues``,
    ``save_folder`` and so on). The function builds the dataset and loader,
    then for every iteration fetches a batch, runs the forward pass, sums the
    three loss terms returned by ``criterion`` (logged as loc / conf / dom),
    back-propagates and steps the optimiser. Losses are printed every 10
    iterations and the state dict is saved every 2000 iterations, with a final
    save after the loop. Returns None.
    """
    net.train()
    # Running loss sums for the visdom "epoch" plot. As in the original code
    # they are only flushed and reset when a learning-rate step is reached.
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')

    dataset = commonDataset(voc_root, train_sets, ssd_dim, means,
                            cocoimg, annFile)

    # epoch_size is used as a modulus below, so it must never be zero (which
    # would happen for a dataset smaller than one batch).
    epoch_size = max(1, len(dataset) // batch_size)
    print('Training SSD on', dataset.name)

    # When resuming, account for learning-rate steps that lie strictly before
    # start_iter; a step equal to start_iter is still handled inside the loop.
    # Previously the index always restarted at 0, so a resumed run trained
    # with the undecayed rate.
    step_index = sum(1 for step in stepvalues if step < start_iter)
    if step_index:
        adjust_learning_rate(optimizer, gamma, step_index)

    if visdom:
        # initialize visdom loss plots
        lot = _new_loss_plot('Iteration', 'Current SSD Training Loss')
        epoch_lot = _new_loss_plot('Epoch', 'Epoch SSD Training Loss')

    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size, num_workers=num_workers,
                                  shuffle=True, collate_fn=detection_collate, pin_memory=True)
    for iteration in range(start_iter, max_iter):
        # (Re)create the batch iterator on the first pass and at every
        # multiple of epoch_size. Compare with None explicitly rather than
        # relying on the iterator object's truthiness.
        if batch_iterator is None or iteration % epoch_size == 0:
            batch_iterator = iter(data_loader)
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, gamma, step_index)
            if visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss,
                        loc_loss + conf_loss]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append'
                )
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        # load train data
        images, targets = next(batch_iterator)

        # NOTE(review): Variable(..., volatile=True) and `.data[0]` below look
        # like the pre-0.4 PyTorch API this notebook was written against --
        # confirm the installed version before modernising them.
        if cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_d = criterion(out, targets)
        loss = loss_l + loss_c + loss_d

        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print("iter : "+repr(iteration)+" || loc: %.4f || conf: %.4f || dom: %.4f || loss: %.4f ||\n" %
                       (loss_l.data[0], loss_c.data[0], loss_d.data[0], loss.data[0]) )
            if visdom and send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if visdom:
            viz.line(
                X=torch.ones((1, 3)).cpu() * iteration,
                Y=torch.Tensor([loss_l.data[0], loss_c.data[0],
                    loss_l.data[0] + loss_c.data[0]]).unsqueeze(0).cpu(),
                win=lot,
                update='append'
            )
            # hacky fencepost solution for 0th epoch plot
            if iteration == 0:
                viz.line(
                    X=torch.zeros((1, 3)).cpu(),
                    Y=torch.Tensor([loc_loss, conf_loss,
                        loc_loss + conf_loss]).unsqueeze(0).cpu(),
                    win=epoch_lot,
                    update=True
                )
        if iteration % 2000 == 0:
            print('Saving state, iter:', iteration)
            # Write periodic checkpoints into save_folder, like the final save.
            torch.save(ssd_net.state_dict(),
                       os.path.join(save_folder,
                                    'ssd300_0712_COCO14_' + repr(iteration) + '.pth'))
    torch.save(ssd_net.state_dict(), os.path.join(save_folder, version + '.pth'))


def adjust_learning_rate(optimizer, gamma, step, base_lr=None):
    """Set every parameter group's learning rate to ``base_lr * gamma ** step``.

    Adapted from the PyTorch ImageNet example:
    https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts that each
            carry an ``'lr'`` entry.
        gamma: multiplicative decay factor applied once per step.
        step: number of decay steps taken so far.
        base_lr: initial learning rate; when omitted, the notebook-level ``lr``
            is used.
    """
    if base_lr is None:
        # Read the module-level `lr` without assigning to that name in this
        # scope: the previous `lr = lr * ...` made `lr` a local variable and
        # raised UnboundLocalError on every call.
        base_lr = lr
    decayed_lr = base_lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = decayed_lr


# Entry point: the guard is true when this cell is executed in the notebook
# (the captured output below shows the loop running), so it starts training.
if __name__ == '__main__':
    train()

Loading Dataset...
loading annotations into memory...
Done (t=9.30s)
creating index...
index created!
Training SSD on VOC07_and_COCO14_train


  return tensor.masked_fill_(mask, value)


Timer: 0.2143 sec.
iter : 2010 || loc: 2.9447 || conf: 5.8861 || dom: 0.0017 || loss: 8.8325 ||

Timer: 0.1963 sec.
iter : 2020 || loc: 2.6709 || conf: 7.0412 || dom: 0.0018 || loss: 9.7139 ||

Timer: 0.2027 sec.
iter : 2030 || loc: 2.9040 || conf: 8.1712 || dom: 0.0012 || loss: 11.0764 ||

Timer: 0.2004 sec.
iter : 2040 || loc: 3.0880 || conf: 6.4760 || dom: 0.0012 || loss: 9.5652 ||

Timer: 0.1958 sec.
iter : 2050 || loc: 3.0226 || conf: 6.0363 || dom: 0.0014 || loss: 9.0603 ||

Timer: 0.2007 sec.
iter : 2060 || loc: 2.2830 || conf: 6.0580 || dom: 0.0011 || loss: 8.3421 ||

Timer: 0.1909 sec.
iter : 2070 || loc: 3.1656 || conf: 6.4843 || dom: 0.0016 || loss: 9.6516 ||

Timer: 0.2146 sec.
iter : 2080 || loc: 2.7288 || conf: 5.2525 || dom: 0.0018 || loss: 7.9830 ||

Timer: 0.1998 sec.
iter : 2090 || loc: 2.4715 || conf: 6.1795 || dom: 0.0019 || loss: 8.6529 ||

Timer: 0.2053 sec.
iter : 2100 || loc: 3.0989 || conf: 7.3395 || dom: 0.0016 || loss: 10.4400 ||

Timer: 0.2013 sec.
iter : 21

Timer: 0.2058 sec.
iter : 2860 || loc: 2.5942 || conf: 6.0533 || dom: 0.0013 || loss: 8.6488 ||

Timer: 0.2073 sec.
iter : 2870 || loc: 2.6357 || conf: 6.2141 || dom: 0.0013 || loss: 8.8511 ||

Timer: 0.2225 sec.
iter : 2880 || loc: 2.6515 || conf: 5.4964 || dom: 0.0015 || loss: 8.1495 ||

Timer: 0.2063 sec.
iter : 2890 || loc: 2.0863 || conf: 5.4561 || dom: 0.0016 || loss: 7.5440 ||

Timer: 0.2116 sec.
iter : 2900 || loc: 2.3548 || conf: 6.0109 || dom: 0.0012 || loss: 8.3668 ||

Timer: 0.2071 sec.
iter : 2910 || loc: 2.0717 || conf: 4.8278 || dom: 0.0020 || loss: 6.9015 ||

Timer: 0.2075 sec.
iter : 2920 || loc: 2.8969 || conf: 5.8254 || dom: 0.0011 || loss: 8.7234 ||

Timer: 0.2067 sec.
iter : 2930 || loc: 2.5328 || conf: 5.2631 || dom: 0.0016 || loss: 7.7976 ||

Timer: 0.2042 sec.
iter : 2940 || loc: 2.4907 || conf: 4.5645 || dom: 0.0013 || loss: 7.0565 ||

Timer: 0.2110 sec.
iter : 2950 || loc: 2.2289 || conf: 4.9048 || dom: 0.0013 || loss: 7.1350 ||

Timer: 0.2075 sec.
iter : 2960

Timer: 0.2044 sec.
iter : 3710 || loc: 2.2189 || conf: 4.6975 || dom: 0.0012 || loss: 6.9177 ||

Timer: 0.2015 sec.
iter : 3720 || loc: 2.8690 || conf: 4.9815 || dom: 0.0012 || loss: 7.8518 ||

Timer: 0.2141 sec.
iter : 3730 || loc: 2.2574 || conf: 5.5723 || dom: 0.0012 || loss: 7.8310 ||

Timer: 0.2056 sec.
iter : 3740 || loc: 3.0043 || conf: 4.1275 || dom: 0.0021 || loss: 7.1338 ||

Timer: 0.2022 sec.
iter : 3750 || loc: 2.1645 || conf: 4.3666 || dom: 0.0017 || loss: 6.5328 ||

Timer: 0.2029 sec.
iter : 3760 || loc: 2.1021 || conf: 4.4766 || dom: 0.0014 || loss: 6.5801 ||

Timer: 0.1993 sec.
iter : 3770 || loc: 1.9441 || conf: 4.4447 || dom: 0.0013 || loss: 6.3901 ||

Timer: 0.2065 sec.
iter : 3780 || loc: 2.8262 || conf: 5.0252 || dom: 0.0013 || loss: 7.8526 ||

Timer: 0.2012 sec.
iter : 3790 || loc: 1.9196 || conf: 5.0874 || dom: 0.0013 || loss: 7.0082 ||

Timer: 0.2155 sec.
iter : 3800 || loc: 2.5124 || conf: 5.0619 || dom: 0.0022 || loss: 7.5765 ||

Timer: 0.2020 sec.
iter : 3810

Timer: 0.2072 sec.
iter : 4560 || loc: 2.5865 || conf: 4.0510 || dom: 0.0007 || loss: 6.6382 ||

Timer: 0.2106 sec.
iter : 4570 || loc: 2.3506 || conf: 4.5393 || dom: 0.0012 || loss: 6.8911 ||

Timer: 0.1976 sec.
iter : 4580 || loc: 2.2670 || conf: 4.3605 || dom: 0.0009 || loss: 6.6285 ||

Timer: 0.2073 sec.
iter : 4590 || loc: 2.6719 || conf: 4.8541 || dom: 0.0010 || loss: 7.5270 ||

Timer: 0.2015 sec.
iter : 4600 || loc: 2.2645 || conf: 4.1626 || dom: 0.0007 || loss: 6.4278 ||

Timer: 0.2020 sec.
iter : 4610 || loc: 2.0869 || conf: 5.0770 || dom: 0.0011 || loss: 7.1650 ||

Timer: 0.2049 sec.
iter : 4620 || loc: 1.9658 || conf: 4.5106 || dom: 0.0014 || loss: 6.4778 ||

Timer: 0.2022 sec.
iter : 4630 || loc: 2.2277 || conf: 4.4819 || dom: 0.0010 || loss: 6.7107 ||

Timer: 0.1985 sec.
iter : 4640 || loc: 1.9925 || conf: 4.0222 || dom: 0.0009 || loss: 6.0156 ||

Timer: 0.2034 sec.
iter : 4650 || loc: 2.3580 || conf: 4.8136 || dom: 0.0009 || loss: 7.1726 ||

Timer: 0.1959 sec.
iter : 4660

Timer: 0.2096 sec.
iter : 5410 || loc: 1.7190 || conf: 4.0404 || dom: 0.0013 || loss: 5.7607 ||

Timer: 0.2041 sec.
iter : 5420 || loc: 1.8649 || conf: 4.4223 || dom: 0.0011 || loss: 6.2883 ||

Timer: 0.2062 sec.
iter : 5430 || loc: 2.1562 || conf: 3.8554 || dom: 0.0011 || loss: 6.0126 ||

Timer: 0.2074 sec.
iter : 5440 || loc: 2.3819 || conf: 4.4260 || dom: 0.0007 || loss: 6.8085 ||

Timer: 0.2001 sec.
iter : 5450 || loc: 1.7881 || conf: 4.7805 || dom: 0.0013 || loss: 6.5699 ||

Timer: 0.2098 sec.
iter : 5460 || loc: 2.0277 || conf: 4.1087 || dom: 0.0009 || loss: 6.1373 ||

Timer: 0.2046 sec.
iter : 5470 || loc: 2.9855 || conf: 5.2952 || dom: 0.0015 || loss: 8.2822 ||

Timer: 0.2032 sec.
iter : 5480 || loc: 2.0946 || conf: 5.1079 || dom: 0.0007 || loss: 7.2032 ||

Timer: 0.2091 sec.
iter : 5490 || loc: 2.1404 || conf: 4.8402 || dom: 0.0011 || loss: 6.9817 ||

Timer: 0.2078 sec.
iter : 5500 || loc: 2.2454 || conf: 4.5870 || dom: 0.0016 || loss: 6.8339 ||

Timer: 0.2059 sec.
iter : 5510

Process Process-8:
KeyboardInterrupt
Process Process-5:
Process Process-6:
Process Process-7:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/users/gpu/utkrsh/anaconda3/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/users/gpu/utkrsh/anaconda3/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/users/gpu/utkrsh/anaconda3/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/users/gpu/utkrsh/anaconda3/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/users/gpu/utkrsh/anaconda3/envs/pytorch/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/users/gpu/utkrsh/anaconda3/envs/pytorch/lib/python3.6/si

KeyboardInterrupt: 