Notebook for the original dataset loader file

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torchvision
import argparse
from torch.autograd import Variable
import torch.utils.data as data

from data import v2, v1, AnnotationTransform, VOCDetection, detection_collate, VOCroot, VOC_CLASSES
from data import v2, v1, detection_collate
#from data import MSCOCODetection, COCOAnnotationTransform
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import numpy as np
import time
import sys

# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
# Training configuration for the SSD300 / VOC run in this notebook.
train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
# train_sets = 'train'
ssd_dim = 300  # only support 300 now
means = (104, 117, 123)  # only support voc now
num_classes = len(VOC_CLASSES) + 1  # presumably +1 for a background class -- TODO confirm
batch_size = 32
accum_batch_size = 32
# Floor division keeps iter_size an integer count; true division produced 1.0.
iter_size = accum_batch_size // batch_size
max_iter = 120000
weight_decay = 0.0005
stepvalues = (80000, 100000, 120000)  # iterations at which train() steps the learning rate
gamma = 0.1
momentum = 0.9
cuda = True  # request GPU use; only honoured when CUDA is actually available


version = "v2"
basenet = "vgg16_reducedfc.pth"  # weights file name, resolved relative to save_folder
jaccard_threshold = 0.5
start_iter = 0
lr = 1e-3
save_folder = "./weights/"

# Make newly created tensors default to the GPU type only when CUDA is usable.
if cuda and torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

# Select the configuration object that matches `version`.
cfg = v2 if version == 'v2' else v1

In [3]:
# Build the training dataset from VOCroot and train_sets, handing it an
# SSDAugmentation(ssd_dim, means) and an AnnotationTransform() instance.
dataset = VOCDetection(VOCroot, train_sets, 
                           SSDAugmentation(ssd_dim, means),
                           AnnotationTransform())

In [6]:
# Shuffling loader over `dataset`: batches of `batch_size`, loaded in the main
# process (num_workers=0) and assembled by detection_collate; pin_memory
# follows the `cuda` flag.
data_loader = data.DataLoader(dataset, batch_size, num_workers=0,
                             shuffle=True, collate_fn=detection_collate,
                             pin_memory=cuda)

In [7]:
# Get an iterator over the loader so a single batch can be pulled by hand.
it = iter(data_loader)

In [8]:
# Pull one batch from the iterator: the images and their targets.
img, targets = next(it)

In [12]:
# Inspect the shape of the third entry of `targets`.
targets[2].size()

torch.Size([1, 5])

In [3]:
# Build the SSD network in 'train' mode, using the configured input size
# (ssd_dim) instead of a second hard-coded 300.
ssd_net = build_ssd('train', ssd_dim, num_classes)
# `net` is the handle used for forward passes; `ssd_net` keeps pointing at the
# raw model for weight loading and checkpointing.
net = ssd_net

In [4]:
# Wrap the model in DataParallel; `ssd_net` still refers to the unwrapped module.
net = torch.nn.DataParallel(ssd_net)
# Turn on cuDNN benchmark mode.
cudnn.benchmark = True

In [5]:
# Load the base-network weights from save_folder + basenet into ssd_net.vgg.
# torch.load unpickles the file, so only point it at a trusted checkpoint.
# NOTE(review): the message below is printed after torch.load has returned.
vgg_weights = torch.load(save_folder + basenet)
print('Loading base network...')
ssd_net.vgg.load_state_dict(vgg_weights)

# Move the (DataParallel-wrapped) model to the GPU only when CUDA is usable.
if cuda and torch.cuda.is_available():
    net = net.cuda()

def xavier(param):
    """Fill ``param`` in place with Xavier/Glorot uniform values.

    Uses ``init.xavier_uniform_`` when the installed PyTorch provides it and
    falls back to the older ``init.xavier_uniform`` name otherwise, so the
    call works on both old and current releases without a deprecation warning.

    Arguments:
        param: (tensor) the tensor to initialise; modified in place.

    Return:
        None
    """
    initializer = getattr(init, 'xavier_uniform_', None) or init.xavier_uniform
    initializer(param)


def weights_init(m):
    """Initialise a module if it is a ``nn.Conv2d``; leave anything else alone.

    The convolution weight is passed to ``xavier`` and the bias is zeroed.
    Convolutions created with ``bias=False`` have ``m.bias is None``; those are
    handled by skipping the bias step instead of raising.

    Arguments:
        m: (nn.Module) module to initialise, e.g. as visited by ``Module.apply``.

    Return:
        None
    """
    if not isinstance(m, nn.Conv2d):
        return
    xavier(m.weight.data)
    if m.bias is not None:
        m.bias.data.zero_()

Loading base network...


In [6]:
# SGD over every parameter of `net`, using the configured lr, momentum and
# weight decay.
optimizer = optim.SGD(net.parameters(), lr=lr,
                      momentum=momentum, weight_decay=weight_decay)
# The last argument is the CUDA flag; gate it on availability the same way the
# tensor-type and model-placement code does, so a CPU-only machine is not asked
# to use the GPU.
# NOTE(review): the positional 0.5 values are passed straight through;
# presumably one of them is the overlap threshold (cf. jaccard_threshold) --
# confirm against the MultiBoxLoss signature.
criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                         cuda and torch.cuda.is_available())


In [7]:
def my_detection_collate(batch):
    """Custom collate fn for batches of images that carry a different number
    of object annotations (bounding boxes) each.

    Arguments:
        batch: (sequence) samples, each indexable as ``sample[0]`` (image
            tensor) and ``sample[1]`` (annotations convertible to a
            ``torch.FloatTensor``)

    Return:
        A tuple containing:
            1) (tensor) batch of images stacked on their 0 dim
            2) (list of tensors) one FloatTensor of annotations per image, in
               batch order
    """
    # No per-batch debug print here: a collate fn runs once for every batch.
    imgs = [sample[0] for sample in batch]
    annotations = [torch.FloatTensor(sample[1]) for sample in batch]
    return torch.stack(imgs, 0), annotations


In [8]:

# dataset = VOCDetection(VOCroot, train_sets)

In [9]:
save_weights = False  # when True, train() writes a checkpoint every 2000 iterations
print_interval = 2  # train() prints timing and loss every print_interval iterations

def train():
    """Run the SSD training loop from ``start_iter`` up to ``max_iter``.

    Reads the notebook-level objects ``net``, ``ssd_net``, ``optimizer`` and
    ``criterion`` together with the configuration constants (``batch_size``,
    ``lr``, ``gamma``, ``stepvalues``, ``save_folder``, ``save_weights``,
    ``print_interval``, ``cuda``, ...). Builds its own dataset and loader,
    prints timing and loss every ``print_interval`` iterations, optionally
    checkpoints every 2000 iterations, and always saves ``final_model.pth``
    into ``save_folder`` when the loop finishes.

    Return:
        None
    """
    # Gate GPU use on availability, as the module-level CUDA checks do.
    use_cuda = cuda and torch.cuda.is_available()

    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')

    dataset = VOCDetection(VOCroot, train_sets,
                           SSDAugmentation(ssd_dim, means),
                           AnnotationTransform())
    print("Dataset Loaded!")

    # Clamp to 1 so the modulo below cannot divide by zero when the dataset
    # holds fewer samples than one batch.
    epoch_size = max(1, len(dataset) // batch_size)
    print("Training SSD on", dataset.name)
    step_index = 0
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size, num_workers=0,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=use_cuda)

    for iteration in range(start_iter, max_iter):
        # (Re)create the iterator on the first pass and at each epoch boundary.
        if batch_iterator is None or iteration % epoch_size == 0:
            batch_iterator = iter(data_loader)
        if iteration in stepvalues:
            step_index += 1
            # The notebook never defines adjust_learning_rate, so the decay is
            # applied by the private helper below instead of raising NameError
            # at the first step value.
            _adjust_learning_rate(optimizer, lr, gamma, step_index)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        images, targets = next(batch_iterator)
        if use_cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]

        # forward pass
        t0 = time.time()
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        # NOTE(review): .data[0] is the pre-0.4 PyTorch scalar accessor,
        # consistent with the Variable/volatile API used above.
        loc_loss += loss_l.data[0]  # get tensor from Variable
        conf_loss += loss_c.data[0]
        if iteration % print_interval == 0:
            print("Timer: %.4f sec. " % (t1 - t0))
            print("iter: " + repr(iteration) + "|| Loss: %.4f || " % (loss.data[0]), end=' ')
        if iteration % 2000 == 0 and save_weights:
            print(" Saving state, iter: ", iteration)
            try:
                # Use save_folder so checkpoints land next to the final model.
                torch.save(ssd_net.state_dict(), save_folder + "ssd300_VOC07_" +
                           repr(iteration) + ".pth")
            except IOError as err:
                print("Some file related eror in saving", repr(err))
            except Exception as err:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate instead of being swallowed here.
                print("Some other error while saving", repr(err))
    torch.save(ssd_net.state_dict(), save_folder + "final_model.pth")


def _adjust_learning_rate(optimizer, base_lr, gamma, step):
    """Set every parameter group's learning rate to ``base_lr * gamma ** step``."""
    new_lr = base_lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

In [10]:
# Start training; the recorded run below was stopped by hand (KeyboardInterrupt).
train()

Loading Dataset...
Dataset Loaded!
Training SSD on VOC0712


  return tensor.masked_fill_(mask, value)


Timer: 15.1098 sec. 
iter: 0|| Loss: 19.1433 ||  Timer: 0.1617 sec. 
iter: 2|| Loss: 16.7160 ||  Timer: 0.1534 sec. 
iter: 4|| Loss: 15.4030 ||  Timer: 0.1545 sec. 
iter: 6|| Loss: 15.1084 ||  Timer: 0.1529 sec. 
iter: 8|| Loss: 15.3910 ||  Timer: 0.1576 sec. 
iter: 10|| Loss: 15.6563 ||  Timer: 0.1542 sec. 
iter: 12|| Loss: 15.3834 ||  Timer: 0.1519 sec. 
iter: 14|| Loss: 15.2503 ||  Timer: 0.1540 sec. 
iter: 16|| Loss: 15.1467 ||  Timer: 0.1545 sec. 
iter: 18|| Loss: 14.7455 ||  Timer: 0.1680 sec. 
iter: 20|| Loss: 14.9030 ||  Timer: 0.1639 sec. 
iter: 22|| Loss: 14.9024 ||  Timer: 0.1571 sec. 
iter: 24|| Loss: 14.8545 ||  Timer: 0.1570 sec. 
iter: 26|| Loss: 14.6721 ||  Timer: 0.1537 sec. 
iter: 28|| Loss: 14.7718 ||  Timer: 0.1545 sec. 
iter: 30|| Loss: 14.8269 ||  Timer: 0.1563 sec. 
iter: 32|| Loss: 14.5173 ||  Timer: 0.1543 sec. 
iter: 34|| Loss: 14.2355 ||  Timer: 0.1567 sec. 
iter: 36|| Loss: 14.8015 ||  Timer: 0.1576 sec. 
iter: 38|| Loss: 14.4792 ||  Timer: 0.1538 sec. 
iter

KeyboardInterrupt: 