In [11]:
import numpy as np
import torch
from torchvision import models
import torch.nn as nn

# necessary imports
import os
import sys
import time
import argparse

import torch
import torch.optim as optim
import numpy as np
# import model
# from torchsummary import summary

# from datasets import ALOVDataset, ILSVRC2014_DET_Dataset
from helper import (Rescale, shift_crop_training_sample,crop_sample, NormalizeToTensor)

In [12]:
# constants
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
input_size = 224
kSaveModel = 20000  # save model after every 20000 steps
batchSize = 50  # number of samples in a batch
kGeneratedExamplesPerImage = 10  # generate 10 synthetic samples per image
transform = NormalizeToTensor()
bb_params = {}
enable_tensorboard = False
if enable_tensorboard:
    from tensorboardX import SummaryWriter
    writer = SummaryWriter()

args = None
parser = argparse.ArgumentParser(description='GOTURN Training')
num_batches=500000
lr=1e-5
gamma=0.1
momentum=0.9
weight_decay=0.0005
lr_decay_step=100000
d='../data/'
s='../saved_checkpoints/exp3/'
lshift=5
lscale=15
minsc=-0.4
maxsc=0.4
seed=800
resume=''
b=50
save_freq=20000

In [16]:

class GoNet(nn.Module):
    """ Neural Network class
        Two stream model:
        ________
       |        | conv layers              Untrained Fully
       |Previous|------------------>|      Connected Layers
       | frame  |                   |    ___     ___     ___
       |________|                   |   |   |   |   |   |   |   fc4
                   Pretrained       |   |   |   |   |   |   |    * (left)
                   CaffeNet         |-->|fc1|-->|fc2|-->|fc3|--> * (top)
                   Convolution      |   |   |   |   |   |   |    * (right)
                   layers           |   |___|   |___|   |___|    * (bottom)
        ________                    |   (4096)  (4096)  (4096)  (4)
       |        |                   |
       | Current|------------------>|
       | frame  |
       |________|
    """
    def __init__(self):
        super(GoNet, self).__init__()
        caffenet = models.alexnet(pretrained=True)
        self.convnet = nn.Sequential(*list(caffenet.children())[:-1])
        for param in self.convnet.parameters():
            param.requires_grad = False
        self.classifier = nn.Sequential(
                nn.Linear(256*6*6*2, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4),
                )
        self.weight_init()

    def weight_init(self):
        for m in self.classifier.modules():
            # fully connected layers are weight initialized with
            # mean=0 and std=0.005 (in tracker.prototxt) and
            # biases are set to 1
            # tracker.prototxt link: https://goo.gl/iHGKT5
            if isinstance(m, nn.Linear):
                m.bias.data.fill_(1)
                m.weight.data.normal_(0, 0.005)

    def forward(self, x, y):
        x1 = self.convnet(x)
        x1 = x1.view(x.size(0), 256*6*6)
        x2 = self.convnet(y)
        x2 = x2.view(x.size(0), 256*6*6)
        x = torch.cat((x1, x2), 1)
        x = self.classifier(x)
        return x

In [17]:
# constants
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
input_size = 224
kSaveModel = 20000  # save model after every 20000 steps
batchSize = 50  # number of samples in a batch
kGeneratedExamplesPerImage = 10  # generate 10 synthetic samples per image
transform = NormalizeToTensor()
bb_params = {}
enable_tensorboard = False
if enable_tensorboard:
    from tensorboardX import SummaryWriter
    writer = SummaryWriter()

args = None
parser = argparse.ArgumentParser(description='GOTURN Training')
num_batches=500000
lr=1e-5
gamma=0.1
momentum=0.9
weight_decay=0.0005
lr_decay_step=100000
d='../data/'
s='../saved_checkpoints/exp3/'
lshift=5
lscale=15
minsc=-0.4
maxsc=0.4
seed=800
resume=''
b=50
save_freq=20000

def main():

    global args, batchSize, kSaveModel, bb_params
    args = parser.parse_args()
    print(args)
    batchSize = args.batch_size
    kSaveModel = args.save_freq
    np.random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    if cuda:
        torch.cuda.manual_seed_all(args.manual_seed)

    # load bounding box motion model params
    #bb_params['lambda_shift_frac'] = args.lambda_shift_frac
    #bb_params['lambda_scale_frac'] = args.lambda_scale_frac
    #bb_params['min_scale'] = args.min_scale
    #bb_params['max_scale'] = args.max_scale

    # load datasets
    alov = ALOVDataset(os.path.join(args.data_directory,
                       'imagedata++/'),
                       os.path.join(args.data_directory,
                       'alov300++_rectangleAnnotation_full/'),
                       transform, input_size)
    imagenet = ILSVRC2014_DET_Dataset(os.path.join(args.data_directory,
                                      'ILSVRC2014_DET_train/'),
                                      os.path.join(args.data_directory,
                                      'ILSVRC2014_DET_bbox_train/'),
                                      bb_params,
                                      transform,
                                      input_size)
    # list of datasets to train on
    datasets = [alov, imagenet]

    # load model
    net = model.GoNet().to(device)
    # summary(net, [(3, 224, 224), (3, 224, 224)])
    loss_fn = torch.nn.L1Loss(size_average=False).to(device)

    # initialize optimizer
    optimizer = optim.SGD(net.classifier.parameters(),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    if os.path.exists(args.save_directory):
        print('Directory %s already exists' % (args.save_directory))
    else:
        os.makedirs(args.save_directory)

    # start training
    net = train_model(net, datasets, loss_fn, optimizer)

    # save trained model
    checkpoint = {'state_dict': net.state_dict()}
    path = os.path.join(args.save_directory, 'pytorch_goturn.pth.tar')
    torch.save(checkpoint, path)


def get_training_batch(num_running_batch, running_batch, dataset):
    '''
    Implements GOTURN batch formation regimen.
    '''
    global args, batchSize
    done = False
    N = kGeneratedExamplesPerImage+1
    train_batch = None
    x1_batch, x2_batch, y_batch = make_transformed_samples(dataset, args)
    assert(x1_batch.shape[0] == x2_batch.shape[0] == y_batch.shape[0] == N)
    count_in = min(batchSize - num_running_batch, N)
    remain = N - count_in
    running_batch['previmg'][num_running_batch:
                             num_running_batch+count_in] = x1_batch[:count_in]
    running_batch['currimg'][num_running_batch:
                             num_running_batch+count_in] = x2_batch[:count_in]
    running_batch['currbb'][num_running_batch:
                            num_running_batch+count_in] = y_batch[:count_in]
    num_running_batch = num_running_batch + count_in
    if remain > 0:
        done = True
        train_batch = running_batch.copy()
        running_batch['previmg'][:remain] = x1_batch[-remain:]
        running_batch['currimg'][:remain] = x2_batch[-remain:]
        running_batch['currbb'][:remain] = y_batch[-remain:]
        num_running_batch = remain
    return running_batch, train_batch, done, num_running_batch


def make_transformed_samples(dataset, args):
    '''
    Given a dataset, it picks a random sample from it and returns a batch
    of (kGeneratedExamplesPerImage+1) samples. The batch contains true sample
    from dataset and kGeneratedExamplesPerImage samples, which are created
    artifically with augmentation by GOTURN smooth motion model.
    '''
    idx = np.random.randint(dataset.len, size=1)[0]
    # unscaled original sample (single image and bb)
    orig_sample = dataset.get_orig_sample(idx)
    # cropped scaled sample (two frames and bb)
    true_sample, _ = dataset.get_sample(idx)
    true_tensor = transform(true_sample)
    x1_batch = torch.Tensor(kGeneratedExamplesPerImage + 1, 3,
                            input_size, input_size)
    x2_batch = torch.Tensor(kGeneratedExamplesPerImage + 1, 3,
                            input_size, input_size)
    y_batch = torch.Tensor(kGeneratedExamplesPerImage + 1, 4)

    # initialize batch with the true sample
    x1_batch[0] = true_tensor['previmg']
    x2_batch[0] = true_tensor['currimg']
    y_batch[0] = true_tensor['currbb']

    scale = Rescale((input_size, input_size))
    for i in range(kGeneratedExamplesPerImage):
        sample = orig_sample
        # unscaled current image crop with box
        curr_sample, opts_curr = shift_crop_training_sample(sample, bb_params)
        # unscaled previous image crop with box
        prev_sample, opts_prev = crop_sample(sample)
        scaled_curr_obj = scale(curr_sample, opts_curr)
        scaled_prev_obj = scale(prev_sample, opts_prev)
        training_sample = {'previmg': scaled_prev_obj['image'],
                           'currimg': scaled_curr_obj['image'],
                           'currbb': scaled_curr_obj['bb']}
        sample = transform(training_sample)
        x1_batch[i+1] = sample['previmg']
        x2_batch[i+1] = sample['currimg']
        y_batch[i+1] = sample['currbb']

    return x1_batch, x2_batch, y_batch


def train_model(model, datasets, criterion, optimizer):

    global args, writer
    since = time.time()
    curr_loss = 0
    lr = args.learning_rate
    flag = False
    start_itr = 0
    num_running_batch = 0
    running_batch = {'previmg': torch.Tensor(batchSize, 3, input_size, input_size),
                     'currimg': torch.Tensor(batchSize, 3, input_size, input_size),
                     'currbb': torch.Tensor(batchSize, 4)}
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.lr_decay_step,
                                          gamma=args.gamma)

    # resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_itr = checkpoint['itr']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            num_running_batch = checkpoint['num_running_batch']
            running_batch = checkpoint['running_batch']
            lr = checkpoint['lr']
            np.random.set_state(checkpoint['np_rand_state'])
            torch.set_rng_state(checkpoint['torch_rand_state'])
            print("=> loaded checkpoint '{}' (iteration {})"
                  .format(args.resume, checkpoint['itr']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if not os.path.isdir(args.save_directory):
        os.makedirs(args.save_directory)

    itr = start_itr
    st = time.time()
    while itr < args.num_batches:

        model.train()
        if (args.resume and os.path.isfile(args.resume) and
           itr == start_itr and (not flag)):
            checkpoint = torch.load(args.resume)
            i = checkpoint['dataset_indx']
            flag = True
        else:
            i = 0

        # train on datasets
        # usually ALOV and ImageNet
        while i < len(datasets):
            dataset = datasets[i]
            i = i+1
            (running_batch, train_batch,
                done, num_running_batch) = get_training_batch(num_running_batch,
                                                              running_batch,
                                                              dataset)
            # print(i, num_running_batch, done)
            if done:
                scheduler.step()
                # load sample
                x1 = train_batch['previmg'].to(device)
                x2 = train_batch['currimg'].to(device)
                y = train_batch['currbb'].requires_grad_(False).to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                output = model(x1, x2)
                loss = criterion(output, y)

                # backward + optimize
                loss.backward()
                optimizer.step()

                # statistics
                curr_loss = loss.item()
                end = time.time()
                itr = itr + 1
                print('[training] step = %d/%d, loss = %f, time = %f'
                      % (itr, args.num_batches, curr_loss, end-st))
                sys.stdout.flush()
                del(train_batch)
                st = time.time()

                if enable_tensorboard:
                    writer.add_scalar('train/batch_loss', curr_loss, itr)

                if itr > 0 and itr % kSaveModel == 0:
                    path = os.path.join(args.save_directory,
                                        'model_itr_' + str(itr) + '_loss_' +
                                        str(round(curr_loss, 3)) + '.pth.tar')
                    save_checkpoint({'itr': itr,
                                     'np_rand_state': np.random.get_state(),
                                     'torch_rand_state': torch.get_rng_state(),
                                     'l1_loss': curr_loss,
                                     'state_dict': model.state_dict(),
                                     'optimizer': optimizer.state_dict(),
                                     'scheduler': scheduler.state_dict(),
                                     'num_running_batch': num_running_batch,
                                     'running_batch': running_batch,
                                     'lr': lr,
                                     'dataset_indx': i}, path)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    if enable_tensorboard:
        writer.export_scalars_to_json("./all_scalars.json")
        writer.close()
    return model


def save_checkpoint(state, filename='checkpoint.pth.tar'):
    torch.save(state, filename)


if __name__ == "__main__":
    main()

usage: ipykernel_launcher.py [-h]
ipykernel_launcher.py: error: unrecognized arguments: -f /run/user/263244/jupyter/kernel-47d5a810-f339-4156-8113-7366d0f3bd64.json


SystemExit: 2