In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from utils import *
import torchvision.datasets as datasets
import time
import vgg as vgg_test

In [3]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

In [4]:
# Build the layer stack from the 'B' entry of vgg_test.cfg with batch norm enabled,
# then wrap it in the VGG container defined in the local vgg module.
feature_layers = vgg_test.make_layers(vgg_test.cfg['B'], batch_norm=True)
model = vgg_test.VGG(feature_layers)

In [5]:
# Per-channel mean / std passed to transforms.Normalize; shared by both splits.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training split: random horizontal flip and random 32-pixel crop (padding 4)
# before tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, 4),
    transforms.ToTensor(),
    normalize,
])
train_dataset = datasets.CIFAR10(root='./data', train=True,
                                 transform=train_transform, download=True)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128, shuffle=True,
    num_workers=5, pin_memory=True)

# Validation split: no augmentation, only tensor conversion and normalisation.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])
val_dataset = datasets.CIFAR10(root='./data', train=False,
                               transform=val_transform)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=128, shuffle=False,
    num_workers=5, pin_memory=True)

# define loss function (criterion); the optimizer is created in a later cell
criterion = nn.CrossEntropyLoss()

Files already downloaded and verified


In [6]:
# Adam over every parameter of `model`: learning rate 3e-4, weight decay 5e-4.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=3e-4,
    weight_decay=0.0005,
)

In [16]:
def train(train_loader, model, criterion, optimizer, epoch, writer=None):
    """
    Run one train epoch.

    Args:
        train_loader: iterable of (input, target) batches; must support len().
        model: network to optimise; put into train mode here.
        criterion: callable returning the loss for (output, target).
        optimizer: optimiser whose zero_grad() / step() run once per batch.
        epoch: epoch index; printed in progress lines and used to derive
            the logging step.
        writer: optional object exposing add_scalar(tag, value, global_step).
            When None (the default) no scalars are logged.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    num_batches = len(train_loader)

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # compute gradient and do one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.float()
        loss = loss.float()
        # measure accuracy and record loss
        prec1 = accuracy(output.detach(), target)[0]
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 100 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, num_batches, batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))

        # Logging is optional: the signature defaults writer to None, so an
        # unguarded add_scalar call would raise AttributeError.
        if writer is not None:
            # Monotonic step across epochs so successive batches do not all
            # land on the same x-axis position.
            global_step = epoch * num_batches + i
            writer.add_scalar('Loss/Train', losses.val, global_step)
            writer.add_scalar('Prec@1', top1.val, global_step)

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: 2-D score tensor; the top-k is taken along dimension 1.
        target: label tensor whose first dimension is the batch size.
        topk: iterable of k values to evaluate.

    Returns:
        List of one-element tensors, one per k in the order given, each
        holding the percentage of samples whose target is among the k
        highest-scoring entries.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk largest scores per sample, transposed to
        # (maxk, batch) so that row j holds every sample's (j+1)-th guess.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape, not view: `correct` inherits the transposed layout, so a
            # multi-row slice is non-contiguous and view(-1) raises for k > 1.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
def validate(val_loader, model, criterion):
    """
    Evaluate the model on every batch of val_loader.

    Switches the model to eval mode, accumulates the average loss together
    with top-1 and top-3 precision, prints a progress line every 200 batches
    and a final top-1 summary, and returns the average top-1 precision.
    """
    timer = AverageMeter()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top3_meter = AverageMeter()

    progress_template = ('Test: [{0}/{1}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@3 {top3.val:.3f} ({top3.avg:.3f})')

    # evaluation mode for the whole pass
    model.eval()

    tick = time.time()
    for step, (images, labels) in enumerate(val_loader):
        # forward pass and loss without building an autograd graph
        with torch.no_grad():
            logits = model(images)
            batch_loss = criterion(logits, labels)

        logits = logits.float()
        batch_loss = batch_loss.float()
        n_samples = images.size(0)

        # precision at k = 1, 2, 3; only the first and third are tracked
        precisions = accuracy(logits.data, labels, topk=(1,2,3))
        loss_meter.update(batch_loss.item(), n_samples)
        top1_meter.update(precisions[0].item(), n_samples)
        top3_meter.update(precisions[2].item(), n_samples)

        # wall-clock time spent on this batch
        timer.update(time.time() - tick)
        tick = time.time()

        if step % 200 != 0:
            continue
        print(progress_template.format(
            step, len(val_loader), batch_time=timer, loss=loss_meter,
            top1=top1_meter, top3=top3_meter))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1_meter))

    return top1_meter.avg

In [13]:
class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes (all reset to 0 by reset()):
        val:   most recent value passed to update().
        sum:   running total of val * n over all updates.
        count: running total of n over all updates.
        avg:   sum / count, or 0 while count is still 0.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest value, weighted by `n` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard the division: an update with n=0 on an empty meter would
        # otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

In [9]:
# Number of full passes over train_loader.
train_epochs = 20

# Scalars logged by train() are written under the 'batchnorm_base' directory.
writer = SummaryWriter('batchnorm_base', flush_secs=10)

for epoch in range(train_epochs):
    train(train_loader, model, criterion, optimizer, epoch, writer=writer)


Epoch: [0][0/391]	Time 3.579 (3.579)	Data 0.151 (0.151)	Loss 2.3348 (2.3348)	Prec@1 7.031 (7.031)
Epoch: [0][100/391]	Time 3.315 (3.234)	Data 0.001 (0.002)	Loss 1.6947 (1.8852)	Prec@1 32.812 (27.050)
Epoch: [0][200/391]	Time 3.594 (3.226)	Data 0.001 (0.002)	Loss 1.3795 (1.6886)	Prec@1 48.438 (35.642)
Epoch: [0][300/391]	Time 3.258 (3.232)	Data 0.001 (0.001)	Loss 1.2637 (1.5720)	Prec@1 50.000 (40.949)
Epoch: [1][0/391]	Time 3.559 (3.559)	Data 0.067 (0.067)	Loss 1.2761 (1.2761)	Prec@1 55.469 (55.469)
Epoch: [1][100/391]	Time 3.314 (3.230)	Data 0.001 (0.001)	Loss 1.0356 (1.1077)	Prec@1 64.844 (60.388)
Epoch: [1][200/391]	Time 3.272 (3.233)	Data 0.001 (0.001)	Loss 0.9901 (1.0648)	Prec@1 66.406 (62.123)
Epoch: [1][300/391]	Time 3.223 (3.230)	Data 0.001 (0.001)	Loss 0.9507 (1.0282)	Prec@1 64.844 (63.541)
Epoch: [2][0/391]	Time 3.449 (3.449)	Data 0.066 (0.066)	Loss 0.9287 (0.9287)	Prec@1 69.531 (69.531)
Epoch: [2][100/391]	Time 3.181 (3.225)	Data 0.001 (0.001)	Loss 0.8688 (0.8648)	Prec@1 73.4

In [17]:
# Evaluate `model` on val_loader. validate() returns the average top-1 precision;
# as the cell's last expression, that value is echoed as the cell output.
validate(val_loader, model, criterion)

Test: [0/79]	Time 1.311 (1.311)	Loss 0.4082 (0.4082)	Prec@1 84.375 (84.375)	Prec@3 98.438 (98.438)
 * Prec@1 84.630


84.63

In [11]:
# torch.save(model.state_dict(), 'model_base.pt')

In [39]:
# Second model: torchvision's VGG-11 built with default arguments, with its own
# Adam optimizer using the same hyper-parameters as the first model.
# NOTE(review): "wo" presumably means "without batch norm" - confirm.
# The constructor is reached through the `models` alias from the import cell:
# `import torchvision.models as models` binds only `models`, so the bare name
# `torchvision` is not defined by the notebook's own imports.
model_wo = models.vgg11()
optimizer_wo = torch.optim.Adam(model_wo.parameters(), 3e-4,
                                weight_decay=0.0005)

In [46]:
# One training pass for model_wo. 10000 is passed as the epoch argument, which
# train() prints in its progress lines; no writer is supplied, so the writer
# parameter keeps its default of None.
train(train_loader, model_wo, criterion, optimizer_wo, epoch=10000)

Epoch: [10000][0/391]	Time 5.464 (5.464)	Data 0.072 (0.072)	Loss 1.1889 (1.1889)	Prec@1 57.812 (57.812)
Epoch: [10000][50/391]	Time 3.341 (3.419)	Data 0.001 (0.002)	Loss 1.1646 (1.2292)	Prec@1 54.688 (56.020)
Epoch: [10000][100/391]	Time 3.523 (3.383)	Data 0.001 (0.001)	Loss 1.1371 (1.2004)	Prec@1 57.812 (57.356)
Epoch: [10000][150/391]	Time 3.304 (3.403)	Data 0.001 (0.001)	Loss 0.9491 (1.1710)	Prec@1 60.938 (58.371)
Epoch: [10000][200/391]	Time 3.264 (3.386)	Data 0.001 (0.001)	Loss 1.1725 (1.1639)	Prec@1 57.031 (58.660)
Epoch: [10000][250/391]	Time 3.291 (3.365)	Data 0.001 (0.001)	Loss 1.0466 (1.1535)	Prec@1 58.594 (59.008)
Epoch: [10000][300/391]	Time 3.266 (3.359)	Data 0.001 (0.001)	Loss 1.1317 (1.1394)	Prec@1 60.156 (59.406)
Epoch: [10000][350/391]	Time 3.211 (3.353)	Data 0.001 (0.001)	Loss 1.1335 (1.1278)	Prec@1 56.250 (59.894)


In [45]:
# NOTE(review): this evaluates `model`, not `model_wo` - confirm which is intended.
# get_model_error is not defined in this notebook; presumably it comes from
# `from utils import *`.
validation_error = get_model_error(model, val_loader)
print('validation_error: ', validation_error)

test error: 0.446


TypeError: not all arguments converted during string formatting