In [19]:
import argparse
import os
import shutil
import time

In [2]:
import numpy as np
import pandas as pd

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torchnet.meter import AverageValueMeter, ClassErrorMeter

# Load data

In [4]:
# Run configuration. Built directly as a Namespace so the name `args` always
# refers to the same kind of object and settings read as attributes
# (args.lr, args.cuda, ...).
args = argparse.Namespace(
    arch="resnet50",  # resnet50, resnet101, resnet152
    pretrained=True,
    datadir="../data",
    checkpointdir="../checkpoint",
    modelbestdir="../checkpoint/modelbest",
    cuda=False,
    optim="adam",  # sgd, adam, rmsprop
    epochs=100,
    batch_size=16,
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-4,
    seed=7,
    workers=4,
    nb_augs=10,
    cv=7,
)

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate os.path.isdir() test.
for directory in (args.datadir, args.checkpointdir, args.modelbestdir):
    os.makedirs(directory, exist_ok=True)

# Only use the GPU when it was requested *and* one is actually available.
args.cuda = args.cuda and torch.cuda.is_available()

# Seed the CPU generator, and the CUDA one when it is in use.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

In [5]:
# Per-channel mean / std handed to Normalize (one value per RGB channel).
_channel_mean = (0.4914, 0.4822, 0.4465)
_channel_std = (0.2023, 0.1994, 0.2010)

# Augmentation first (random padded crop + horizontal flip), then tensor
# conversion and normalisation.
_pipeline_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
transform = transforms.Compose(_pipeline_steps)

In [6]:
# Evaluation has to be deterministic: the shared `transform` applies a random
# crop and a random flip, which would make the validation loss (and therefore
# the "best checkpoint" decision) fluctuate from run to run. The test split
# gets its own pipeline with tensor conversion and the same normalisation only.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# NOTE(review): args.batch_size / args.workers are defined in the config cell
# but the loaders below use their own hard-coded values — confirm which is intended.
trainset = torchvision.datasets.CIFAR10(root=args.datadir, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root=args.datadir, train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Human-readable class names; len(classes) sizes the classifier head.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


# Utility functions

In [7]:
def save_checkpoint(args, state, is_best, filename="checkpoint.pth.tar"):
    """Write ``state`` to the checkpoint directory and optionally mirror it as a best model.

    Args:
        args: object exposing the ``checkpointdir`` and ``modelbestdir`` paths.
        state: object serialised with ``torch.save``.
        is_best: when truthy, the saved file is also copied into
            ``args.modelbestdir`` under the same file name.
        filename: name of the checkpoint file inside both directories.
    """
    os.makedirs(args.checkpointdir, exist_ok=True)
    checkpoint_filepath = os.path.join(args.checkpointdir, filename)
    torch.save(state, checkpoint_filepath)
    if is_best:
        os.makedirs(args.modelbestdir, exist_ok=True)
        # shutil.copyfile needs a complete destination *file* path; passing
        # the bare directory fails.
        best_filepath = os.path.join(args.modelbestdir, filename)
        shutil.copyfile(checkpoint_filepath, best_filepath)

In [8]:
def adjust_learning_rate(args, optimizer, epoch):
    """Apply a step schedule to every parameter group of ``optimizer``.

    The rate is ``args.lr`` multiplied by 0.1 once for each full block of
    30 epochs contained in ``epoch``.
    """
    completed_decays = epoch // 30
    stepped_lr = args.lr * (0.1 ** completed_decays)
    for group in optimizer.param_groups:
        group["lr"] = stepped_lr

# Build model

In [9]:
class ResNet(nn.Module):
    """torchvision ResNet backbone with a freshly initialised linear head.

    Args:
        arch: name of a constructor in ``torchvision.models``
            (e.g. "resnet50").
        is_pretrained: when truthy, pre-trained weights are loaded and every
            backbone parameter is frozen (``requires_grad = False``), so only
            the new classifier can be trained.
        num_classes: number of outputs of the classification layer.
    """

    def __init__(self, arch, is_pretrained, num_classes):
        super(ResNet, self).__init__()
        if is_pretrained:
            print("=> Using pre-trained model '{}'".format(arch))
        model = models.__dict__[arch](pretrained=bool(is_pretrained))
        if is_pretrained:
            # Freeze the backbone; the classifier created below is the only
            # part that keeps requires_grad=True.
            for param in model.parameters():
                param.requires_grad = False
        # Drop the last two children (average pooling and fc).
        self.features = nn.Sequential(*list(model.children())[:-2])
        fc_input_features = model.fc.in_features
        # Size the head from the constructor argument rather than from the
        # notebook-level `classes` global.
        self.classifier = nn.Linear(fc_input_features, num_classes)

    def forward(self, x):
        """Return class scores for a batch ``x``."""
        x = self.features(x)
        # Flatten everything but the batch dimension.
        # NOTE(review): the linear layer expects exactly fc.in_features values
        # per sample, so this presumably relies on the backbone producing a
        # 1x1 feature map (as for 32x32 inputs) — confirm before changing
        # the input size.
        x = x.view(x.size(0), -1)
        return self.classifier(x)

In [10]:
# Build the network from the notebook configuration rather than a hard-coded flag.
model = ResNet(args.arch, args.pretrained, len(classes))
if args.cuda:
    # train()/validate() move every batch to the GPU when args.cuda is set,
    # so the weights have to live there as well.
    model.cuda()

=> Using pre-trained model 'resnet50'


To access each layer, use:

type(list(model.children())[0])

list(model.children())[0]

list(model.children())[0][4][0]

model

In [21]:
# train function
def train(args, trainloader, model, criterion, optimizer, epoch):
    """Run one optimisation pass over ``trainloader`` and report the metrics.

    Args:
        args: configuration object; only ``args.cuda`` is read here.
        trainloader: iterable yielding ``(image, target)`` batches.
        model: network to train (switched to train mode).
        criterion: loss callable applied to ``(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress line.

    Returns:
        The mean reported by the loss meter for this epoch.

    NOTE(review): ``Variable`` and ``loss.data[0]`` are legacy PyTorch idioms;
    newer releases would presumably need ``loss.item()`` — confirm the target version.
    """
    # turn on train mode
    model.train()
    losses = AverageValueMeter()
    top1 = ClassErrorMeter(accuracy=True)  # accuracy instead of error
    top3 = ClassErrorMeter(topk=[3], accuracy=True)  # same convention as validate()

    for image, target in trainloader:
        # The model is not wrapped in DataParallel, so the batch has to be
        # moved to the GPU explicitly.
        if args.cuda:
            image = image.cuda()
            target = target.cuda()
        image_var = Variable(image)
        target_var = Variable(target)

        # compute output and loss
        output = model(image_var)
        loss = criterion(output, target_var)

        # compute gradient and do backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the meter averages per sample.
        losses.add(loss.data[0] * image.size(0), image.size(0))
        top1.add(output.data, target)
        top3.add(output.data, target)

    # Report the epoch metrics in the same layout validate() uses.
    print("   * EPOCH {:>2} | Train loss: {:.4f} | Top1: {:.4f} | Top3: {:.4f}"
          .format(epoch, losses.value()[0], top1.value()[0], top3.value()[0]))
    return losses.value()[0]

```python
# validate function (earlier draft kept for reference; the notebook defines
# another `validate` further down that additionally reports top-3 accuracy)
def validate(args, val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and return the meter's mean loss."""
    model.train(False) # turn off train mode
    losses = AverageValueMeter()
    top1 = ClassErrorMeter(accuracy=True)
    top3 = ClassErrorMeter(topk=[3])
    
    for i, (image, target) in enumerate(val_loader):
        if args.cuda:
            # Plain .cuda(), as in train(): `async` is a reserved word from
            # Python 3.7 on, so `cuda(async=True)` no longer parses.
            image = image.cuda()
            target = target.cuda()
        image_var = Variable(image, volatile=True) # no gradient
        target_var = Variable(target, volatile=True)
        # was `input_var`, which is never defined (NameError)
        output = model(image_var)
        loss = criterion(output, target_var)
        losses.add(loss.data[0] * image.size(0), image.size(0))
        top1.add(output.data, target)
        top3.add(output.data, target)
        
    print("   * EPOCH {:>2} | Accuracy: {:.3f} | Loss: {:.4f}"
          .format(epoch, top1.value()[0], losses.value()[0]))
    return losses.value()[0]
```

In [12]:
# validate function
def validate(args, testloader, model, criterion, epoch):
    """Evaluate ``model`` on ``testloader`` without updating its weights.

    Args:
        args: configuration object; only ``args.cuda`` is read here.
        testloader: iterable yielding ``(image, target)`` batches.
        model: network to evaluate (switched out of train mode).
        criterion: loss callable applied to ``(output, target)``.
        epoch: epoch number, used only in the progress line.

    Returns:
        The mean reported by the loss meter over the whole loader.

    NOTE(review): ``Variable(..., volatile=True)`` and ``loss.data[0]`` are
    legacy PyTorch idioms; newer releases would presumably need
    ``torch.no_grad()`` / ``loss.item()`` — confirm the target version.
    """
    model.train(False)  # turn off train mode
    losses = AverageValueMeter()
    top1 = ClassErrorMeter(accuracy=True)
    top3 = ClassErrorMeter(topk=[3], accuracy=True)

    for image, target in testloader:
        if args.cuda:
            # Plain .cuda(), as in train(): `async` is a reserved word from
            # Python 3.7 on, so `cuda(async=True)` no longer parses.
            image = image.cuda()
            target = target.cuda()
        image_var = Variable(image, volatile=True)  # no gradient
        target_var = Variable(target, volatile=True)
        # was `input_var`, which is never defined (NameError)
        output = model(image_var)
        loss = criterion(output, target_var)
        # Weight the batch loss by the batch size so the meter averages per sample.
        losses.add(loss.data[0] * image.size(0), image.size(0))
        top1.add(output.data, target)
        top3.add(output.data, target)

    print("   * EPOCH {:>2} | Loss: {:.4f} | Top1: {:.4f} | Top3: {:.4f}"
          .format(epoch, losses.value()[0], top1.value()[0], top3.value()[0]))
    return losses.value()[0]

# Train model

In [13]:
# Loss shared by training and validation: cross-entropy over the class scores,
# moved to the GPU when CUDA is enabled.
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda() if args.cuda else criterion

In [17]:
# define optimizer
# Optimise every parameter that still requires gradients. With a frozen
# (pre-trained) backbone that is exactly the classifier's parameters; without
# freezing, the backbone is trained as well instead of being silently left out.
trainable_params = [p for p in model.parameters() if p.requires_grad]

if args.optim == "sgd":
    optimizer = optim.SGD(trainable_params,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
elif args.optim == "adam":
    optimizer = optim.Adam(trainable_params,
                           lr=args.lr,
                           weight_decay=args.weight_decay)
elif args.optim == "rmsprop":
    optimizer = optim.RMSprop(trainable_params,
                              lr=args.lr,
                              weight_decay=args.weight_decay)
else:
    # Fail here rather than with a NameError on `optimizer` in the training loop.
    raise ValueError(
        "Unsupported optimizer '{}'; expected one of: sgd, adam, rmsprop".format(args.optim))

In [22]:
# Start from +inf so the first evaluated epoch always counts as the best so
# far, whatever the scale of the loss.
best_loss = float("inf")
for epoch in range(1, args.epochs + 1):
    start = time.time()

    adjust_learning_rate(args, optimizer, epoch)

    # train for one epoch
    train(args, trainloader, model, criterion, optimizer, epoch)

    # evaluate on validation set (here: the CIFAR-10 test split)
    loss = validate(args, testloader, model, criterion, epoch)

    # remember best loss and save checkpoint
    is_best = loss < best_loss
    best_loss = min(loss, best_loss)
    state = {
        "epoch": epoch,
        "arch": args.arch,
        "state_dict": model.state_dict(),
        "best_loss": best_loss,
    }
    # One file per epoch. The previous expression referenced an undefined
    # name and called .format on a set literal, so it could never run.
    filename = "checkpoint_{0}.pth.tar".format(epoch)
    save_checkpoint(args, state, is_best, filename)
    print("   => Time: {}s".format(round(time.time()-start)))

Process Process-2:
Process Process-1:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.

KeyboardInterrupt: 