In [2]:
### WIDE RESNET IMPLEMENTATION ###

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Variable

import sys
import numpy as np

def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True)

def conv_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        init.xavier_uniform_(m.weight, gain=np.sqrt(2))
        init.constant_(m.bias, 0)
    elif classname.find('BatchNorm') != -1:
        init.constant_(m.weight, 1)
        init.constant_(m.bias, 0)

class wide_basic(nn.Module):
    def __init__(self, in_planes, planes, dropout_rate, stride=1):
        super(wide_basic, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True),
            )

    def forward(self, x):
        out = self.dropout(self.conv1(F.relu(self.bn1(x))))
        out = self.conv2(F.relu(self.bn2(out)))
        out += self.shortcut(x)

        return out

class Wide_ResNet(nn.Module):
    def __init__(self, depth, widen_factor, dropout_rate, num_classes):
        super(Wide_ResNet, self).__init__()
        self.in_planes = 16

        assert ((depth-4)%6 ==0), 'Wide-resnet depth should be 6n+4'
        n = (depth-4)/6
        k = widen_factor

        print('| Wide-Resnet %dx%d' %(depth, k))
        nStages = [16, 16*k, 32*k, 64*k]

        self.conv1 = conv3x3(3,nStages[0])
        self.layer1 = self._wide_layer(wide_basic, nStages[1], n, dropout_rate, stride=1)
        self.layer2 = self._wide_layer(wide_basic, nStages[2], n, dropout_rate, stride=2)
        self.layer3 = self._wide_layer(wide_basic, nStages[3], n, dropout_rate, stride=2)
        self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9)
        self.linear = nn.Linear(nStages[3], num_classes)

    def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride):
        strides = [stride] + [1]*(int(num_blocks)-1)
        layers = []

        for stride in strides:
            layers.append(block(self.in_planes, planes, dropout_rate, stride))
            self.in_planes = planes

        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)

        return out

if __name__ == '__main__':
    net=Wide_ResNet(28, 10, 0.3, 10)
    y = net(Variable(torch.randn(1,3,32,32)))

    print(y.size())

| Wide-Resnet 28x10
torch.Size([1, 10])


In [5]:
############### Pytorch CIFAR configuration file ###############
import math

start_epoch = 1
num_epochs = 200
batch_size = 128
optim_type = 'SGD'

mean = {
    'cifar10': (0.4914, 0.4822, 0.4465),
    'cifar100': (0.5071, 0.4867, 0.4408),
}

std = {
    'cifar10': (0.2023, 0.1994, 0.2010),
    'cifar100': (0.2675, 0.2565, 0.2761),
}

# Only for cifar-10
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

def learning_rate(init, epoch):
    optim_factor = 0
    if(epoch > 160):
        optim_factor = 3
    elif(epoch > 120):
        optim_factor = 2
    elif(epoch > 60):
        optim_factor = 1

    return init*math.pow(0.2, optim_factor)

def get_hms(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)

    return h, m, s

In [None]:
##### TRAINING CELL #####
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms

import os
import sys
import time
import datetime


#Parameters settings
depth = 40 ##can be 10, 16, 22, 28(default), 34, 40
net_type = 'wide-resnet'
lr = 0.1
widen_factor = 2 #any numer, 10(default)
dropout = 0.3
dataset = 'cifar10'
testOnly = False
resume = False

# Hyper Parameter settings
use_cuda = torch.cuda.is_available()
best_acc = 0

# Data Uplaod
print('\n[Phase 1] : Data Preparation')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean[dataset], std[dataset]),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean[dataset], std[dataset]),
])

if(dataset == 'cifar10'):
    print("| Preparing CIFAR-10 dataset...")
    sys.stdout.write("| ")
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
    num_classes = 10
elif(dataset == 'cifar100'):
    print("| Preparing CIFAR-100 dataset...")
    sys.stdout.write("| ")
    trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=False, transform=transform_test)
    num_classes = 100

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Define net
net = Wide_ResNet(depth, widen_factor, dropout, num_classes)
file_name = 'wide-resnet-'+str(depth)+'x'+str(widen_factor)


# Test only option
if (testOnly):
    print('\n[Test Phase] : Model setup')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/'+dataset+os.sep+file_name+'.t7')
    net = checkpoint['net']

    if use_cuda:
        net.cuda()
        net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    net.eval()
    net.training = False
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)

            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

        acc = 100.*correct/total
        print("| Test Result\tAcc@1: %.2f%%" %(acc))

    sys.exit(0)

# Model
print('\n[Phase 2] : Model setup')
if(resume):
    # Load checkpoint
    print('| Resuming from checkpoint...')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/'+dataset+os.sep+file_name+'.t7')
    net = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
else:
    print('| Building net type [' + net_type + ']...')
    net.apply(conv_init)

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
    print('| Going fast AF with C U D A *o* !')

criterion = nn.CrossEntropyLoss()

# Training
def train(epoch):
    net.train()
    net.training = True
    train_loss = 0
    correct = 0
    total = 0
    optimizer = optim.SGD(net.parameters(), lr=learning_rate(lr, epoch), momentum=0.9, weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, learning_rate(lr, epoch)))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda() # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)               # Forward Propagation
        loss = criterion(outputs, targets)  # Loss
        loss.backward()  # Backward Propagation
        optimizer.step() # Optimizer update

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                %(epoch, num_epochs, batch_idx+1,
                    (len(trainset)//batch_size)+1, loss.item(), 100.*correct/total))
        sys.stdout.flush()

def test(epoch):
    global best_acc
    net.eval()
    net.training = False
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

        # Save checkpoint when best model
        acc = 100.*correct/total
        print("\n| Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" %(epoch, loss.item(), acc))

        if acc > best_acc:
            print('| Saving Best model...\t\t\tTop1 = %.2f%%' %(acc))
            state = {
                    'net':net.module if use_cuda else net,
                    'acc':acc,
                    'epoch':epoch,
            }
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            save_point = './checkpoint/'+dataset+os.sep
            if not os.path.isdir(save_point):
                os.mkdir(save_point)
            torch.save(state, save_point+file_name+'.t7')
            best_acc = acc

print('\n[Phase 3] : Training model')
print('| Training Epochs = ' + str(num_epochs))
print('| Initial Learning Rate = ' + str(lr))
print('| Optimizer = ' + str(optim_type))

elapsed_time = 0
for epoch in range(start_epoch, start_epoch+num_epochs):
    start_time = time.time()

    train(epoch)
    test(epoch)

    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    print('| Elapsed time : %d:%02d:%02d'  %(get_hms(elapsed_time)))

print('\n[Phase 4] : Testing model')
print('* Test results : Acc@1 = %.2f%%' %(best_acc))


[Phase 1] : Data Preparation
| Preparing CIFAR-10 dataset...
| Files already downloaded and verified
| Wide-Resnet 40x2

[Phase 2] : Model setup
| Building net type [wide-resnet]...
| Going fast AF with C U D A *o* !

[Phase 3] : Training model
| Training Epochs = 200
| Initial Learning Rate = 0.1
| Optimizer = SGD

=> Training Epoch #1, LR=0.1000
| Epoch [  1/200] Iter[391/391]		Loss: 1.2877 Acc@1: 38.544%
| Validation Epoch #1			Loss: 1.4315 Acc@1: 51.48%
| Saving Best model...			Top1 = 51.48%
| Elapsed time : 0:00:59

=> Training Epoch #2, LR=0.1000
| Epoch [  2/200] Iter[391/391]		Loss: 0.9578 Acc@1: 58.736%
| Validation Epoch #2			Loss: 1.1576 Acc@1: 60.03%
| Saving Best model...			Top1 = 60.03%
| Elapsed time : 0:01:58

=> Training Epoch #3, LR=0.1000
| Epoch [  3/200] Iter[391/391]		Loss: 0.7840 Acc@1: 65.484%
| Validation Epoch #3			Loss: 1.0964 Acc@1: 64.01%
| Saving Best model...			Top1 = 64.01%
| Elapsed time : 0:02:58

=> Training Epoch #4, LR=0.1000
| Epoch [  4/200] Iter

| Epoch [ 44/200] Iter[391/391]		Loss: 0.3948 Acc@1: 85.552%
| Validation Epoch #44			Loss: 0.5660 Acc@1: 81.10%
| Elapsed time : 0:43:29

=> Training Epoch #45, LR=0.1000
| Epoch [ 45/200] Iter[391/391]		Loss: 0.3570 Acc@1: 85.292%
| Validation Epoch #45			Loss: 0.7185 Acc@1: 77.11%
| Elapsed time : 0:44:28

=> Training Epoch #46, LR=0.1000
| Epoch [ 46/200] Iter[391/391]		Loss: 0.5731 Acc@1: 85.356%
| Validation Epoch #46			Loss: 0.4151 Acc@1: 80.61%
| Elapsed time : 0:45:28

=> Training Epoch #47, LR=0.1000
| Epoch [ 47/200] Iter[391/391]		Loss: 0.5026 Acc@1: 85.438%
| Validation Epoch #47			Loss: 0.3826 Acc@1: 79.84%
| Elapsed time : 0:46:26

=> Training Epoch #48, LR=0.1000
| Epoch [ 48/200] Iter[391/391]		Loss: 0.3908 Acc@1: 85.348%
| Validation Epoch #48			Loss: 0.6840 Acc@1: 78.21%
| Elapsed time : 0:47:24

=> Training Epoch #49, LR=0.1000
| Epoch [ 49/200] Iter[391/391]		Loss: 0.4718 Acc@1: 85.396%
| Validation Epoch #49			Loss: 0.3552 Acc@1: 84.20%
| Saving Best model...			To

| Epoch [ 91/200] Iter[391/391]		Loss: 0.1241 Acc@1: 94.178%
| Validation Epoch #91			Loss: 0.2151 Acc@1: 88.74%
| Elapsed time : 1:29:31

=> Training Epoch #92, LR=0.0200
| Epoch [ 92/200] Iter[391/391]		Loss: 0.2712 Acc@1: 94.300%
| Validation Epoch #92			Loss: 0.3623 Acc@1: 90.44%
| Elapsed time : 1:30:30

=> Training Epoch #93, LR=0.0200
| Epoch [ 93/200] Iter[391/391]		Loss: 0.2832 Acc@1: 94.084%
| Validation Epoch #93			Loss: 0.2848 Acc@1: 89.10%
| Elapsed time : 1:31:28

=> Training Epoch #94, LR=0.0200
| Epoch [ 94/200] Iter[391/391]		Loss: 0.1976 Acc@1: 94.104%
| Validation Epoch #94			Loss: 0.3292 Acc@1: 88.90%
| Elapsed time : 1:32:27

=> Training Epoch #95, LR=0.0200
| Epoch [ 95/200] Iter[391/391]		Loss: 0.2950 Acc@1: 94.540%
| Validation Epoch #95			Loss: 0.3632 Acc@1: 89.18%
| Elapsed time : 1:33:27

=> Training Epoch #96, LR=0.0200
| Epoch [ 96/200] Iter[391/391]		Loss: 0.1884 Acc@1: 94.350%
| Validation Epoch #96			Loss: 0.3958 Acc@1: 88.59%
| Elapsed time : 1:34:26

=

| Epoch [137/200] Iter[391/391]		Loss: 0.0288 Acc@1: 99.150%
| Validation Epoch #137			Loss: 0.2670 Acc@1: 93.55%
| Saving Best model...			Top1 = 93.55%
| Elapsed time : 2:14:52

=> Training Epoch #138, LR=0.0040
| Epoch [138/200] Iter[391/391]		Loss: 0.0234 Acc@1: 99.162%
| Validation Epoch #138			Loss: 0.2807 Acc@1: 93.29%
| Elapsed time : 2:15:51

=> Training Epoch #139, LR=0.0040
| Epoch [139/200] Iter[391/391]		Loss: 0.0325 Acc@1: 99.160%
| Validation Epoch #139			Loss: 0.2683 Acc@1: 93.41%
| Elapsed time : 2:16:48

=> Training Epoch #140, LR=0.0040
| Epoch [140/200] Iter[391/391]		Loss: 0.0224 Acc@1: 99.202%
| Validation Epoch #140			Loss: 0.3384 Acc@1: 92.89%
| Elapsed time : 2:17:46

=> Training Epoch #141, LR=0.0040
| Epoch [141/200] Iter[391/391]		Loss: 0.1168 Acc@1: 99.190%%
| Validation Epoch #141			Loss: 0.2912 Acc@1: 93.01%
| Elapsed time : 2:18:44

=> Training Epoch #142, LR=0.0040
| Epoch [142/200] Iter[391/391]		Loss: 0.0389 Acc@1: 99.210%
| Validation Epoch #142			Los