## Dependencies

In [1]:
import torch

import torch.nn as nn
import torch.optim as optim

import torch.nn.functional as F
from torch.autograd import Variable

import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import torchvision.models as models

import imp
import os
import sys
import math
import utils.training as train_utils; imp.reload(train_utils)
import utils.plot as plot_utils; imp.reload(plot_utils)
import time
from tqdm import tqdm

In [2]:
# Directory layout used by the rest of the notebook (all relative paths).
DATA_PATH = 'data/'        # root under which datasets are stored
RESULTS_PATH = 'results/'  # where the training/test CSV logs are written
WEIGHTS_PATH = 'models/'   # location for model weight files

## Layers

In [73]:
class Bottleneck(nn.Module):
    """Dense layer with a 1x1 bottleneck: BN-ReLU-Conv(1x1), BN-ReLU-Conv(3x3).

    The 1x1 convolution maps the input to ``4 * growthRate`` channels, the
    padded 3x3 convolution maps that to ``growthRate`` channels, and the
    result is concatenated onto the input along dim 1.

    Args:
        nChannels: number of channels of the incoming tensor.
        growthRate: number of channels this layer appends to its input.
    """

    def __init__(self, nChannels, growthRate):
        super(Bottleneck, self).__init__()
        width = growthRate * 4

        # Stage 1: normalise, then 1x1 convolution to the bottleneck width.
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(in_channels=nChannels, out_channels=width,
                               kernel_size=1, bias=False)

        # Stage 2: normalise, then 3x3 convolution (padding keeps H and W).
        self.bn2 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=growthRate,
                               kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``x`` with ``growthRate`` new feature maps appended on dim 1."""
        squeezed = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(squeezed)))
        return torch.cat((x, new_features), 1)

class SingleLayer(nn.Module):
    """Plain dense layer: BN-ReLU-Conv(3x3), output concatenated to the input.

    Args:
        nChannels: number of channels of the incoming tensor.
        growthRate: number of channels this layer appends to its input.
    """

    def __init__(self, nChannels, growthRate):
        super(SingleLayer, self).__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        # Padding of 1 with a 3x3 kernel keeps the spatial size, which the
        # concatenation in forward() relies on.
        self.conv1 = nn.Conv2d(in_channels=nChannels, out_channels=growthRate,
                               kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``x`` with ``growthRate`` new feature maps appended on dim 1."""
        activated = F.relu(self.bn1(x))
        new_features = self.conv1(activated)
        return torch.cat((x, new_features), 1)

class Transition(nn.Module):
    """Transition between dense blocks: BN-ReLU-Conv(1x1), then 2x2 average pooling.

    Args:
        nChannels: number of channels of the incoming tensor.
        nOutChannels: number of channels produced by the 1x1 convolution.
    """

    def __init__(self, nChannels, nOutChannels):
        super(Transition, self).__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(in_channels=nChannels, out_channels=nOutChannels,
                               kernel_size=1, bias=False)

    def forward(self, x):
        """Change the channel count to ``nOutChannels`` and halve H and W."""
        projected = self.conv1(F.relu(self.bn1(x)))
        return F.avg_pool2d(projected, 2)

## Model

In [74]:
class DenseNet(nn.Module):
    """DenseNet classifier: stem conv, three dense blocks, two transitions, linear head.

    Args:
        growthRate: number of channels each dense layer appends to its input.
        depth: nominal network depth. Each dense block receives
            ``(depth - 4) // 3`` layers, halved when ``bottleneck`` is true.
        reduction: factor applied (then floored) to the channel count at each
            transition; ``1.0`` keeps the channel count unchanged.
        nClasses: size of the output of the final linear layer.
        bottleneck: build the blocks from ``Bottleneck`` layers when true,
            otherwise from ``SingleLayer`` layers.
    """

    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
        super(DenseNet, self).__init__()

        # Layers per dense block; bottleneck layers hold two convolutions each,
        # so only half as many fit into the same nominal depth.
        nDenseBlocks = (depth-4) // 3
        if bottleneck:
            nDenseBlocks //= 2

        # Stem convolution.
        nChannels = 2*growthRate
        self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1,
                               bias=False)

        # Dense block 1 followed by a channel-reducing, downsampling transition.
        self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
        nChannels += nDenseBlocks*growthRate
        nOutChannels = int(math.floor(nChannels*reduction))
        self.trans1 = Transition(nChannels, nOutChannels)

        # Dense block 2 followed by the second transition.
        nChannels = nOutChannels
        self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
        nChannels += nDenseBlocks*growthRate
        nOutChannels = int(math.floor(nChannels*reduction))
        self.trans2 = Transition(nChannels, nOutChannels)

        # Dense block 3 feeds the classifier head directly (no transition).
        nChannels = nOutChannels
        self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks, bottleneck)
        nChannels += nDenseBlocks*growthRate

        self.bn1 = nn.BatchNorm2d(nChannels)
        self.fc = nn.Linear(nChannels, nClasses)

        # Parameter initialisation:
        #   conv   -> weights ~ N(0, sqrt(2 / n)), n = kernel_h * kernel_w * out_channels
        #   BN     -> weight 1, bias 0
        #   linear -> bias 0 (weights keep the nn.Linear default)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
        """Build one dense block as an ``nn.Sequential`` of ``nDenseBlocks`` layers.

        Every layer appends ``growthRate`` channels, so each successive layer
        is constructed with ``growthRate`` more input channels than the last.
        """
        layers = []
        for _ in range(int(nDenseBlocks)):
            if bottleneck:
                layers.append(Bottleneck(nChannels, growthRate))
            else:
                layers.append(SingleLayer(nChannels, growthRate))
            nChannels += growthRate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, nClasses)``.

        The two transitions each halve the spatial size and the head pools
        with an 8x8 window, so the linear layer only receives ``nChannels``
        features per sample when the map entering the head is 8x8 (for
        example 32x32 inputs).
        """
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        out = F.avg_pool2d(F.relu(self.bn1(out)), 8)
        # Flatten per sample instead of torch.squeeze(): a bare squeeze also
        # removes the batch dimension when the batch holds a single sample,
        # which yields a 1-D output that no longer matches the targets.
        out = out.view(out.size(0), -1)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dim is deprecated.
        out = F.log_softmax(self.fc(out), dim=1)
        return out

In [75]:
# Smoke test: build a non-bottleneck DenseNet and print its layer structure.
model = DenseNet(growthRate=12, depth=100, reduction=.4, nClasses=10,
                 bottleneck=False)
print(model)

DenseNet (
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (dense1): Sequential (
    (0): SingleLayer (
      (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True)
      (conv1): Conv2d(24, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): SingleLayer (
      (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True)
      (conv1): Conv2d(36, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (2): SingleLayer (
      (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True)
      (conv1): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (3): SingleLayer (
      (bn1): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True)
      (conv1): Conv2d(60, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (4): SingleLayer (
      (bn1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True)
      (conv1): Conv2d(72, 12, 

## Data

In [76]:
# Mini-batch size shared by the train and test loaders.
BATCH_SIZE = 64
# CIFAR-10 is stored in (and downloaded to) a sub-directory of DATA_PATH.
CIFAR10_PATH = DATA_PATH + 'cifar10/'

In [77]:
# Seed the default (CPU) generator as well as CUDA: seeding CUDA alone leaves
# CPU-side randomness such as weight initialisation unseeded.
torch.manual_seed(1)
torch.cuda.manual_seed(1)

# Per-channel mean / std passed to transforms.Normalize.
normMean = [0.49139968, 0.48215827, 0.44653124]
normStd = [0.24703233, 0.24348505, 0.26158768]
normTransform = transforms.Normalize(normMean, normStd)

# Training pipeline: random 32x32 crop after 4 px of padding plus a random
# horizontal flip, then tensor conversion and normalisation.
trainTransform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normTransform
])
# Test pipeline: no augmentation, only tensor conversion and normalisation.
testTransform = transforms.Compose([
    transforms.ToTensor(),
    normTransform
])

kwargs = {'num_workers': 1, 'pin_memory': True}
# The dataset module is imported as `datasets` at the top of the notebook;
# the previous `dset` alias was never defined and raised NameError on a
# fresh kernel.
trainLoader = DataLoader(
    datasets.CIFAR10(root=CIFAR10_PATH, train=True, download=True,
                     transform=trainTransform),
    batch_size=BATCH_SIZE, shuffle=True, **kwargs)
testLoader = DataLoader(
    datasets.CIFAR10(root=CIFAR10_PATH, train=False, download=True,
                     transform=testTransform),
    batch_size=BATCH_SIZE, shuffle=False, **kwargs)

Files already downloaded and verified
Files already downloaded and verified


## Train

In [82]:
# Set to a weights file path to resume training; None trains from scratch.
existing_weights_fpath=None
nEpochs=1

net = DenseNet(growthRate=12, depth=40, reduction=1.0, 
                   bottleneck=False, nClasses=10)
net = net.cuda()

optimizer = optim.SGD(net.parameters(), lr=1e-1,
                momentum=0.9, weight_decay=1e-4)

print('  + Number of params: {}'.format(
    sum([p.data.nelement() for p in net.parameters()])))

# Build the log paths once so the existence check and open() always refer to
# the same files, whether or not RESULTS_PATH ends with a separator.
train_csv_fpath = os.path.join(RESULTS_PATH, 'train.csv')
test_csv_fpath = os.path.join(RESULTS_PATH, 'test.csv')

if existing_weights_fpath:
    startEpoch = train_utils.load_weights(net, existing_weights_fpath)
    endEpoch = startEpoch + nEpochs
    print ('Resume training at epoch: {}'.format(startEpoch))
    # Append to an existing log, otherwise start a new one
    # (test.csv is assumed to exist whenever train.csv does).
    append_write = 'a' if os.path.exists(train_csv_fpath) else 'w'
else:
    print ("Training new model from scratch")
    startEpoch = 1
    endEpoch = nEpochs
    append_write = 'w'

# The context manager closes both logs even if training raises or is
# interrupted part-way through an epoch.
with open(train_csv_fpath, append_write) as trainF, \
        open(test_csv_fpath, append_write) as testF:
    for epoch in tqdm(range(startEpoch, endEpoch+1)):
        since = time.time()
        train_utils.adjust_opt("sgd", optimizer, epoch)
        train_utils.train(epoch, net, trainLoader, optimizer, trainF)
        train_utils.test(epoch, net, testLoader, optimizer, testF)
        time_elapsed = time.time() - since
        print('Time {:.0f}m {:.0f}s\n'.format(
            time_elapsed // 60, time_elapsed % 60))
        # Launch the plotting script in the background on every epoch except
        # epoch 1.
        if epoch != 1:
            os.system('./utils/plot.py {} &'.format(RESULTS_PATH))


  0%|          | 0/1 [00:00<?, ?it/s]

  + Number of params: 1059298
Training new model from scratch


[A



Epoch 1: Train - Loss: 1.370707	Error: 43.750000


100%|██████████| 1/1 [01:12<00:00, 72.19s/it]

Test - Loss: 1.3404, Error: 4590/10000 (46%)
Time 1m 12s




