Source: https://github.com/bamos/densenet.pytorch

### setup

In [1]:
# default libraries
import os
import argparse
import time

In [2]:
# pytorch libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchnet.meter import AverageValueMeter, ClassErrorMeter

In [3]:
# Run configuration: hyper-parameters, device flag and filesystem locations,
# exposed as attributes of a single namespace.
args = argparse.Namespace(
    data_path='../data/cifar10/',
    epochs=300,
    batch_size=64,
    optim='sgd',  # optimizer selector, e.g. 'sgd' or 'adam'
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,  # handed to the optimizer as its weight_decay
    cuda=True,
    seed=7,
    workers=4,
    intermediate_path='../intermediate/densenet/',
)

# Create the dataset and checkpoint directories when they are missing.
for directory in (args.data_path, args.intermediate_path):
    if not os.path.isdir(directory):
        os.makedirs(directory)

# Fall back to the CPU when CUDA was requested but is not available.
args.cuda = args.cuda and torch.cuda.is_available()

# Seed the CPU generator, and the CUDA one when the GPU is in use.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

### model

In [4]:
# bottleneck layer
class Bottleneck(nn.Module):
    """Dense layer with a 1x1 bottleneck.

    Applies BN-ReLU-Conv(1x1)-BN-ReLU-Conv(3x3) and concatenates the
    ``growthRate`` resulting feature maps to the input along the channel
    axis, so the output has ``nChannels + growthRate`` channels.
    """

    def __init__(self, nChannels, growthRate):
        # nn.Module must be initialised before sub-modules are attached
        super().__init__()
        # the 1x1 convolution widens to four times the growth rate
        bottleneck_width = growthRate * 4
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(
            nChannels, bottleneck_width, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(bottleneck_width)
        self.conv2 = nn.Conv2d(
            bottleneck_width, growthRate, kernel_size=3, padding=1,
            bias=False)

    def forward(self, x):
        """Return ``x`` with the newly computed feature maps appended."""
        # the weight-free ReLU comes from the functional API
        reduced = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(reduced)))
        return torch.cat((x, new_features), 1)

In [5]:
# without bottleneck
class SingleLayer(nn.Module):
    """Dense layer without a bottleneck: BN-ReLU-Conv(3x3).

    The ``growthRate`` new feature maps are concatenated to the input along
    the channel axis, giving ``nChannels + growthRate`` output channels.
    """

    def __init__(self, nChannels, growthRate):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(
            nChannels, growthRate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``x`` with the newly computed feature maps appended."""
        activated = F.relu(self.bn1(x))
        new_features = self.conv1(activated)
        return torch.cat((x, new_features), 1)

In [6]:
# transition layer
class Transition(nn.Module):
    """Transition stage: BN-ReLU-Conv(1x1)-AvgPool(2x2).

    The 1x1 convolution maps ``nChannels`` to ``nOutChannels`` and the
    pooling halves both spatial dimensions.
    """

    def __init__(self, nChannels, nOutChannels):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(
            nChannels, nOutChannels, kernel_size=1, bias=False)

    def forward(self, x):
        """Return the channel-mapped, spatially down-sampled feature map."""
        activated = F.relu(self.bn1(x))
        projected = self.conv1(activated)
        # pooling has no parameters, so the functional form is enough
        return F.avg_pool2d(projected, 2)

In [7]:
# main model
class DenseNet(nn.Module):
    """DenseNet classifier made of three dense blocks and two transitions.

    Args:
        growthRate: number of feature maps every dense layer appends.
        depth: nominal network depth; each dense block holds
            ``(depth - 4) // 3`` layers, halved when ``bottleneck`` is true.
        reduction: factor applied to the channel count by each transition
            (the product is truncated to an int).
        nClasses: number of outputs of the final linear layer.
        bottleneck: if true, build the blocks from ``Bottleneck`` layers and
            use a ``2 * growthRate``-channel stem; otherwise use
            ``SingleLayer`` layers and a 16-channel stem.
    """

    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck):
        super(DenseNet, self).__init__()

        # number of dense layers inside each of the three blocks
        nDenseBlocks = (depth-4) // 3
        if bottleneck:
            nDenseBlocks //= 2
            nChannels = 2*growthRate
        else:
            nChannels = 16

        self.conv1 = nn.Conv2d(3, nChannels, kernel_size=3, padding=1,
                               bias=False)

        # block 1 + transition 1
        self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks,
                                       bottleneck)
        nChannels += nDenseBlocks*growthRate
        nOutChannels = int(nChannels*reduction)
        self.trans1 = Transition(nChannels, nOutChannels)

        # block 2 + transition 2
        nChannels = nOutChannels
        self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks,
                                       bottleneck)
        nChannels += nDenseBlocks*growthRate
        nOutChannels = int(nChannels*reduction)
        self.trans2 = Transition(nChannels, nOutChannels)

        # block 3 feeds the classifier head directly (no transition)
        nChannels = nOutChannels
        self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks,
                                       bottleneck)
        nChannels += nDenseBlocks*growthRate

        self.bn1 = nn.BatchNorm2d(nChannels)
        self.fc = nn.Linear(nChannels, nClasses)

        # overwrite the default initialisation of every sub-module
        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear parameters in place.

        Conv weights are drawn from N(0, 2 / (kh * kw * out_channels)),
        batch-norm layers start as the identity (weight 1, bias 0) and the
        linear bias is zeroed; the linear weight keeps its default init.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # in-place variants replace the deprecated init.normal/constant
                init.normal_(m.weight, mean=0, std=(2.0 / n) ** 0.5)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.constant_(m.bias, 0)

    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
        """Build one dense block of ``nDenseBlocks`` stacked dense layers.

        Every layer receives ``growthRate`` more input channels than the
        previous one, because each layer concatenates its output to its
        input.
        """
        layer_cls = Bottleneck if bottleneck else SingleLayer
        layers = []
        for _ in range(int(nDenseBlocks)):
            layers.append(layer_cls(nChannels, growthRate))
            nChannels += growthRate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, nClasses)``.

        The head average-pools with an 8x8 window, so the feature map
        reaching it must be 8x8 (e.g. 32x32 inputs, halved by each of the
        two transitions) for the flattened size to match ``self.fc``.
        """
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        out = F.avg_pool2d(F.relu(self.bn1(out)), 8)
        # Flatten everything except the batch axis. A bare squeeze() would
        # also drop the batch axis for a batch of one sample.
        out = out.view(out.size(0), -1)
        # normalise over the class axis explicitly
        out = F.log_softmax(self.fc(out), dim=1)
        return out

### utils

In [8]:
def adjust_learning_rate(args, optimizer, epoch):
    """Set the SGD learning rate for ``epoch`` according to a step schedule.

    The rate is ``args.lr`` before epoch 150, ``args.lr * 0.1`` from epoch
    150 and ``args.lr * 0.01`` from epoch 225. It is derived from ``epoch``
    alone, so the schedule stays correct when the first call happens
    mid-schedule (e.g. a resumed run) or an epoch is skipped.

    Args:
        args: configuration providing ``optim`` and ``lr``.
        optimizer: object whose ``param_groups`` entries get their ``'lr'``
            key overwritten.
        epoch: current epoch number.

    Nothing is changed when ``args.optim`` is not ``'sgd'``.
    """
    if args.optim != 'sgd':
        return

    if epoch >= 225:
        lr = args.lr * 0.01
    elif epoch >= 150:
        lr = args.lr * 0.1
    else:
        lr = args.lr

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

### train

In [9]:
def train(args, train_loader, model, optimizer, epoch):
    """Run one optimisation pass over ``train_loader`` and print a summary.

    Args:
        args: run configuration; only ``args.cuda`` is read here.
        train_loader: iterable yielding ``(data, target)`` batches.
        model: network to optimise; its output is passed to ``F.nll_loss``,
            so it is expected to be log-probabilities.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the printed summary.
    """
    model.train()  # training-mode behaviour for Dropout / BatchNorm
    losses = AverageValueMeter()
    error = ClassErrorMeter(topk=[1], accuracy=False)  # report error rate
    start = time.time()

    for data, target in train_loader:
        if args.cuda:
            data = data.cuda()
            target = target.cuda()

        output = model(data)
        # the model emits log-probabilities, so the loss is the negative
        # log-probability assigned to the target class
        loss = F.nll_loss(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = data.size(0)
        # loss is a 0-dim tensor: .item() extracts the Python float
        # (indexing it with [0] raises on current PyTorch). The meter gets
        # the batch-summed loss together with the number of samples.
        losses.add(loss.item() * batch_size, batch_size)
        # detach so the meter never sees a graph-attached tensor
        error.add(output.detach(), target)

    print("=> EPOCH {} | Time: {}s | Loss: {:.4f} | Error: {:.2f}%".format(
            epoch, int(time.time()-start),
            losses.value()[0], error.value(k=1)))

In [10]:
def validate(args, val_loader, model, optimizer, epoch):
    """Evaluate ``model`` on ``val_loader`` and print loss and error rate.

    Args:
        args: run configuration; only ``args.cuda`` is read here.
        val_loader: iterable yielding ``(data, target)`` batches.
        model: network to evaluate; its output is passed to ``F.nll_loss``,
            so it is expected to be log-probabilities.
        optimizer: unused; kept so the signature mirrors ``train``.
        epoch: unused; kept so the signature mirrors ``train``.
    """
    model.eval()  # inference-mode behaviour for Dropout / BatchNorm
    losses = AverageValueMeter()
    error = ClassErrorMeter(topk=[1], accuracy=False)  # report error rate
    start = time.time()

    # Replaces the removed ``volatile=True`` flag: no autograd graph is
    # recorded while evaluating.
    with torch.no_grad():
        for data, target in val_loader:
            if args.cuda:
                data = data.cuda()
                target = target.cuda()

            output = model(data)
            loss = F.nll_loss(output, target)

            batch_size = data.size(0)
            # .item() extracts the float from the 0-dim loss tensor; the
            # meter gets the batch-summed loss and the number of samples
            losses.add(loss.item() * batch_size, batch_size)
            error.add(output, target)

    print("   * Validate | Time: {}s | Loss: {:.4f} | Error: {:.2f}%".format(
            int(time.time()-start), losses.value()[0], error.value(k=1)))

### prepare

In [11]:
# data loaders
# Per-channel mean / std used to standardise the images (presumably the
# CIFAR-10 training-set statistics -- confirm against the data).
normMean = [0.49139968, 0.48215827, 0.44653124]
normStd = [0.24703233, 0.24348505, 0.26158768]
normTransform = transforms.Normalize(normMean, normStd)

# Training images are augmented (padded random crop + horizontal flip)
# before being converted to tensors and normalised.
trainTransform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normTransform,
])
# Evaluation images are only converted and normalised.
testTransform = transforms.Compose([
    transforms.ToTensor(),
    normTransform,
])

# The training split is downloaded on demand and reshuffled every epoch.
train_set = datasets.CIFAR10(
    root=args.data_path, train=True, download=True,
    transform=trainTransform)
train_loader = DataLoader(
    train_set, batch_size=args.batch_size, shuffle=True,
    num_workers=args.workers)

# The test split is read from the same root and kept in a fixed order.
val_set = datasets.CIFAR10(
    root=args.data_path, train=False, transform=testTransform)
val_loader = DataLoader(
    val_set, batch_size=args.batch_size, shuffle=False,
    num_workers=args.workers)

Files already downloaded and verified


In [12]:
# create the model
# Bottleneck DenseNet: depth 100, growth rate 12, reduction 0.5, 10 classes.
model = DenseNet(growthRate=12, depth=100, reduction=0.5,
                 bottleneck=True, nClasses=10)

# Report the total number of scalar parameters.
n_params = sum(p.data.numel() for p in model.parameters())
print("=> Number of params: {}".format(n_params))

# Move the parameters to the GPU when CUDA is enabled.
if args.cuda:
    model = model.cuda()

=> Number of params: 769162


In [13]:
# define the optimizer
# SGD uses the configured learning rate and momentum; Adam and RMSprop keep
# the library's default learning rate. All three apply args.weight_decay.
if args.optim == "sgd":
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
elif args.optim == "adam":
    optimizer = optim.Adam(model.parameters(),
                           weight_decay=args.weight_decay)
elif args.optim == "rmsprop":
    optimizer = optim.RMSprop(model.parameters(),
                              weight_decay=args.weight_decay)
else:
    # fail loudly instead of silently training with an unintended optimizer
    raise ValueError(
        "unknown optimizer {!r}; expected 'sgd', 'adam' or 'rmsprop'".format(
            args.optim))

### run

In [14]:
# train
# Epochs are numbered 1..args.epochs. For each one: update the learning
# rate, run a training pass, then evaluate on the validation loader.
for epoch in range(1, args.epochs + 1):
    adjust_learning_rate(args=args, optimizer=optimizer, epoch=epoch)
    train(args=args, train_loader=train_loader, model=model,
          optimizer=optimizer, epoch=epoch)
    validate(args=args, val_loader=val_loader, model=model,
             optimizer=optimizer, epoch=epoch)

=> EPOCH 1 | Time: 189s | Loss: 1.5004 | Error: 54.87%
   * Validate | Time: 10s | Loss: 1.2864 | Error: 44.48%
=> EPOCH 2 | Time: 188s | Loss: 0.9510 | Error: 33.82%
   * Validate | Time: 10s | Loss: 0.8999 | Error: 31.44%
=> EPOCH 3 | Time: 188s | Loss: 0.7419 | Error: 25.90%
   * Validate | Time: 10s | Loss: 0.6976 | Error: 23.93%
=> EPOCH 4 | Time: 189s | Loss: 0.6120 | Error: 21.21%
   * Validate | Time: 10s | Loss: 0.7367 | Error: 25.06%
=> EPOCH 5 | Time: 189s | Loss: 0.5456 | Error: 18.96%
   * Validate | Time: 10s | Loss: 0.5753 | Error: 19.65%
=> EPOCH 6 | Time: 188s | Loss: 0.5006 | Error: 17.36%
   * Validate | Time: 10s | Loss: 0.5991 | Error: 19.43%
=> EPOCH 7 | Time: 189s | Loss: 0.4679 | Error: 16.12%
   * Validate | Time: 10s | Loss: 0.6161 | Error: 19.66%
=> EPOCH 8 | Time: 189s | Loss: 0.4400 | Error: 15.21%
   * Validate | Time: 10s | Loss: 0.4660 | Error: 15.31%
=> EPOCH 9 | Time: 189s | Loss: 0.4150 | Error: 14.33%
   * Validate | Time: 10s | Loss: 0.4968 | Error:

AttributeError: 'Namespace' object has no attribute 'intermediate'

In [15]:
# Persist the current parameters as "latest.pth" in the checkpoint directory.
checkpoint_path = os.path.join(args.intermediate_path, "latest.pth")
torch.save(model.state_dict(), checkpoint_path)