# High-level PyTorch Example

In [1]:
import os
import sys
import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
from torchvision import datasets, transforms
from torch.autograd import Variable
from common.params import *
from common.utils import *

In [2]:
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("PyTorch: ", torch.__version__)
print("Numpy: ", np.__version__)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
PyTorch:  0.1.12_2
Numpy:  1.13.1


In [3]:
def create_lenet():
    class LenetModel(nn.Module):
        def __init__(self):
            super(LenetModel, self).__init__()
            self.conv1 = nn.Conv2d(3, 48, kernel_size=(3, 3), padding=(1, 1))
            self.conv2 = nn.Conv2d(48, 48, kernel_size=(3, 3))
            self.conv3 = nn.Conv2d(48, 96, kernel_size=(3, 3), padding=(1, 1))
            self.conv4 = nn.Conv2d(96, 96, kernel_size=(3, 3))
            self.conv5 = nn.Conv2d(96, 192, kernel_size=(3, 3), padding=(1, 1))
            self.conv6 = nn.Conv2d(192, 192, kernel_size=(3, 3))
            # feature map size is 2*2 by pooling
            self.fc1 = nn.Linear(192*2*2, 512)
            self.fc2 = nn.Linear(512, N_CLASSES)

        def forward(self, x):
            x = F.relu(self.conv2(F.relu(self.conv1(x))))
            x = F.max_pool2d(x, kernel_size=(2, 2), stride=(2, 2))
            x = F.dropout(x, 0.25)
            
            x = F.relu(self.conv4(F.relu(self.conv3(x))))
            x = F.max_pool2d(x, kernel_size=(2, 2), stride=(2, 2))
            x = F.dropout(x, 0.25)
            
            x = F.relu(self.conv6(F.relu(self.conv5(x))))
            x = F.max_pool2d(x, kernel_size=(2, 2), stride=(2, 2))
            x = F.dropout(x, 0.25)
            
            x = x.view(-1, 192*2*2)   # reshape Variable
            x = F.dropout(F.relu(self.fc1(x)), 0.5)
            x = self.fc2(x)
            return F.log_softmax(x)
    return LenetModel()

In [4]:
def init_model(m):
    opt = optim.SGD(m.parameters(),
                    lr=LR,
                    momentum=MOMENTUM,
                    weight_decay=0, 
                    nesterov=False)
    return opt

In [5]:
%%time
# Data into format for library
#x_train, x_test, y_train, y_test = mnist_for_library(channel_first=True)
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
# Torch-specific
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)

Downloading http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Done.
Extracting files...
Done.
Preparing train set...
Preparing test set...
Done.
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int64 int64
CPU times: user 2.89 s, sys: 1.55 s, total: 4.44 s
Wall time: 24 s


In [6]:
%%time
sym = create_lenet()
sym.cuda() # CUDA!
print(sym)

LenetModel (
  (conv1): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear (768 -> 512)
  (fc2): Linear (512 -> 10)
)
CPU times: user 1.62 s, sys: 2.48 s, total: 4.1 s
Wall time: 4.62 s


In [7]:
%%time
optimizer = init_model(sym)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 218 µs


In [8]:
%%time
# Sets training = True
sym.train()  
for j in range(EPOCHS):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Get samples
        data = Variable(torch.FloatTensor(data).cuda())
        target = Variable(torch.LongTensor(target).cuda())
        # Init
        optimizer.zero_grad()
        # Forwards
        output = sym(data)
        # Loss
        loss = F.cross_entropy(output, target)
        # Back-prop
        loss.backward()
        optimizer.step()
    # Log
    print(j)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
CPU times: user 7min 11s, sys: 1min 34s, total: 8min 45s
Wall time: 8min 55s


In [9]:
%%time
# Test model
# Sets training = False
sym.eval()
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
y_guess = np.zeros(n_samples, dtype=np.int)
y_truth = y_test[:n_samples]
c = 0
for data, target in yield_mb(x_test, y_test, BATCHSIZE):
    # Get samples
    data = Variable(torch.FloatTensor(data).cuda())
    # Forwards
    output = sym(data)
    pred = output.data.max(1)[1].cpu().numpy().squeeze()
    # Collect results
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred
    c += 1

CPU times: user 1.06 s, sys: 268 ms, total: 1.33 s
Wall time: 1.54 s


In [10]:
print("Accuracy: ", sum(y_guess == y_truth)/len(y_guess))

Accuracy:  0.770132211538
