# High-level Chainer Example

In [1]:
import os
import sys
import numpy as np
import math
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import cuda
from common.params import *
from common.utils import *

In [2]:
# Report the runtime environment this notebook was executed in.
for label, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Chainer: ", chainer.__version__),
    ("Numpy: ", np.__version__),
):
    print(label, value)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Chainer:  2.0.2
Numpy:  1.13.1


In [3]:
class SymbolModule(chainer.Chain):
    """Small convolutional network: two conv-conv-pool-dropout stages and two dense layers.

    The first convolution takes 3 input channels, and ``fc1`` expects
    ``100*8*8`` input features, i.e. a 32x32 input halved twice by the
    two 2x2 max-pooling steps.
    """

    def __init__(self):
        """Create the four convolutions and two fully connected layers."""
        super(SymbolModule, self).__init__()
        # Register child links inside init_scope(): passing them as keyword
        # arguments to Chain.__init__ is deprecated since Chainer v2.
        with self.init_scope():
            self.conv1 = L.Convolution2D(3, 50, ksize=(3,3), pad=(1,1))
            self.conv2 = L.Convolution2D(50, 50, ksize=(3,3), pad=(1,1))
            self.conv3 = L.Convolution2D(50, 100, ksize=(3,3), pad=(1,1))
            self.conv4 = L.Convolution2D(100, 100, ksize=(3,3), pad=(1,1))
            # feature map size is 8*8 by pooling
            self.fc1 = L.Linear(100*8*8, 512)
            self.fc2 = L.Linear(512, N_CLASSES)

    def __call__(self, x):
        """Run the forward pass.

        Args:
            x: Input batch fed to ``conv1`` (3 channels, channel-first).

        Returns:
            Output of ``fc2`` with ``N_CLASSES`` units per sample; no softmax
            is applied here.
        """
        # Stage 1: conv -> relu -> conv -> relu -> 2x2 max-pool -> dropout
        h = F.relu(self.conv2(F.relu(self.conv1(x))))
        h = F.max_pooling_2d(h, ksize=(2,2), stride=(2,2))
        h = F.dropout(h, 0.25)

        # Stage 2: same layout with 100 feature maps
        h = F.relu(self.conv4(F.relu(self.conv3(h))))
        h = F.max_pooling_2d(h, ksize=(2,2), stride=(2,2))
        h = F.dropout(h, 0.25)

        # Dense head: hidden layer with dropout, then the class scores
        h = F.dropout(F.relu(self.fc1(h)), 0.5)
        return self.fc2(h)

In [4]:
def init_model(m):
    """Build a MomentumSGD optimizer and attach it to ``m``.

    Args:
        m: The link whose parameters the optimizer is set up on.

    Returns:
        The ``optimizers.MomentumSGD`` instance, configured with ``LR`` and
        ``MOMENTUM`` and already set up on ``m``.
    """
    sgd = optimizers.MomentumSGD(lr=LR, momentum=MOMENTUM)
    sgd.setup(m)
    return sgd

In [5]:
def to_chainer(array, **kwargs):
    """Wrap ``array`` in a ``chainer.Variable``, moving it to the GPU first when ``GPU`` is set.

    Args:
        array: Array to wrap (the notebook passes minibatches of data or labels).
        **kwargs: Forwarded unchanged to ``chainer.Variable``.

    Returns:
        A ``chainer.Variable`` holding the array, on the GPU when ``GPU`` is
        truthy and on the host otherwise.
    """
    # The network is only moved to the GPU when GPU is truthy, so the data
    # must follow the same flag or model and inputs end up on different devices.
    if GPU:
        array = cuda.to_gpu(array)
    return chainer.Variable(array, **kwargs)

In [6]:
%%time
# Load the CIFAR data as channel-first arrays for the library
# (alternative dataset: mnist_for_library(channel_first=True))
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
# Show the shape and dtype of each of the four arrays
print(*[arr.shape for arr in (x_train, x_test, y_train, y_test)])
print(*[arr.dtype for arr in (x_train, x_test, y_train, y_test)])

Downloading http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Done.
Extracting files...
Done.
Preparing train set...
Preparing test set...
Done.
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 3.01 s, sys: 1.67 s, total: 4.68 s
Wall time: 20.9 s


In [7]:
%%time
# Instantiate the network; with GPU enabled, make device 0 current
# and copy the model parameters onto it.
sym = SymbolModule()
if GPU:
    cuda.get_device(0).use()
    sym.to_gpu()

CPU times: user 192 ms, sys: 176 ms, total: 368 ms
Wall time: 380 ms


In [8]:
%%time
# Create the MomentumSGD optimizer (via init_model) and set it up on the network
optimizer = init_model(sym)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 105 µs


In [9]:
%%time
# Wrap the network in the loss-computing classifier once; rebuilding the
# wrapper for every minibatch is loop-invariant work.
classifier = L.Classifier(sym)
for j in range(EPOCHS):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # One optimisation step: the optimizer calls classifier(data, target)
        # to obtain the loss, then updates the parameters of `sym`
        optimizer.update(classifier, to_chainer(data), to_chainer(target))
    # Log the index of the finished epoch
    print(j)

0
1
2
3
4
5
6
7
8
9
CPU times: user 3min 34s, sys: 42.1 s, total: 4min 16s
Wall time: 4min 16s


In [10]:
%%time
# Only whole minibatches are scored, so the label array is truncated to a
# multiple of BATCHSIZE (assumes yield_mb yields full batches only - TODO confirm).
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin int replaces the deprecated np.int alias (removed in NumPy 1.24)
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]

# train=False puts dropout in inference mode; no_backprop_mode avoids building
# a computational graph that is never used during evaluation.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    for c, (data, _target) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
        # Forwards: predicted class is the index of the largest output
        pred = chainer.cuda.to_cpu(sym(to_chainer(data)).data.argmax(-1))
        # Collect results into the slot belonging to minibatch c
        y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred

CPU times: user 1.49 s, sys: 220 ms, total: 1.71 s
Wall time: 1.71 s


In [11]:
# Fraction of correct predictions. np.mean over the boolean mask is vectorised,
# whereas the builtin sum() walks the NumPy array element by element.
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.780048076923
