# Chainer cifar10 tutorial
Chainer 公式の CIFAR サンプル（下記リンク）を、自分の理解が深まるように少し改変したものです。  
https://github.com/pfnet/chainer/blob/master/examples/cifar/train_cifar.py

In [22]:
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys; sys.argv=['']; del sys

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer import Variable
import copy # test iterator copy


In [23]:
# Dataset to train on; the loading cell recognises 'cifar10', 'cifar100' and 'mnist'.
dataset = 'cifar10'
# dataset = 'mnist'

batchsize = 128  # samples per mini-batch, used for both the train and test iterators
epochsize = 100  # number of iterations of the outer (per-epoch) training loop

# Load dataset

In [24]:
# Load the train/test splits selected by `dataset` and record how many
# classes the final layer of the network must output.
if dataset == 'cifar10':
    print('Using CIFAR10 dataset.')
    class_labels = 10
    train, test = chainer.datasets.get_cifar10()
elif dataset == 'cifar100':
    print('Using CIFAR100 dataset.')
    class_labels = 100
    train, test = chainer.datasets.get_cifar100()
elif dataset == 'mnist':
    print('Using mnist dataset.')
    class_labels = 10
    train, test = chainer.datasets.get_mnist()
    # Reshape every image to (1, 28, 28) so the convolutional network
    # receives a channel-first image array, like the CIFAR samples.
    train = [(it[0].reshape(1, 28, 28),it[1]) for it in train]
    test = [(it[0].reshape(1, 28, 28),it[1]) for it in test]
else:
    # Fail fast: without this branch `train`, `test` and `class_labels` stay
    # undefined and a later cell dies with a confusing NameError.
    raise ValueError(
        "Unknown dataset {!r}; expected 'cifar10', 'cifar100' or 'mnist'.".format(dataset))

Using CIFAR10 dataset.


In [25]:
# Number of samples in the training and test splits.
print(len(train))
print(len(test))

50000
10000


In [26]:
# Shape of the first training sample's image array (element 0 of the first sample).
train[0][0].shape

(3, 32, 32)

In [27]:
# Container type of the first training sample's image array.
type(train[0][0])

numpy.ndarray

In [28]:
# train = [(it[0].flatten(),it[1]) for it in train]
# test = [(it[0].flatten(),it[1]) for it in test]

In [29]:
# Build mini-batch iterators over the already-loaded train/test datasets.
train_iter = chainer.iterators.SerialIterator(train, batchsize)
# The test iterator is created with repeat=False and shuffle=False.
test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)
# Number of test samples; used later to average the per-batch test metrics.
testsize = len(test)
testsize

10000

# Network

In [30]:
class VGG(chainer.Chain):
    """Small VGG-style CNN: four conv -> batch-norm -> ReLU -> max-pool stages
    followed by two fully connected layers.

    Args:
        class_labels: Number of output units of the final linear layer
            (one score per class).

    NOTE(review): the batch-normalization links are always called with their
    default arguments, so this class makes no explicit train/test switch --
    confirm that evaluation behaves as intended on the installed Chainer
    version.
    """

    def __init__(self, class_labels):
        initializer = chainer.initializers.HeNormal()
        # Output channel count of each convolution stage.
        c1 = 32
        c2 = 64
        c3 = 64
        c4 = 128

        super(VGG, self).__init__(
            # Only conv1 infers its input channel count (None). Every later
            # convolution takes the previous stage's output channels, so the
            # c1..c4 values above are the single place to change the widths.
            conv1=L.Convolution2D(None, c1, 3, stride=1, pad=0),
            conv2=L.Convolution2D(c1, c2, 3, stride=1, pad=0),
            conv3=L.Convolution2D(c2, c3, 3, stride=1, pad=1),
            conv4=L.Convolution2D(c3, c4, 3, stride=1, pad=1),
            # NOTE(review): 512 is presumably c4 * 2 * 2, i.e. the flattened
            # size left after the four pooling stages for this notebook's
            # inputs -- re-check it before changing c4 or the image size.
            fc1=L.Linear(512, 1000, initialW=initializer),
            fc2=L.Linear(1000, class_labels, initialW=initializer),
            bnorm1=L.BatchNormalization(c1),
            bnorm2=L.BatchNormalization(c2),
            bnorm3=L.BatchNormalization(c3),
            bnorm4=L.BatchNormalization(c4),
        )

    def __call__(self, x):
        """Run the forward pass on a batch of images.

        Args:
            x: Input batch passed straight to the first convolution.

        Returns:
            The output of the final linear layer (unnormalised class scores;
            no softmax is applied here).
        """
        # Each stage: convolution -> batch norm -> ReLU -> 2x2 max pooling with stride 2.
        h = F.max_pooling_2d(F.relu(self.bnorm1(self.conv1(x))), 2, stride=2)
        h = F.max_pooling_2d(F.relu(self.bnorm2(self.conv2(h))), 2, stride=2)
        h = F.max_pooling_2d(F.relu(self.bnorm3(self.conv3(h))), 2, stride=2)
        h = F.max_pooling_2d(F.relu(self.bnorm4(self.conv4(h))), 2, stride=2)
        h = F.relu(self.fc1(h))
        y = self.fc2(h)
        return y

In [31]:
class Classifier(chainer.Chain):
    """Wrap a predictor and compute softmax cross-entropy loss and accuracy.

    Args:
        predictor: Link that maps an input batch to per-class scores.

    Attributes:
        loss: Loss of the most recent call, or None before the first call.
        accuracy: Accuracy of the most recent call, or None before the first call.
    """

    def __init__(self, predictor):
        super(Classifier, self).__init__(predictor=predictor)
        # Make `loss` / `accuracy` readable even before the first forward pass.
        self.clear()

    def clear(self):
        """Reset the cached loss and accuracy."""
        self.loss = None
        self.accuracy = None

    def __call__(self, x, t):
        """Compute the loss for inputs `x` against labels `t`.

        Args:
            x: Input batch forwarded to the predictor.
            t: Ground-truth labels for the batch.

        Returns:
            The softmax cross-entropy loss, which is also stored in `self.loss`.
        """
        self.clear()
        y = self.predictor(x)
        # Store the loss on the instance: clear() resets `self.loss`, so a
        # caller reading `model.loss` should see the latest value, not None.
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        chainer.report({'loss': self.loss, 'accuracy': self.accuracy}, self)
        return self.loss


In [32]:

# Device selection: a negative gpu_id skips the GPU branch below, so the
# model is then not moved to a GPU.
gpu_id = 0 #  use gpu
# Wrap the VGG predictor in the Classifier defined in this notebook.
model = Classifier(VGG(class_labels))

if gpu_id >= 0:
    chainer.cuda.get_device(gpu_id).use()  # Make a specified GPU current
    model.to_gpu()  # Copy the model to the GPU

# Load weights

In [35]:
import os  # local import: only this cell needs it

# Resume from previously saved weights when the snapshot exists. The file is
# only written by the save cell at the end of this notebook, so on a fresh
# run it may be missing; in that case training starts from the freshly
# initialised parameters instead of crashing here.
weights_path = "chainer_tutorial_cifar10.h5"
if os.path.exists(weights_path):
    chainer.serializers.load_hdf5(weights_path, model)
else:
    print('No saved weights found at {}; using freshly initialised parameters.'.format(weights_path))

10 loops, best of 3: 102 ms per loop


In [37]:
%%timeit
# Measure how long re-loading the saved HDF5 weights into `model` takes.
chainer.serializers.load_hdf5("chainer_tutorial_cifar10.h5", model) 

10 loops, best of 3: 24 ms per loop


10 loops, best of 3: 75.4 ms per loop


# Set up an optimizer

In [13]:
# Momentum SGD; the positional argument 0.1 is presumably the learning rate
# (first parameter of MomentumSGD -- confirm against the installed version).
optimizer = chainer.optimizers.MomentumSGD(0.1)
# NOTE(review): use_cleargrads() is a version-specific opt-in -- confirm it is
# still needed; the manual training loop calls model.cleargrads() itself.
optimizer.use_cleargrads()
# Attach the optimizer to the model whose parameters it will update.
optimizer.setup(model)
# Register a weight-decay hook with coefficient 5e-4.
optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

In [14]:
# Pull one mini-batch for the interactive inspection cells below.
# NOTE: this advances the shared train_iter, so the training loop later
# continues from the batch after this one.
batch = train_iter.next()

In [15]:
# Array module associated with the model (the recorded output shows cupy).
xp = model.xp
xp

<module 'cupy' from '/home/komatsu/anaconda2/lib/python2.7/site-packages/cupy/__init__.pyc'>

In [16]:
# Split the batch into two arrays on the model's array module:
# element 0 of each sample goes into x (float32), element 1 into t (int32).
x = xp.asarray([it[0] for it in batch], dtype=np.float32)
t = xp.asarray([it[1] for it in batch], dtype=np.int32)

In [17]:
# Shape of the first image in the converted batch.
x[0].shape

(3, 32, 32)

In [18]:
# Call the wrapped predictor directly (bypassing Classifier.__call__) and
# compute the softmax cross-entropy loss by hand for the inspected batch.
y = model.predictor(x)
loss = F.softmax_cross_entropy(y, t)
#print(y.data)
# Print the raw array held by the loss variable.
print(loss.data)

0.247173860669


In [19]:
# Raw predictor outputs for the batch (one row of class scores per sample).
y.data

array([[ -3.15248442,  -6.40561676,  -1.88427174, ...,  16.68401527,
         -6.88736391,  -4.76229382],
       [  0.46081591,  -3.38987923,   2.94827199, ...,  -1.34495747,
         -3.0659709 ,  -3.54287839],
       [ -1.00618732,  12.14224815,  -4.13446903, ...,  -5.8096509 ,
         -0.80040574,  14.07317257],
       ..., 
       [ -2.12578392,  -3.45731497,   0.17912379, ...,   5.88494968,
         -3.67554474,  -1.61835313],
       [ -1.63420928,  -2.85191417,   1.45297587, ...,  -0.69618595,
         -2.26373434,   1.55776119],
       [  3.77103806,   1.64278674,  -0.63880855, ...,  -4.89644909,
          9.26553726,   0.81517953]], dtype=float32)

In [20]:
# Label of the first sample in the batch.
t[0]

array(7, dtype=int32)

In [21]:
%%time
from tqdm import tqdm_notebook as tqdm
# from tqdm import tqdm

# run
xp = model.xp

pbar = tqdm(xrange(epochsize))
for epoch in pbar:
    for batch in train_iter:
        # data separation
        x = xp.asarray([it[0] for it in batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in batch], dtype=np.int32)
        # compute grad
        loss = model(x, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        # terminate
        if train_iter.is_new_epoch is True:
            break

    # evaluate model   
    sum_loss = 0
    sum_acc = 0
    test_iter_copy = copy.copy(test_iter)
    for test_batch in test_iter_copy:
        # data separation
        x = xp.asarray([it[0] for it in test_batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in test_batch], dtype=np.int32)
        # compute grad
        loss = model(x, t)
        sum_loss += loss.data * len(test_batch)
        sum_acc += model.accuracy.data * len(test_batch)
    mean_loss = sum_loss / testsize
    mean_acc = sum_acc / testsize
#     print(''.format(epoch=epoch))
    print('epoch : {epoch}, Mean loss: {loss}, Mean accuracy: {acc}'.format(epoch=epoch, loss=mean_loss, acc=mean_acc))
    # pbar.set_description('epoch : {epoch}'.format(epoch=epoch))

epoch : 0, Mean loss: 0.703432738781, Mean accuracy: 0.776799976826
epoch : 1, Mean loss: 0.660486578941, Mean accuracy: 0.791499972343
epoch : 2, Mean loss: 0.660650730133, Mean accuracy: 0.787699997425
epoch : 3, Mean loss: 0.679507434368, Mean accuracy: 0.78409999609
epoch : 4, Mean loss: 0.693096160889, Mean accuracy: 0.775200009346
epoch : 5, Mean loss: 0.669409990311, Mean accuracy: 0.79390001297
epoch : 6, Mean loss: 0.626967310905, Mean accuracy: 0.793200016022
epoch : 7, Mean loss: 0.673231124878, Mean accuracy: 0.782100021839
epoch : 8, Mean loss: 0.660661041737, Mean accuracy: 0.787699997425
epoch : 9, Mean loss: 0.683186531067, Mean accuracy: 0.782800018787
epoch : 10, Mean loss: 0.689220309258, Mean accuracy: 0.779600024223
epoch : 11, Mean loss: 0.671059727669, Mean accuracy: 0.784500002861
epoch : 12, Mean loss: 0.741511642933, Mean accuracy: 0.76700001955
epoch : 13, Mean loss: 0.685344696045, Mean accuracy: 0.791899979115
epoch : 14, Mean loss: 0.756933569908, Mean acc

# Save model

In [None]:
# Save the model to the HDF5 file that the "load weights" cells read from.
chainer.serializers.save_hdf5("chainer_tutorial_cifar10.h5", model) 

In [23]:
# Also save the model in NPZ format.
chainer.serializers.save_npz("chainer_tutorial_cifar10.npz", model) 