# Chainer MNIST tutorial
自分の理解が深まるように少し改変  
http://docs.chainer.org/en/stable/tutorial/basic.html

In [126]:
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys; sys.argv=['']; del sys

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer import Variable
import copy # test iterator copy

# Network

In [81]:
class MLP(chainer.Chain):
    """Fully connected network: two ReLU hidden layers and a linear output.

    Args:
        n_units: width of each of the two hidden layers.
        n_out: number of output units.
    """

    def __init__(self, n_units, n_out):
        # Every layer size is given explicitly; the input width is fixed
        # at 784.
        layers = dict(
            l1=L.Linear(784, n_units),      # 784 -> n_units
            l2=L.Linear(n_units, n_units),  # n_units -> n_units
            l3=L.Linear(n_units, n_out),    # n_units -> n_out
        )
        super(MLP, self).__init__(**layers)

    def __call__(self, x):
        """Run ``x`` through l1 and l2 (each followed by ReLU), then l3.

        The output of l3 is returned as-is; no softmax is applied here.
        """
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)

In [96]:
class Classifier(chainer.Chain):
    """Wrap a predictor link and turn its output into a training loss.

    Args:
        predictor: link that is called with the input batch ``x``.

    Attributes:
        accuracy: accuracy of the most recent call, ``None`` before the
            first call.
    """

    def __init__(self, predictor):
        super(Classifier, self).__init__(predictor=predictor)
        self.accuracy = None

    def __call__(self, x, t):
        """Return the softmax cross-entropy loss of ``predictor(x)`` against ``t``.

        The accuracy is stored on ``self.accuracy``, and both values are
        passed to ``chainer.report`` with this link as the observer.
        """
        scores = self.predictor(x)
        loss = F.softmax_cross_entropy(scores, t)
        self.accuracy = F.accuracy(scores, t)

        observation = {'loss': loss, 'accuracy': self.accuracy}
        chainer.report(observation, self)
        return loss


In [97]:
gpu_id = 0 # CUDA device index; the check below skips the GPU when this is negative
# Classifier wrapping an MLP with 1000 hidden units and 10 outputs.
model = Classifier(MLP(1000, 10))

if gpu_id >= 0:
    chainer.cuda.get_device(gpu_id).use()  # Make a specified GPU current
    model.to_gpu()  # Copy the model to the GPU

# Setup an optimizer

In [98]:
# Plain stochastic gradient descent with the library's default settings.
optimizer = chainer.optimizers.SGD()
# Alternative: optimizer = chainer.optimizers.MomentumSGD()
# NOTE(review): presumably switches the optimizer to cleargrads() instead of
# zerograds() when it resets gradients itself -- confirm against Chainer v1 docs.
optimizer.use_cleargrads()

In [99]:
# Attach the optimizer to the model.
optimizer.setup(model)
# Display the optimizer's target link (the Classifier instance, per the output below).
optimizer.target

<__main__.Classifier at 0x7f444c634d50>

# Load dataset

In [100]:
# Load the MNIST dataset
train, test = chainer.datasets.get_mnist()
# Training iterator: mini-batches of 100 samples.
train_iter = chainer.iterators.SerialIterator(train, 100)
# Test iterator: mini-batches of 100, created with repeat=False and shuffle=False.
test_iter = chainer.iterators.SerialIterator(test, 100, repeat=False, shuffle=False)

# Run manually
dataはnumpy形式　GPUの有無に応じてcupyに変更する

In [101]:
# Pull a single mini-batch from the training iterator.
batch = train_iter.next()

In [102]:
# Array module used by the model (cupy here, since the model was moved to the GPU).
xp = model.xp
xp

<module 'cupy' from '/home/komatsu/anaconda2/lib/python2.7/site-packages/cupy/__init__.pyc'>

In [103]:
# Split the batch into its two components and convert each to an xp array:
# element 0 of every item becomes a float32 input array,
# element 1 of every item becomes an int32 label array.
x = xp.asarray([it[0] for it in batch], dtype=np.float32)
t = xp.asarray([it[1] for it in batch], dtype=np.int32)

In [104]:
# Show the label array of this mini-batch.
t

array([6, 2, 3, 4, 5, 6, 4, 0, 8, 2, 6, 6, 9, 5, 7, 6, 1, 8, 7, 1, 6, 5, 6,
       9, 0, 0, 6, 1, 7, 1, 1, 4, 6, 7, 9, 2, 0, 2, 6, 4, 9, 2, 9, 8, 3, 9,
       8, 5, 8, 7, 7, 9, 3, 5, 1, 5, 1, 4, 0, 3, 4, 9, 4, 8, 8, 2, 5, 9, 6,
       3, 9, 9, 8, 0, 4, 1, 7, 3, 1, 0, 3, 5, 6, 6, 8, 0, 5, 9, 7, 8, 3, 2,
       3, 7, 2, 1, 3, 9, 3, 8], dtype=int32)

In [105]:
# Check which array type the labels ended up as.
type(t)

cupy.core.core.ndarray

In [106]:
# Shape of the input array for this mini-batch.
x.shape

(100, 784)

## predict

In [107]:
# Predict without the Classifier wrapper: call the inner predictor directly
# and compute the softmax cross-entropy loss by hand.
y = model.predictor(x)
loss = F.softmax_cross_entropy(y, t)
# Uncomment to inspect the raw predictor outputs:
#print(y.data)
print(loss.data)

[[ 0.26716429 -0.0498517   0.18256906  0.23069705  0.05639438 -0.10639153
   0.12205505  0.0361958  -0.01332716  0.28266028]
 [ 0.12073089  0.13905387 -0.07146449  0.06680758  0.10860203 -0.01722271
   0.03685853 -0.23692617 -0.03443903  0.20013787]
 [ 0.15522207 -0.04847419  0.05707862  0.2503624   0.06084583 -0.09175757
   0.0527268  -0.0893645  -0.07871582 -0.07998484]
 [ 0.24428926  0.03566321 -0.10973764  0.10700005  0.11170004 -0.00931807
   0.09277786 -0.03669683  0.18683572  0.00750732]
 [ 0.19380745  0.10304111 -0.13998446  0.27420381  0.16632356  0.06229278
   0.11802386 -0.212038   -0.02707849 -0.03473621]
 [ 0.20962606  0.05662105  0.02327721  0.11459622  0.12235951  0.00976416
   0.06178858 -0.00801183 -0.05154353  0.03278422]
 [ 0.12869045 -0.14108269 -0.04610553  0.07982061 -0.09339776 -0.21632953
   0.17145656 -0.09841295  0.07178839 -0.03134733]
 [ 0.3230553  -0.28360224 -0.01819221  0.22198357  0.00400162 -0.2296188
   0.16240102 -0.36118239 -0.18633536  0.22606188]
 

In [108]:
# Predict with the Classifier wrapper: a single call runs the predictor,
# computes the loss and stores the accuracy on model.accuracy.
loss = model(x, t)
print(loss.data)

2.28014588356


## 簡単に時間計測

In [109]:
%%timeit
# Time the predictor forward pass plus the loss only (no accuracy computation).
y = model.predictor(x)
loss = F.softmax_cross_entropy(y, t)

The slowest run took 4.39 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 3: 1.19 ms per loop


多分Accuracyの計算が入っているかいないかで計算時間が変わる。   
Flagでself.compute_accuracyをつけると良い。  
https://github.com/pfnet/chainer/blob/v1.19.0/chainer/links/model/classifier.py

In [110]:
%%timeit
# Time the full Classifier call, which also computes accuracy and calls chainer.report.
loss = model(x, t)

1000 loops, best of 3: 1.41 ms per loop


## update


In [111]:
# Clear the model's gradients, then backpropagate from the current loss.
model.cleargrads()
loss.backward()

In [112]:
# Peek at the gradient of the first layer's weights: row 0, columns 400-419.
model.predictor.l1.W.grad[0][400:420]

array([ -2.11363169e-03,  -1.38228748e-03,  -5.11461927e-04,
        -4.56609094e-04,  -1.31238729e-03,  -1.66235387e-03,
        -1.29003066e-03,  -9.21345490e-04,  -9.10570612e-04,
        -7.74228130e-04,  -7.32739863e-04,  -4.95720014e-04,
        -5.64320420e-04,  -2.39379238e-04,   1.68128259e-04,
        -8.32440346e-05,  -2.68386299e-04,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00], dtype=float32)

In [113]:
# Apply one optimizer step using the gradients computed by loss.backward() above.
optimizer.update()

In [114]:
# Repeat the forward / backward / update cycle by hand.
# The same mini-batch (x, t) is reused on every iteration, so the printed
# loss shows how well the model fits this one batch.
for i in range(2000):
    loss = model(x, t)
    # Compare by value: `is` tests object identity and only appeared to work
    # because CPython caches small integers.
    if i % 100 == 0:
        print("{} : {}".format(i, loss.data))
    model.cleargrads()
    loss.backward()
    optimizer.update()

0 : 2.26911950111
100 : 1.37029600143
200 : 0.689813196659
300 : 0.363195955753
400 : 0.213089868426
500 : 0.138499110937
600 : 0.0976763814688
700 : 0.0731864646077
800 : 0.0573908016086
900 : 0.0465756244957
1000 : 0.038815908134
1100 : 0.0330368168652
1200 : 0.0285944696516
1300 : 0.0250963009894
1400 : 0.0222843922675
1500 : 0.0199799295515
1600 : 0.0180639214814
1700 : 0.0164525602013
1800 : 0.0150790829211
1900 : 0.0138964653015


# Run all
GPU: 0  
unit: 1000  
Minibatch-size: 100  
epoch: 10  

In [1]:
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys; sys.argv=['']; del sys

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer import Variable
import copy # test iterator copy

In [2]:
class MLP(chainer.Chain):
    """Fully connected network: two ReLU hidden layers and a linear output.

    Args:
        n_units: width of each of the two hidden layers.
        n_out: number of output units.
    """

    def __init__(self, n_units, n_out):
        # Every layer size is given explicitly; the input width is fixed
        # at 784.
        layers = dict(
            l1=L.Linear(784, n_units),      # 784 -> n_units
            l2=L.Linear(n_units, n_units),  # n_units -> n_units
            l3=L.Linear(n_units, n_out),    # n_units -> n_out
        )
        super(MLP, self).__init__(**layers)

    def __call__(self, x):
        """Run ``x`` through l1 and l2 (each followed by ReLU), then l3.

        The output of l3 is returned as-is; no softmax is applied here.
        """
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)

In [3]:
class Classifier(chainer.Chain):
    """Wrap a predictor link and turn its output into a training loss.

    Args:
        predictor: link that is called with the input batch ``x``.

    Attributes:
        accuracy: accuracy of the most recent call, ``None`` before the
            first call.
    """

    def __init__(self, predictor):
        super(Classifier, self).__init__(predictor=predictor)
        self.accuracy = None

    def __call__(self, x, t):
        """Return the softmax cross-entropy loss of ``predictor(x)`` against ``t``.

        The accuracy is stored on ``self.accuracy``, and both values are
        passed to ``chainer.report`` with this link as the observer.
        """
        scores = self.predictor(x)
        loss = F.softmax_cross_entropy(scores, t)
        self.accuracy = F.accuracy(scores, t)

        observation = {'loss': loss, 'accuracy': self.accuracy}
        chainer.report(observation, self)
        return loss


In [4]:
# Settings for the full training run.
unitsize = 1000   # hidden-layer width passed to MLP
batchsize = 100   # intended mini-batch size
epochsize = 10    # number of passes of the outer training loop

In [5]:
gpu_id = 0 # CUDA device index; the check below skips the GPU when this is negative
# Classifier wrapping an MLP with `unitsize` hidden units and 10 outputs.
model = Classifier(MLP(unitsize, 10))

if gpu_id >= 0:
    chainer.cuda.get_device(gpu_id).use()  # Make a specified GPU current
    model.to_gpu()  # Copy the model to the GPU

In [6]:
# Optimizer: Adam with the library's default settings.
# Alternative: optimizer = chainer.optimizers.SGD()
optimizer = chainer.optimizers.Adam()

# NOTE(review): presumably switches the optimizer to cleargrads() instead of
# zerograds() when it resets gradients itself -- confirm against Chainer v1 docs.
optimizer.use_cleargrads()
# Attach the optimizer to the model.
optimizer.setup(model)

In [7]:
# Load the MNIST dataset
train, test = chainer.datasets.get_mnist()
# Build both iterators from the `batchsize` setting so that changing it in
# the configuration cell actually takes effect (it was hard-coded to 100 here).
train_iter = chainer.iterators.SerialIterator(train, batchsize)
# The test iterator is created with repeat=False and shuffle=False.
test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)
# Number of test samples; used to turn summed metrics into means.
testsize = len(test)

In [None]:
%%time
from tqdm import tqdm_notebook as tqdm
# from tqdm import tqdm

# run
xp = model.xp

pbar = tqdm(xrange(epochsize))
for epoch in pbar:
    for batch in train_iter:
        # data separation
        x = xp.asarray([it[0] for it in batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in batch], dtype=np.int32)
        # compute grad
        loss = model(x, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        # terminate
        if train_iter.is_new_epoch is True:
            break

    # evaluate model   
    sum_loss = 0
    sum_acc = 0
    test_iter_copy = copy.copy(test_iter)
    for test_batch in test_iter_copy:
        # data separation
        x = xp.asarray([it[0] for it in test_batch], dtype=np.float32)
        t = xp.asarray([it[1] for it in test_batch], dtype=np.int32)
        # compute grad
        loss = model(x, t)
        sum_loss += loss.data * len(test_batch)
        sum_acc += model.accuracy.data * len(test_batch)
    mean_loss = sum_loss / testsize
    mean_acc = sum_acc / testsize
#     print(''.format(epoch=epoch))
    print('epoch : {epoch}, Mean loss: {loss}, Mean accuracy: {acc}'.format(epoch=epoch, loss=mean_loss, acc=mean_acc))
    # pbar.set_description('epoch : {epoch}'.format(epoch=epoch))

epoch : 0, Mean loss: 0.103590451181, Mean accuracy: 0.965399980545
epoch : 1, Mean loss: 0.0772355273366, Mean accuracy: 0.975199997425
epoch : 2, Mean loss: 0.0724863857031, Mean accuracy: 0.977900028229
epoch : 3, Mean loss: 0.0671059712768, Mean accuracy: 0.979600012302
epoch : 4, Mean loss: 0.0751234218478, Mean accuracy: 0.978900015354
epoch : 5, Mean loss: 0.0699494481087, Mean accuracy: 0.981100022793
epoch : 6, Mean loss: 0.0859361812472, Mean accuracy: 0.978200018406
epoch : 7, Mean loss: 0.086266040802, Mean accuracy: 0.978100001812
epoch : 8, Mean loss: 0.0963259860873, Mean accuracy: 0.977999985218
