# chainerの練習

In [2]:
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys; sys.argv=['']; del sys

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer import Variable

# Forward/Backward Computation

In [3]:
x_data = np.array([5], dtype=np.float32)
x = Variable(x_data)

In [4]:
y = x**2 - 2*x +1

In [5]:
y.data

array([ 16.], dtype=float32)

In [6]:
y.backward()

In [7]:
x.grad

array([ 8.], dtype=float32)

In [8]:
y.grad

array([ 1.], dtype=float32)

In [9]:
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = x**2 - 2*x +1
y.data

array([[  0.,   1.,   4.],
       [  9.,  16.,  25.]], dtype=float32)

In [10]:
y.grad = np.ones((2, 3), dtype=np.float32)
y.backward()
x.grad

array([[  0.,   2.,   4.],
       [  6.,   8.,  10.]], dtype=float32)

# Links

In [11]:
f = L.Linear(3,2)

In [12]:
f.W.data

array([[-0.87365782,  0.04174001, -0.40185058],
       [ 0.2854577 ,  0.36902165, -0.69121253]], dtype=float32)

In [13]:
f.b.data

array([ 0.,  0.], dtype=float32)

In [14]:
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = f(x)
y.data

array([[-1.99572957, -1.05013645],
       [-5.69703484, -1.16033602]], dtype=float32)

In [15]:
f.W.grad

array([[ nan,  nan,  nan],
       [ nan,  nan,  nan]], dtype=float32)

In [16]:
f.cleargrads()

In [17]:
f.W.grad

In [18]:
y.grad = np.ones((2,2), dtype=np.float32)

In [19]:
y.backward()

In [20]:
f.W.grad

array([[ 5.,  7.,  9.],
       [ 5.,  7.,  9.]], dtype=float32)

In [21]:
f.b.grad

array([ 2.,  2.], dtype=float32)

# Write a model as a chain

In [22]:
class MyChain(chainer.Chain):
    def __init__(self):
        super(MyChain, self).__init__(
            l1=L.Linear(4, 3),
            l2=L.Linear(3, 2),
        )
    def __call__(self, x):
        h=self.l1(x)
        return self.l2(h)

In [23]:
model = MyChain()

In [24]:
x = Variable(np.array([[0, 1, 2, 3], [0, 4, 5, 6]], dtype=np.float32))

In [25]:
y = model(x)
y.data

array([[ 0.73875928,  1.07063448],
       [ 2.15814734,  2.00058246]], dtype=float32)

In [26]:
model.l1.W.data

array([[-0.16259685, -0.46346357,  0.3334302 ,  0.64184391],
       [-0.14333589, -0.2851606 , -0.20084308,  0.55678707],
       [ 0.29526463,  0.31250039,  0.55511034,  0.19107708]], dtype=float32)

In [27]:
model.cleargrads()

In [28]:
y.grad = np.ones((2,2), dtype=np.float32)

In [29]:
model.l1.W.grad

In [30]:
y.backward()

In [31]:
model.l1.W.grad

array([[ 0.        ,  1.49492407,  2.0928936 ,  2.69086337],
       [ 0.        , -0.08842878, -0.12380029, -0.1591718 ],
       [ 0.        ,  2.98171139,  4.17439604,  5.36708021]], dtype=float32)

# Optimizer

In [32]:
optimizer = chainer.optimizers.SGD()
# optimizer = chainer.optimizers.MomentumSGD()

optimizer.use_cleargrads()

In [33]:
optimizer.setup(model)
optimizer.target

<__main__.MyChain at 0x7f61a9a07a90>

In [34]:
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

In [35]:
x = Variable(np.random.uniform(-1, 1, (2, 4)).astype('f'))

In [36]:
x.data

array([[ 0.67686433, -0.04751061,  0.24931756, -0.93056661],
       [-0.25877294, -0.87302017, -0.82438076,  0.84493047]], dtype=float32)

In [37]:
model(x).data

array([[ 0.1316663 , -0.21336028],
       [-0.39569858,  0.20802499]], dtype=float32)

In [38]:
loss = F.absolute(F.sum(model(x)))
loss.data

array(0.2693675756454468, dtype=float32)

In [39]:
model.cleargrads()
loss.backward()

In [40]:
optimizer.update()

In [41]:
loss.data

array(0.2693675756454468, dtype=float32)

In [42]:
loss = F.sum(model(x))

In [43]:
loss.data

array(-0.1590743064880371, dtype=float32)

In [44]:
for i in range(100):
    loss = F.absolute(F.sum(model(x)))
    print(loss.data)
    model.cleargrads()
    loss.backward()
    optimizer.update()

0.159074306488
0.049871891737
0.0583672225475
0.0494199991226
0.0587580651045
0.0489689707756
0.0591480880976
0.0485190153122
0.0595372915268
0.0480700135231
0.059925660491
0.047621935606
0.0603130757809
0.0471749305725
0.0606997013092
0.0467287898064
0.0610855072737
0.0462836623192
0.0614704936743
0.0458394885063
0.0618546158075
0.0453963577747
0.0622379332781
0.0449541509151
0.0626202821732
0.0445129275322
0.0630019158125
0.0440725982189
0.0633827447891
0.0436332523823
0.0637626945972
0.0431948304176
0.0641417652369
0.0427574515343
0.064520150423
0.0423209071159
0.0648976117373
0.0418854653835
0.0652742683887
0.0414508283138
0.0656501352787
0.0410171151161
0.0660251379013
0.0405844449997
0.0663993358612
0.0401527285576
0.0667727291584
0.0397218167782
0.0671453177929
0.0392918884754
0.067517131567
0.0388630628586
0.0678880810738
0.0384350717068
0.0682581961155
0.03800791502
0.0686275810003
0.0375817716122
0.0689961314201
0.0371564626694
0.0693639367819
0.0367322266102
0.0697308182716


In [45]:
model.l1.W.grad

array([[ -1.20526142e-01,   2.64957339e-01,   1.65832356e-01,
          2.49910448e-02],
       [  2.65920837e-03,  -6.15512393e-03,  -3.85650201e-03,
         -2.81093497e-04],
       [ -2.28617266e-01,   5.03838122e-01,   3.14931691e-01,
          4.69525494e-02]], dtype=float32)

# Dataset

In [46]:
# Load the MNIST dataset
train, test = chainer.datasets.get_mnist()

In [47]:
type(train)

chainer.datasets.tuple_dataset.TupleDataset

In [48]:
len(train)

60000

In [49]:
len(test)

10000

In [50]:
type(train[0][0])

numpy.ndarray

In [51]:
len(train[0][0])

784

In [52]:
train[0][1]

5

# SerialIterator

In [72]:
train_iter = chainer.iterators.SerialIterator(train, 6000)
test_iter = chainer.iterators.SerialIterator(test, 6000, repeat=False, shuffle=False)

In [73]:
test_iter.is_new_epoch

False

In [74]:
batch = test_iter.next()

In [76]:
len(batch)

6000

In [77]:
batch = test_iter.next()

In [78]:
len(batch)

4000

In [79]:
test_iter.is_new_epoch

True

In [63]:
for it in train_iter:
    print(train_iter.current_position)

10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
0


KeyboardInterrupt: 