# 第3章 Chainerの使い方

---

# 3.2 基本オブジェクト

## 3.2.1 Variable

- Variableに入る実際のデータは配列
- Variable同士の演算結果もVariableになる
- 実際のデータ（配列）はdataという属性で参照できる

In [62]:
import numpy as np
import chainer
from chainer import Variable, optimizers, serializers, utils
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
import chainer.computational_graph as c
from chainer.functions.loss.mean_squared_error import mean_squared_error


# Wrap three one-element float32 arrays as chainer Variables.
x1 = Variable(np.array([1], dtype=np.float32))
x2 = Variable(np.array([2], dtype=np.float32))
x3 = Variable(np.array([3], dtype=np.float32))

# Forward computation: z = (x1 - 2*x2 - 1)^2 + (x2*x3 - 1)^2 + 1.
z = (x1-2*x2-1)**2+(x2*x3-1)**2+1
# The underlying array of the result is read through the .data attribute.
z.data

array([ 42.], dtype=float32)

<br>
- 順方向にいったん計算したことになるので、微分値を得るためには、逆向きの計算を行う

In [63]:
z.backward()
x1.grad

array([-8.], dtype=float32)

In [64]:
x2.grad

array([ 46.], dtype=float32)

In [65]:
x3.grad

array([ 20.], dtype=float32)

#### 微分の式
$$
\frac{\partial z}{\partial x_1}=2(x_1-2x_2-1)\\
\frac{\partial z}{\partial x_2}=-4(x_1-2x_2-1)+2x_3(x_2x_3-1)\\
\frac{\partial z}{\partial x_3}=2x_2(x_2x_3-1)\\
$$
<div style="text-align: center;">
$(x_1,x_2,x_3)=(1,2,3)$を代入すれば<br>　
</div>
$$
\frac{\partial z}{\partial x_1}=2\times(1-2\times2-1)=-8\\
\frac{\partial z}{\partial x_2}=-4\times(1-2\times2-1)+2\times3\times(2\times3-1)=46\\
\frac{\partial z}{\partial x_3}=2\times2\times(2\times3-1)=20\\
$$

## 3.2.2 Function

In [66]:
x = Variable(np.array([-1], dtype=np.float32))
F.sin(x).data #sin関数

array([-0.84147096], dtype=float32)

In [67]:
F.sigmoid(x).data #sigmoid関数

array([ 0.2689414], dtype=float32)

<br>
- $(\,\cos (x)\,)' = -\sin(x)$ を確かめてみる

In [68]:
x = Variable(np.array([-0.5], dtype=np.float32))
z = F.cos(x)
z.data

array([ 0.87758255], dtype=float32)

In [69]:
z.backward()
x.grad

array([ 0.47942555], dtype=float32)

In [70]:
(-1)*F.sin(x).data # (cos(x))' = -sin(x)

array([ 0.47942555], dtype=float32)

<br>
- シグモイド関数についても$(\;f'(x)=(1-f(x))\;f(x)\;)$を確かめてみる

In [71]:
x = Variable(np.array([-0.5], dtype=np.float32))
z = F.sigmoid(x)
z.data

array([ 0.37754068], dtype=float32)

In [72]:
z.backward()
x.grad

array([ 0.23500371], dtype=float32)

In [73]:
((1-F.sigmoid(x))*F.sigmoid(x)).data # f'(x)=(1-f(x))*f(x)

array([ 0.23500371], dtype=float32)

<br>
- 変数が多次元である場合は、backward()を呼ぶ前に、出力と同じ形状の勾配の初期値（z.grad）をあらかじめ設定しておく必要がある

In [74]:
# Three-element input, so z = sin(x) also has three elements.
x = Variable(np.array([-1, 0, 1], dtype=np.float32))
z = F.sin(x)
# Seed the output gradient with ones (same length as z) before backpropagating.
z.grad = np.ones(3, dtype=np.float32)
z.backward()
# Gradient of sin with respect to each element of x.
x.grad

array([ 0.54030228,  1.        ,  0.54030228], dtype=float32)

<br>
## 3.2.3 links

In [75]:
h = L.Linear(3,4)

- パラメータはWとb
- 初期状態では、Wにはランダムな値が、bには0が入っている

In [76]:
h.W.data

array([[-0.51988679, -0.8054769 ,  0.55033976],
       [ 1.07935274, -0.11930526,  0.39620265],
       [ 0.95457995,  0.02897514, -0.27164912],
       [ 0.90051645,  0.24939117, -1.59304452]], dtype=float32)

In [77]:
h.b.data

array([ 0.,  0.,  0.,  0.], dtype=float32)

<br>
- 入力はバッチ（データの集合）
- 下の例は2つの3次元のベクトルを作って$\;h\;$に与えている

In [78]:
x = Variable(np.array(range(6)).astype(np.float32).reshape(2,3))
x.data

array([[ 0.,  1.,  2.],
       [ 3.,  4.,  5.]], dtype=float32)

In [79]:
y = h(x)
y.data

array([[ 0.29520261,  0.67310005, -0.51432312, -2.93669796],
       [-2.02986932,  4.74185038,  1.62139487, -4.26610851]], dtype=float32)

正しく計算できているのかの確認

In [80]:
# Recompute the linear link by hand from its raw arrays: x . W^T + b.
w = h.W.data
x0 = x.data
x0.dot(w.T) + h.b.data

array([[ 0.29520261,  0.67310005, -0.51432312, -2.93669796],
       [-2.02986932,  4.74185038,  1.62139487, -4.26610851]], dtype=float32)

---

# 3.3 Chainクラス

```python
class MyChain(Chain):
    """Example chain whose call returns a mean-squared-error loss."""

    def __init__(self):
        # Register the two linear links as children of this chain.
        super(MyChain, self).__init__(l1=L.Linear(4, 3), l2=L.Linear(3, 3))

    def __call__(self, x, y):
        """Return the mean squared error between ``fwd(x, y)`` and ``y``."""
        prediction = self.fwd(x, y)
        return F.mean_squared_error(prediction, y)

    def fwd(self, x, y):
        """Apply ``l1`` to ``x`` and pass the result through a sigmoid.

        ``y`` is accepted for signature compatibility but is not read.
        """
        # NOTE(review): ``l2`` is registered in __init__ but never applied
        # here -- confirm whether that is intentional.
        hidden = self.l1(x)
        return F.sigmoid(hidden)

```

---

# 3.4 optimizers

```python
# Build the chain and attach a plain SGD optimizer to it.
model = MyChain()
optimizer = optimizers.SGD()
optimizer.setup(model)

# One training step: reset gradients, run the forward pass to get the loss,
# backpropagate, then let the optimizer apply the update.
# NOTE(review): x and y are not defined in this snippet -- presumably a
# (batch, 4) input and a (batch, 3) target to match MyChain's links; confirm.
model.zerograds()
loss = model(x, y)
loss.backward()
optimizer.update()
```

---

# +α　AND・OR・XORの論理演算を学習させてみる

### 参考
[Chainerに入門、And/Or/Xorの実装 ](http://qiita.com/daisukelab/items/6ad3242eeba140023191)

[chainerでニューラルネットを学んでみるよ(chainerでニューラルネット2)](http://hi-king.hatenablog.com/entry/2015/06/27/194630)

## 層が1つの場合

In [81]:
# And/Or/Xor classifier network example
#
# This is re-written version of:
#   http://hi-king.hatenablog.com/entry/2015/06/27/194630
# By following chainer introduction:
#   http://docs.chainer.org/en/stable/tutorial/basic.html

## Chainer cliche
import numpy as np
import chainer
from chainer import Function, Variable, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L

# Neural Network

## Network definition
class NN2x2x1dim(Chain):
    """Network made of a single linear link with 2 inputs and 2 outputs."""

    def __init__(self):
        super(NN2x2x1dim, self).__init__(l=L.Linear(2, 2))

    def __call__(self, x):
        """Return the output of the linear link applied to ``x``."""
        return self.l(x)

# Sub routine

## Utility: Summarize current results
def summarize(model, optimizer, inputs, outputs):
    """Print the model's prediction for every sample and return mean metrics.

    Args:
        model: Classifier-style object. The code calls ``model.predictor(x)``
            and ``model(x, t)``, then reads ``model.accuracy``.
        optimizer: Not used; kept so existing call sites keep working.
        inputs: Iterable of 2-element input arrays.
        outputs: Iterable of label arrays, aligned with ``inputs``.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` over the processed samples, or
        ``(0.0, 0.0)`` when there are no samples.
    """
    total_loss, total_accuracy, count = 0.0, 0.0, 0
    print('model says:')
    for sample, label in zip(inputs, outputs):
        x = Variable(sample.reshape(1, 2).astype(np.float32))
        t = Variable(label.astype(np.int32))
        y = model.predictor(x)
        # Calling the model yields the loss; the accuracy for this sample is
        # read from model.accuracy right afterwards.
        loss = model(x, t)
        total_loss += float(loss.data)
        total_accuracy += float(model.accuracy.data)
        count += 1
        print('  %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], np.argmax(y.data), y.data[0,0], y.data[0,1]))
    if count == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0, 0.0
    return total_loss / count, total_accuracy / count

## Runs learning loop
def learning_looper(model, optimizer, inputs, outputs, epoch_size):
    """Train ``model`` one sample at a time and summarize after every epoch.

    Each epoch makes 100 passes over ``inputs``. Every sample is handed to
    ``optimizer.update`` individually, and ``summarize`` is called once the
    passes for that epoch are done.
    """
    passes_per_epoch = 100
    sample_count = len(inputs)

    def train_on(index):
        # Wrap one sample and its label, then let the optimizer step on it.
        features = Variable(inputs[index].reshape(1, 2).astype(np.float32))
        label = Variable(outputs[index].astype(np.int32))
        optimizer.update(model, features, label)

    for epoch in range(epoch_size):
        print('epoch %d' % epoch)
        for _ in range(passes_per_epoch):
            for index in range(sample_count):
                train_on(index)
        summarize(model, optimizer, inputs, outputs)

# Main
## Test data
# The four combinations of two binary inputs, followed by the expected label
# of each combination under AND, OR and XOR.
inputs = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]], dtype=np.float32)
and_outputs = np.array([[0], [0], [0], [1]], dtype=np.int32)
or_outputs = np.array([[0], [1], [1], [1]], dtype=np.int32)
xor_outputs = np.array([[0], [1], [1], [0]], dtype=np.int32)

# Each experiment below wraps a fresh single-link network in L.Classifier,
# attaches a new SGD optimizer, prints the predictions before training and
# then trains with learning_looper.

## AND Test --> will learn successfully
## Model & Optimizer instance
and_model = L.Classifier(NN2x2x1dim())
optimizer = optimizers.SGD()
# quicker) optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(and_model)
print('<<AND: Before learning>>')
summarize(and_model, optimizer, inputs, and_outputs)
print('\n<<AND: After Learning>>')
learning_looper(and_model, optimizer, inputs, and_outputs, epoch_size = 5)

## OR Test --> will learn successfully
## Model & Optimizer instance
or_model = L.Classifier(NN2x2x1dim())
optimizer = optimizers.SGD()
optimizer.setup(or_model)
print('\n---------\n\n<<OR: Before learning>>')
summarize(or_model, optimizer, inputs, or_outputs)
print('\n<<OR: After Learning>>')
learning_looper(or_model, optimizer, inputs, or_outputs, epoch_size = 5)

## XOR Test --> will FAIL, single link is not enough for XOR
## Model & Optimizer instance
xor_model = L.Classifier(NN2x2x1dim())
optimizer = optimizers.SGD()
optimizer.setup(xor_model)
print('\n---------\n\n<<XOR: Before learning>>')
summarize(xor_model, optimizer, inputs, xor_outputs)
print('\n<<XOR: After Learning>>')
# XOR gets more epochs (20) than AND/OR (5).
learning_looper(xor_model, optimizer, inputs, xor_outputs, epoch_size = 20)


<<AND: Before learning>>
model says:
  0 & 0 = 0 (zero:0.000000 one:0.000000)
  0 & 1 = 0 (zero:0.524215 one:-0.120258)
  1 & 0 = 0 (zero:-0.268319 one:-0.892265)
  1 & 1 = 0 (zero:0.255896 one:-1.012523)

<<AND: After Learning>>
epoch 0
model says:
  0 & 0 = 0 (zero:0.381592 one:-0.381592)
  0 & 1 = 0 (zero:0.536354 one:-0.132397)
  1 & 0 = 0 (zero:-0.253692 one:-0.906892)
  1 & 1 = 0 (zero:-0.098930 one:-0.657697)
epoch 1
model says:
  0 & 0 = 0 (zero:0.699044 one:-0.699044)
  0 & 1 = 0 (zero:0.589686 one:-0.185729)
  1 & 0 = 0 (zero:-0.198748 one:-0.961835)
  1 & 1 = 0 (zero:-0.308107 one:-0.448520)
epoch 2
model says:
  0 & 0 = 0 (zero:0.960165 one:-0.960165)
  0 & 1 = 0 (zero:0.648383 one:-0.244426)
  1 & 0 = 0 (zero:-0.138965 one:-1.021619)
  1 & 1 = 1 (zero:-0.450747 one:-0.305880)
epoch 3
model says:
  0 & 0 = 0 (zero:1.180916 one:-1.180916)
  0 & 1 = 0 (zero:0.705063 one:-0.301106)
  1 & 0 = 0 (zero:-0.081522 one:-1.079061)
  1 & 1 = 1 (zero:-0.557375 one:-0.199251)
epoch 4
mo

## 層が2つの場合

In [82]:
# Chainer training: And/Or/Xor classifier network example with 2 links.
#
# This is re-written version of:
#   http://hi-king.hatenablog.com/entry/2015/06/27/194630
# By following chainer introduction:
#   http://docs.chainer.org/en/stable/tutorial/basic.html

## Chainer cliche
import numpy as np
import chainer
from chainer import Function, Variable, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F

import chainer.links as L

# Neural Network

## Network definition
class NN2x2_2links(Chain):
    """Two stacked 2-in/2-out linear links with a sigmoid between them."""

    def __init__(self):
        super(NN2x2_2links, self).__init__(l1=L.Linear(2, 2), l2=L.Linear(2, 2))

    def __call__(self, x):
        """Return ``l2(sigmoid(l1(x)))``."""
        hidden = F.sigmoid(self.l1(x))
        return self.l2(hidden)

# Sub routine

## Utility: Summarize current results
def summarize(model, optimizer, inputs, outputs):
    """Print the model's prediction for every sample and return mean metrics.

    Args:
        model: Classifier-style object. The code calls ``model.predictor(x)``
            and ``model(x, t)``, then reads ``model.accuracy``.
        optimizer: Not used; kept so existing call sites keep working.
        inputs: Iterable of 2-element input arrays.
        outputs: Iterable of label arrays, aligned with ``inputs``.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` over the processed samples, or
        ``(0.0, 0.0)`` when there are no samples.
    """
    total_loss, total_accuracy, count = 0.0, 0.0, 0
    print('model says:')
    for sample, label in zip(inputs, outputs):
        x = Variable(sample.reshape(1, 2).astype(np.float32))
        t = Variable(label.astype(np.int32))
        y = model.predictor(x)
        # Calling the model yields the loss; the accuracy for this sample is
        # read from model.accuracy right afterwards.
        loss = model(x, t)
        total_loss += float(loss.data)
        total_accuracy += float(model.accuracy.data)
        count += 1
        print('  %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], np.argmax(y.data), y.data[0,0], y.data[0,1]))
    if count == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0, 0.0
    return total_loss / count, total_accuracy / count

## Runs learning loop
def learning_looper(model, optimizer, inputs, outputs, epoch_size):
    """Train ``model`` one sample at a time and summarize after every epoch.

    Each epoch makes 100 passes over ``inputs``. Every sample is handed to
    ``optimizer.update`` individually, and ``summarize`` is called once the
    passes for that epoch are done.
    """
    passes_per_epoch = 100
    sample_count = len(inputs)

    def train_on(index):
        # Wrap one sample and its label, then let the optimizer step on it.
        features = Variable(inputs[index].reshape(1, 2).astype(np.float32))
        label = Variable(outputs[index].astype(np.int32))
        optimizer.update(model, features, label)

    for epoch in range(epoch_size):
        print('epoch %d' % epoch)
        for _ in range(passes_per_epoch):
            for index in range(sample_count):
                train_on(index)
        summarize(model, optimizer, inputs, outputs)
        
## Runs XOR_learning loop
def XOR_learning_looper(model, optimizer, inputs, outputs, epoch_size,
                        report_interval=10):
    """Train ``model`` sample by sample, reporting only every few epochs.

    Args:
        model: Callable passed to ``optimizer.update`` as the loss function.
        optimizer: Optimizer already set up for ``model``.
        inputs: Indexable collection of 2-element input arrays.
        outputs: Label arrays aligned with ``inputs``.
        epoch_size: Number of epochs to run.
        report_interval: Print the epoch header and call ``summarize`` on
            epochs that are a multiple of this value. Defaults to 10, the
            interval that used to be hard-coded.

    Raises:
        ValueError: If ``report_interval`` is smaller than 1.
    """
    if report_interval < 1:
        raise ValueError('report_interval must be >= 1')
    augment_size = 100
    for epoch in range(epoch_size):
        # Decide once per epoch so the header and the summary stay paired.
        should_report = epoch % report_interval == 0
        if should_report:
            print('epoch %d' % epoch)
        for _ in range(augment_size):
            for sample, label in zip(inputs, outputs):
                x = Variable(sample.reshape(1, 2).astype(np.float32))
                t = Variable(label.astype(np.int32))
                optimizer.update(model, x, t)
        if should_report:
            summarize(model, optimizer, inputs, outputs)

# Main

## Test data
# The four combinations of two binary inputs, followed by the expected label
# of each combination under AND, OR and XOR.
inputs = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]], dtype=np.float32)
and_outputs = np.array([[0], [0], [0], [1]], dtype=np.int32)
or_outputs = np.array([[0], [1], [1], [1]], dtype=np.int32)
xor_outputs = np.array([[0], [1], [1], [0]], dtype=np.int32)

# Each experiment below wraps a fresh two-link network in L.Classifier,
# attaches a new optimizer, prints the predictions before training and then
# runs the training loop.

## AND Test --> will learn successfully
and_model = L.Classifier(NN2x2_2links())
optimizer = optimizers.SGD()
# do it quicker) optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(and_model)
print('<<AND: Before learning>>')
summarize(and_model, optimizer, inputs, and_outputs)
print('\n<<AND: After Learning>>')
learning_looper(and_model, optimizer, inputs, and_outputs, epoch_size = 21)

## OR Test --> will learn successfully
or_model = L.Classifier(NN2x2_2links())
optimizer = optimizers.SGD()
optimizer.setup(or_model)
print('\n---------\n\n<<OR: Before learning>>')
summarize(or_model, optimizer, inputs, or_outputs)
print('\n<<OR: After Learning>>')
learning_looper(or_model, optimizer, inputs, or_outputs, epoch_size = 21)

## XOR Test --> will learn successfully
xor_model = L.Classifier(NN2x2_2links())
# XOR uses MomentumSGD instead of plain SGD, and the looper that only
# reports every 10th epoch.
#optimizer = optimizers.SGD()
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(xor_model)
print('\n---------\n\n<<XOR: Before learning>>')
summarize(xor_model, optimizer, inputs, xor_outputs)
print('\n<<XOR: After Learning>>')
XOR_learning_looper(xor_model, optimizer, inputs, xor_outputs, epoch_size = 251)


<<AND: Before learning>>
model says:
  0 & 0 = 1 (zero:-0.179573 one:0.387857)
  0 & 1 = 0 (zero:0.067836 one:0.012461)
  1 & 0 = 1 (zero:-0.168325 one:0.311524)
  1 & 1 = 1 (zero:0.022426 one:0.023212)

<<AND: After Learning>>
epoch 0
model says:
  0 & 0 = 0 (zero:0.537365 one:-0.350361)
  0 & 1 = 0 (zero:0.719311 one:-0.649290)
  1 & 0 = 0 (zero:0.416817 one:-0.296986)
  1 & 1 = 0 (zero:0.551563 one:-0.514293)
epoch 1
model says:
  0 & 0 = 0 (zero:0.661491 one:-0.481877)
  0 & 1 = 0 (zero:0.793295 one:-0.726322)
  1 & 0 = 0 (zero:0.476812 one:-0.364197)
  1 & 1 = 0 (zero:0.566894 one:-0.532505)
epoch 2
model says:
  0 & 0 = 0 (zero:0.725207 one:-0.549399)
  0 & 1 = 0 (zero:0.812829 one:-0.747653)
  1 & 0 = 0 (zero:0.477082 one:-0.368259)
  1 & 1 = 0 (zero:0.528156 one:-0.495635)
epoch 3
model says:
  0 & 0 = 0 (zero:0.784492 one:-0.609967)
  0 & 1 = 0 (zero:0.829188 one:-0.764965)
  1 & 0 = 0 (zero:0.467142 one:-0.359827)
  1 & 1 = 0 (zero:0.482500 one:-0.451108)
epoch 4
model says:


model says:
  0 & 0 = 0 (zero:0.857957 one:-1.138844)
  0 & 1 = 1 (zero:-1.964046 one:1.378404)
  1 & 0 = 1 (zero:-1.712421 one:1.043203)
  1 & 1 = 1 (zero:-2.685595 one:2.007698)

---------

<<XOR: Before learning>>
model says:
  0 & 0 = 0 (zero:0.467032 one:-0.762461)
  0 & 1 = 0 (zero:0.557843 one:-0.932123)
  1 & 0 = 0 (zero:0.457739 one:-0.904331)
  1 & 1 = 0 (zero:0.552320 one:-1.057605)

<<XOR: After Learning>>
epoch 0
model says:
  0 & 0 = 1 (zero:-0.171513 one:-0.133042)
  0 & 1 = 0 (zero:-0.187776 one:-0.214917)
  1 & 0 = 1 (zero:-0.259501 one:-0.203865)
  1 & 1 = 0 (zero:-0.264028 one:-0.277891)
epoch 10
model says:
  0 & 0 = 0 (zero:1.396721 one:-1.509013)
  0 & 1 = 1 (zero:-1.922323 one:1.037175)
  1 & 0 = 1 (zero:-1.922133 one:1.036954)
  1 & 1 = 0 (zero:1.037982 one:-1.705206)
epoch 20
model says:
  0 & 0 = 0 (zero:2.145499 one:-2.217210)
  0 & 1 = 1 (zero:-2.752809 one:1.834697)
  1 & 0 = 1 (zero:-2.752818 one:1.834687)
  1 & 1 = 0 (zero:1.916067 one:-2.560771)
epoch 30

## こっちでもできます

In [83]:
#!/usr/bin/env python

# 参考元 : http://hi-king.hatenablog.com/entry/2015/06/27/194630

import random
import argparse
import numpy
import chainer
import chainer.optimizers


class SmallClassificationModel(chainer.Chain):
    """Single-link classifier with a hand-written training step."""

    def __init__(self):
        super(SmallClassificationModel, self).__init__(
            l1 = chainer.links.Linear(2, 2)
            )

    def _forward(self, x):
        """Return the output of ``l1`` applied to ``x``."""
        return self.l1(x)

    def train(self, x_data, y_data, epoch=0):
        """Run one parameter update on a single sample.

        Args:
            x_data: Input array; reshaped to (1, 2) and cast to float32.
            y_data: Label array; cast to int32.
            epoch: Current epoch number. The prediction is printed when it
                is a multiple of 100. It is now an explicit parameter,
                matching the other models' ``train`` methods; it was
                previously read from a global variable. Defaults to 0.

        Uses the module-level ``optimizer``, which must already have been
        set up for this model.
        """
        x = chainer.Variable(x_data.reshape(1,2).astype(numpy.float32))
        y = chainer.Variable(y_data.astype(numpy.int32))
        h = self._forward(x)

        # Reset gradients, backpropagate the loss, then apply the update.
        self.zerograds()
        error = chainer.functions.softmax_cross_entropy(h, y)
        error.backward()
        optimizer.update()
        if epoch%100==0:
            print(' %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], h.data.argmax(), h.data[0,0], h.data[0,1]))

class ClassificationModel(chainer.Chain):
    """Two-link classifier (linear, sigmoid, linear) with a manual training step."""

    def __init__(self):
        super(ClassificationModel, self).__init__(
            l1 = chainer.links.Linear(2, 2),
            l2 = chainer.links.Linear(2, 2)
            )

    def _forward(self, x):
        """Return ``l2(sigmoid(l1(x)))``."""
        hidden = chainer.functions.sigmoid(self.l1(x))
        return self.l2(hidden)

    def train(self, x_data, y_data, epoch):
        """Run one parameter update on a single sample.

        Args:
            x_data: Input array; reshaped to (1, 2) and cast to float32.
            y_data: Label array; cast to int32.
            epoch: Current epoch number. The prediction is printed when it
                is a multiple of 100.

        Uses the module-level ``optimizer``, which must already have been
        set up for this model.
        """
        x = chainer.Variable(x_data.reshape(1,2).astype(numpy.float32))
        y = chainer.Variable(y_data.astype(numpy.int32))
        h = self._forward(x)

        # Reset gradients, backpropagate the loss, then apply the update.
        self.zerograds()
        error = chainer.functions.softmax_cross_entropy(h, y)
        error.backward()
        optimizer.update()
        if epoch%100==0:
            print(' %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], h.data.argmax(), h.data[0,0], h.data[0,1]))

class RegressionModel(chainer.Chain):
    """Two-link network with a single output, trained with mean squared error."""

    def __init__(self):
        super(RegressionModel, self).__init__(
            l1 = chainer.links.Linear(2, 2),
            l2 = chainer.links.Linear(2, 1)
            )

    def _forward(self, x):
        """Return ``l2(sigmoid(l1(x)))``."""
        hidden = chainer.functions.sigmoid(self.l1(x))
        return self.l2(hidden)

    def train(self, x_data, y_data, epoch):
        """Run one parameter update on a single sample.

        Args:
            x_data: Input array; reshaped to (1, 2) and cast to float32.
            y_data: Target array; reshaped to (1, 1) and cast to float32.
            epoch: Current epoch number. Input and output are printed when
                it is a multiple of 100.

        Uses the module-level ``optimizer``, which must already have been
        set up for this model.
        """
        x = chainer.Variable(x_data.reshape(1,2).astype(numpy.float32))
        y = chainer.Variable(y_data.reshape(1,1).astype(numpy.float32))
        h = self._forward(x)

        # Reset gradients, backpropagate the loss, then apply the update.
        self.zerograds()
        error = chainer.functions.mean_squared_error(h, y)
        error.backward()
        optimizer.update()
        if epoch%100==0:
            # The unbalanced ')' that used to end this message was removed.
            print('x: {}  h: {}'.format(x.data, h.data))


# Pick exactly one model; the others are left commented out.
#model = SmallClassificationModel()     # single-layer case
model = ClassificationModel()           # two-layer case
#model = RegressionModel()              # regression variant

# Module-level optimizer; the models' train() methods call optimizer.update().
optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model)

# Each dataset is a list of [input pair, expected label] entries covering the
# four combinations of two binary inputs.
data_xor = [
    [numpy.array([0,0]), numpy.array([0])],
    [numpy.array([0,1]), numpy.array([1])],
    [numpy.array([1,0]), numpy.array([1])],
    [numpy.array([1,1]), numpy.array([0])],
]

data_and = [
    [numpy.array([0,0]), numpy.array([0])],
    [numpy.array([0,1]), numpy.array([0])],
    [numpy.array([1,0]), numpy.array([0])],
    [numpy.array([1,1]), numpy.array([1])],
]

data_or = [
    [numpy.array([0,0]), numpy.array([0])],
    [numpy.array([0,1]), numpy.array([1])],
    [numpy.array([1,0]), numpy.array([1])],
    [numpy.array([1,1]), numpy.array([1])],
]

# Train on the XOR data only (data_and / data_or are defined but not used
# here), printing a header every 100 epochs.
for epoch in range(1001):
    if epoch%100==0:
        print("epoch: %d" %epoch)
    for invec, outvec in data_xor:
        model.train(invec, outvec, epoch)


epoch: 0
 0 & 0 = 0 (zero:0.556705 one:-0.042976)
 0 & 1 = 0 (zero:0.666973 one:-0.017424)
 1 & 0 = 0 (zero:0.439101 one:0.003760)
 1 & 1 = 0 (zero:0.529542 one:0.050103)
epoch: 100
 0 & 0 = 0 (zero:0.268885 one:0.245163)
 0 & 1 = 0 (zero:0.362178 one:0.293970)
 1 & 0 = 1 (zero:0.193413 one:0.251139)
 1 & 1 = 1 (zero:0.271345 one:0.315815)
epoch: 200
 0 & 0 = 0 (zero:0.269798 one:0.247691)
 0 & 1 = 0 (zero:0.361424 one:0.305817)
 1 & 0 = 1 (zero:0.203206 one:0.247351)
 1 & 1 = 1 (zero:0.279754 one:0.320722)
epoch: 300
 0 & 0 = 0 (zero:0.271520 one:0.249168)
 0 & 1 = 0 (zero:0.362605 one:0.315441)
 1 & 0 = 1 (zero:0.210518 one:0.245588)
 1 & 1 = 1 (zero:0.286647 one:0.326352)
epoch: 400
 0 & 0 = 0 (zero:0.274231 one:0.249548)
 0 & 1 = 0 (zero:0.365175 one:0.323945)
 1 & 0 = 1 (zero:0.216460 one:0.245223)
 1 & 1 = 1 (zero:0.292468 one:0.333037)
epoch: 500
 0 & 0 = 0 (zero:0.278565 one:0.248413)
 0 & 1 = 0 (zero:0.369048 one:0.332204)
 1 & 0 = 1 (zero:0.221778 one:0.246102)
 1 & 1 = 1 (ze