# 第3章 Chainerの使い方

---

# 3.2 基本オブジェクト

## 3.2.1 Variable

- 変数に入る実際のデータは配列
- Variableの変数の演算結果もVariableの変数になる
- dataという属性で参照できる

In [2]:
import numpy as np
import chainer
from chainer import Variable, optimizers, serializers, utils
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
import chainer.computational_graph as c
from chainer.functions.loss.mean_squared_error import mean_squared_error


# Three inputs, each wrapped as a one-element float32 Variable.
x1 = Variable(np.array([1], dtype=np.float32))
x2 = Variable(np.array([2], dtype=np.float32))
x3 = Variable(np.array([3], dtype=np.float32))

# Forward computation: (1 - 2*2 - 1)**2 + (2*3 - 1)**2 + 1 = 16 + 25 + 1 = 42.
z = (x1-2*x2-1)**2+(x2*x3-1)**2+1
# The underlying array of the result is read through the .data attribute.
z.data

array([ 42.], dtype=float32)

<br>
- 順方向にいったん計算したことになるので、微分値を得るためには、逆向きの計算を行う

In [4]:
z.backward()
x1.grad

In [5]:
x2.grad

In [4]:
x3.grad

array([ 20.], dtype=float32)

#### 微分の式
$$
\frac{\partial z}{\partial x_1}=2(x_1-2x_2-1)\\
\frac{\partial z}{\partial x_2}=-4(x_1-2x_2-1)+2x_3(x_2x_3-1)\\
\frac{\partial z}{\partial x_3}=2x_2(x_2x_3-1)\\
$$
<div style="text-align: center;">
$(x_1,x_2,x_3)=(1,2,3)$を代入すれば<br>　
</div>
$$
\frac{\partial z}{\partial x_1}=2\times(1-2\times2-1)=-8\\
\frac{\partial z}{\partial x_2}=-4\times(1-2\times2-1)+2\times3\times(2\times3-1)=46\\
\frac{\partial z}{\partial x_3}=2\times2\times(2\times3-1)=20\\
$$

## 3.2.2 Function

In [5]:
x = Variable(np.array([-1], dtype=np.float32))
F.sin(x).data # sine function

array([-0.84147096], dtype=float32)

In [6]:
F.sigmoid(x).data # sigmoid function

array([ 0.2689414], dtype=float32)

<br>
- $(\,\cos (x)\,)' = -\sin(x)$ を確かめてみる

In [7]:
x = Variable(np.array([-0.5], dtype=np.float32))
z = F.cos(x)
z.data

array([ 0.87758255], dtype=float32)

In [8]:
z.backward()
x.grad

array([ 0.47942555], dtype=float32)

In [9]:
(-1)*F.sin(x).data # (cos(x))' = -sin(x)

array([ 0.47942555], dtype=float32)

<br>
- シグモイド関数についても$(\;f'(x)=(1-f(x))\;f(x)\;)$を確かめてみる

In [10]:
x = Variable(np.array([-0.5], dtype=np.float32))
z = F.sigmoid(x)
z.data

array([ 0.37754068], dtype=float32)

In [11]:
z.backward()
x.grad

array([ 0.23500371], dtype=float32)

In [12]:
((1-F.sigmoid(x))*F.sigmoid(x)).data # f'(x)=(1-f(x))*f(x)

array([ 0.23500371], dtype=float32)

<br>
- 出力が多次元（スカラーでない）場合は、backward() を呼ぶ前に、出力と同じ形状の勾配の初期値を grad 属性にあらかじめ設定しておく必要がある

In [13]:
x = Variable(np.array([-1, 0, 1], dtype=np.float32))
z = F.sin(x)
# z has three elements, so its gradient is seeded with ones of the same
# length before backward() is called.
z.grad = np.ones(3, dtype=np.float32)
z.backward()
# Gradient of sin with respect to each element of x.
x.grad

array([ 0.54030228,  1.        ,  0.54030228], dtype=float32)

<br>

## 3.2.3 links

In [14]:
h = L.Linear(3,4)

- パラメータは$W$と$b$
- 初期状態では、$W$には適当な値が、$b$には0が入っている

In [15]:
h.W.data

array([[-0.73528326, -0.42754531, -0.79784918],
       [-0.3103115 ,  0.74695861, -0.09231593],
       [ 0.22093138, -0.93409973,  0.30161089],
       [ 0.65773547, -0.11706574, -0.79936308]], dtype=float32)

In [16]:
h.b.data

array([ 0.,  0.,  0.,  0.], dtype=float32)

<br>
- 入力はバッチ（データの集合）
- 下の例は2つの3次元のベクトルを作って$\;h\;$に与えている

In [17]:
x = Variable(np.array(range(6)).astype(np.float32).reshape(2,3))
x.data

array([[ 0.,  1.,  2.],
       [ 3.,  4.,  5.]], dtype=float32)

In [18]:
y = h(x)
y.data

array([[-2.02324367,  0.56232679, -0.33087796, -1.71579194],
       [-7.90527678,  1.59532034, -1.56555033, -2.49187183]], dtype=float32)

正しく計算できているのかの確認

In [19]:
# Recompute the layer output by hand as x . W^T + b, for comparison with h(x).
w = h.W.data
x0 = x.data
x0.dot(w.T) + h.b.data

array([[-2.02324367,  0.56232679, -0.33087796, -1.71579194],
       [-7.90527678,  1.59532034, -1.56555033, -2.49187183]], dtype=float32)

---

# 3.3 Chainクラス

```python
class MyChain(Chain):
    """Example chain that registers two linear links and returns an MSE loss.

    Only ``l1`` takes part in the forward computation; ``l2`` is registered
    but not used by ``fwd``.
    """

    def __init__(self):
        first = L.Linear(4, 3)
        second = L.Linear(3, 3)
        super(MyChain, self).__init__(l1=first, l2=second)

    def __call__(self, x, y):
        """Return the mean squared error between ``fwd(x, y)`` and ``y``."""
        return F.mean_squared_error(self.fwd(x, y), y)

    def fwd(self, x, y):
        """Return ``sigmoid(l1(x))``; ``y`` is accepted but not used here."""
        hidden = self.l1(x)
        return F.sigmoid(hidden)

```

---

# 3.4 optimizers

```python
model = MyChain() # create the model
optimizer = optimizers.SGD() # choose the optimization algorithm
optimizer.setup(model) # attach the model to the optimizer

model.zerograds() # reset the gradients
loss = model(x, y) # forward pass: compute the loss
loss.backward() # backward pass: compute the gradients
optimizer.update() # update the parameters
```

---

# +α　AND・OR・XORの論理演算を学習させてみる

### 参考
[Chainerに入門、And/Or/Xorの実装 ](http://qiita.com/daisukelab/items/6ad3242eeba140023191)

[chainerでニューラルネットを学んでみるよ(chainerでニューラルネット2)](http://hi-king.hatenablog.com/entry/2015/06/27/194630)

## 層が1つの場合

In [8]:
# And/Or/Xor classifier network example
#
# This is re-written version of:
#   http://hi-king.hatenablog.com/entry/2015/06/27/194630
# By following chainer introduction:
#   http://docs.chainer.org/en/stable/tutorial/basic.html

## Chainer cliche
import numpy as np
import chainer
from chainer import Function, Variable, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L

# Neural Network

## Network definition
class NN2x2x1dim(Chain):
    """Network made of a single ``Linear(2, 2)`` link registered as ``l``."""

    def __init__(self):
        single_layer = L.Linear(2, 2)
        super(NN2x2x1dim, self).__init__(l=single_layer)

    def __call__(self, x):
        """Return the output of the linear link for input ``x``."""
        return self.l(x)

# Sub routine

## Utility: Summarize current results
def summarize(model, optimizer, inputs, outputs):
    """Print the model's prediction for every sample in ``inputs``.

    Args:
        model: Object exposing ``predictor(x)``, ``__call__(x, t)`` and an
            ``accuracy`` attribute (an ``L.Classifier`` at the call sites
            in this file).
        optimizer: Accepted for signature symmetry; not used here.
        inputs: Indexable collection of two-element samples.
        outputs: Indexable collection of labels, aligned with ``inputs``.
    """
    # The totals are accumulated but not reported anywhere.
    total_loss = 0
    total_accuracy = 0
    print('model says:')
    for idx, sample in enumerate(inputs):
        x = Variable(sample.reshape(1, 2).astype(np.float32))
        t = Variable(outputs[idx].astype(np.int32))
        y = model.predictor(x)
        # model.accuracy is read right after the call; presumably the call
        # refreshes it (confirm against L.Classifier).
        total_loss += model(x, t).data
        total_accuracy += model.accuracy.data
        first_bit, second_bit = x.data[0, 0], x.data[0, 1]
        scores = y.data
        print('  %d & %d = %d (zero:%f one:%f)' % (first_bit, second_bit, np.argmax(scores), scores[0, 0], scores[0, 1]))

## Runs learning loop
def learning_looper(model, optimizer, inputs, outputs, epoch_size):
    """Train ``model`` one sample at a time and summarize after each epoch.

    Every epoch walks through the whole data set 100 times, calling
    ``optimizer.update(model, x, t)`` for each sample, and then prints the
    current predictions via ``summarize``.

    Args:
        model: Callable handed to ``optimizer.update`` as the loss function.
        optimizer: Optimizer already set up for ``model``.
        inputs: Indexable collection of two-element samples.
        outputs: Indexable collection of labels, aligned with ``inputs``.
        epoch_size: Number of epochs to run.
    """
    passes_per_epoch = 100
    for epoch in range(epoch_size):
        print('epoch %d' % epoch)
        for _ in range(passes_per_epoch):
            for idx, sample in enumerate(inputs):
                features = Variable(sample.reshape(1, 2).astype(np.float32))
                label = Variable(outputs[idx].astype(np.int32))
                optimizer.update(model, features, label)
        summarize(model, optimizer, inputs, outputs)

# Main
## Test data
# The four possible two-bit inputs, and the expected label of each input
# under AND, OR and XOR respectively.
inputs = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]], dtype=np.float32)
and_outputs = np.array([[0], [0], [0], [1]], dtype=np.int32)
or_outputs = np.array([[0], [1], [1], [1]], dtype=np.int32)
xor_outputs = np.array([[0], [1], [1], [0]], dtype=np.int32)

## AND Test --> will learn successfully
## Model & Optimizer instance
and_model = L.Classifier(NN2x2x1dim())
optimizer = optimizers.SGD()
# quicker) optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(and_model)
# Show the predictions of the untrained model, then train for 5 epochs.
print('<<AND: Before learning>>')
summarize(and_model, optimizer, inputs, and_outputs)
print('\n<<AND: After Learning>>')
learning_looper(and_model, optimizer, inputs, and_outputs, epoch_size = 5)

## OR Test --> will learn successfully
## Model & Optimizer instance
# A fresh model and a fresh optimizer are created for each task.
or_model = L.Classifier(NN2x2x1dim())
optimizer = optimizers.SGD()
optimizer.setup(or_model)
print('\n---------\n\n<<OR: Before learning>>')
summarize(or_model, optimizer, inputs, or_outputs)
print('\n<<OR: After Learning>>')
learning_looper(or_model, optimizer, inputs, or_outputs, epoch_size = 5)

## XOR Test --> will FAIL, single link is not enough for XOR
## Model & Optimizer instance
xor_model = L.Classifier(NN2x2x1dim())
optimizer = optimizers.SGD()
optimizer.setup(xor_model)
print('\n---------\n\n<<XOR: Before learning>>')
summarize(xor_model, optimizer, inputs, xor_outputs)
print('\n<<XOR: After Learning>>')
# XOR gets 20 epochs instead of 5.
learning_looper(xor_model, optimizer, inputs, xor_outputs, epoch_size = 20)


<<AND: Before learning>>
model says:


NameError: name 'y' is not defined

## 層が2つの場合

In [18]:
# Chainer training: And/Or/Xor classifier network example with 2 links.
#
# This is re-written version of:
#   http://hi-king.hatenablog.com/entry/2015/06/27/194630
# By following chainer introduction:
#   http://docs.chainer.org/en/stable/tutorial/basic.html

## Chainer cliche
import numpy as np
import chainer
from chainer import Function, Variable, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F

import chainer.links as L

# Neural Network

## Network definition
class NN2x2_2links(Chain):
    """Two-link network: ``Linear(2, 2)`` -> sigmoid -> ``Linear(2, 2)``."""

    def __init__(self):
        super(NN2x2_2links, self).__init__(
            l1 = L.Linear(2, 2),
            l2 = L.Linear(2, 2),
        )

    def __call__(self, x, y):
        """Return the mean squared error between ``forward(x)`` and ``y``.

        Args:
            x: Input variable passed to ``forward``.
            y: Target variable the network output is compared against.
        """
        # forward() accepts only the input; passing y as a second argument
        # raised a TypeError.
        fv = self.forward(x)
        loss = F.mean_squared_error(fv, y)
        return loss

    def forward(self, x):
        """Return the output of ``l2`` applied to ``sigmoid(l1(x))``."""
        return self.l2(F.sigmoid(self.l1(x)))
        

# Sub routine

## Utility: Summarize current results
"""
def summarize(model, optimizer, inputs, outputs):
    sum_loss, sum_accuracy = 0, 0
    print('model says:')
    for i in range(len(inputs)):
        x  = Variable(inputs[i].reshape(1,2).astype(np.float32))
        t  = Variable(outputs[i].astype(np.int32))
        y = model.predictor(x)
        #loss = model(x, t)
        #sum_loss += loss.data
        #sum_accuracy += model.accuracy.data
        print('  %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], np.argmax(y.data), y.data[0,0], y.data[0,1]))
    #mean_loss = sum_loss / len(inputs)
    #mean_accuracy = sum_accuracy / len(inputs)
    #print sum_loss, sum_accuracy, mean_loss, mean_accuracy
"""
## Runs learning loop
def learning_looper(model, optimizer, inputs, outputs, epoch_size):
    """Train ``model`` one sample at a time and print one result per epoch.

    Every epoch walks through the whole data set 100 times. For each sample
    it runs ``model.forward``, clears the gradients, backpropagates the
    softmax cross entropy against the label and lets ``optimizer`` update
    the parameters. After each epoch only the most recently processed
    sample and its output are printed.

    Args:
        model: Chain exposing ``forward(x)`` and ``zerograds()``.
        optimizer: Optimizer already set up for ``model``.
        inputs: Iterable of two-element samples.
        outputs: Iterable of labels, aligned with ``inputs``.
        epoch_size: Number of epochs to run.
    """
    augment_size = 100
    for epoch in range(epoch_size):
        print('epoch %d' % epoch)
        # Reset per epoch so the report below never reads stale or global names.
        x = h = None
        for _ in range(augment_size):
            for sample, label in zip(inputs, outputs):
                x = Variable(sample.reshape(1, 2).astype(np.float32))
                t = Variable(label.astype(np.int32))
                h = model.forward(x)
                model.zerograds()
                error = F.softmax_cross_entropy(h, t)
                error.backward()
                optimizer.update()
        if h is None:
            # No sample was processed (empty data set): nothing to report.
            continue
        print('  %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], np.argmax(h.data), h.data[0,0], h.data[0,1]))
        
"""
## Runs XOR_learning loop
def XOR_learning_looper(model, optimizer, inputs, outputs, epoch_size):
    augment_size = 100
    for epoch in range(epoch_size):
        if epoch%10==0:
            print('epoch %d' % epoch)
        for a in range(augment_size):
            for i in range(len(inputs)):
                x = Variable(inputs[i].reshape(1,2).astype(np.float32))
                t = Variable(outputs[i].astype(np.int32))
                optimizer.update(model, x, t)
        if epoch%10==0:
            summarize(model, optimizer, inputs, outputs)
"""        
# Main

## Test data
# The four possible two-bit inputs, and the expected label of each input
# under AND, OR and XOR respectively.
inputs = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]], dtype=np.float32)
and_outputs = np.array([[0], [0], [0], [1]], dtype=np.int32)
or_outputs = np.array([[0], [1], [1], [1]], dtype=np.int32)
xor_outputs = np.array([[0], [1], [1], [0]], dtype=np.int32)

## AND Test --> will learn successfully
#and_model = L.Classifier(NN2x2_2links())
# The bare chain is used here; learning_looper calls model.forward directly.
and_model = NN2x2_2links()
optimizer = optimizers.SGD()
# do it quicker) optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(and_model)
# Only the header is printed: the summarize call is disabled.
print('<<AND: Before learning>>')
#summarize(and_model, optimizer, inputs, and_outputs)
print('\n<<AND: After Learning>>')
learning_looper(and_model, optimizer, inputs, and_outputs, epoch_size = 21)

## OR Test --> will learn successfully
#or_model = L.Classifier(NN2x2_2links())
or_model = NN2x2_2links()
optimizer = optimizers.SGD()
optimizer.setup(or_model)
print('\n---------\n\n<<OR: Before learning>>')
#summarize(or_model, optimizer, inputs, or_outputs)
print('\n<<OR: After Learning>>')
learning_looper(or_model, optimizer, inputs, or_outputs, epoch_size = 21)

## XOR Test --> the model and optimizer are built, but the training call is disabled
# NOTE(review): unlike AND/OR, this model is wrapped in L.Classifier, while
# NN2x2_2links.__call__ takes (x, y); confirm the two are compatible before
# re-enabling the training call.
xor_model = L.Classifier(NN2x2_2links())
#optimizer = optimizers.SGD()
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(xor_model)
print('\n---------\n\n<<XOR: Before learning>>')
#summarize(xor_model, optimizer, inputs, xor_outputs)
print('\n<<XOR: After Learning>>')
#XOR_learning_looper(xor_model, optimizer, inputs, xor_outputs, epoch_size = 251)


<<AND: Before learning>>

<<AND: After Learning>>
epoch 0


NameError: name 'y' is not defined

## こちらの実装でも学習できます

In [1]:
#!/usr/bin/env python

# 参考元 : http://hi-king.hatenablog.com/entry/2015/06/27/194630

import random
import argparse
import numpy
import chainer
import chainer.optimizers


class SmallClassificationModel(chainer.Chain):
    """Single-link classifier: one ``Linear(2, 2)`` producing two class scores."""

    def __init__(self):
        super(SmallClassificationModel, self).__init__(
            l1 = chainer.links.Linear(2, 2)
            )

    def _forward(self, x):
        """Return the output of the linear link for input ``x``."""
        h = self.l1(x)
        return h

    def train(self, x_data, y_data, epoch=0):
        """Run one training step on a single sample.

        Relies on the module-level ``optimizer`` to apply the update.

        Args:
            x_data: Array with two elements; reshaped to (1, 2) float32.
            y_data: Array holding the label; cast to int32.
            epoch: Current epoch number, used only to decide whether to
                print the result (every 100th epoch). Defaults to 0, which
                prints on every call.
        """
        # epoch is now a parameter, matching the sibling models and the
        # three-argument call in the training loop; it was previously read
        # from the global scope.
        x = chainer.Variable(x_data.reshape(1,2).astype(numpy.float32))
        y = chainer.Variable(y_data.astype(numpy.int32))
        h = self._forward(x)

        self.zerograds()
        error = chainer.functions.softmax_cross_entropy(h, y)
        error.backward()
        optimizer.update()
        if epoch%100==0:
            print(' %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], h.data.argmax(), h.data[0,0], h.data[0,1]))

class ClassificationModel(chainer.Chain):
    """Two-link classifier: ``Linear(2, 2)`` -> sigmoid -> ``Linear(2, 2)``."""

    def __init__(self):
        super(ClassificationModel, self).__init__(
            l1 = chainer.links.Linear(2, 2),
            l2 = chainer.links.Linear(2, 2)
            )

    def _forward(self, x):
        """Return the output of ``l2`` applied to ``sigmoid(l1(x))``."""
        h = self.l2(chainer.functions.sigmoid(self.l1(x)))
        return h

    def train(self, x_data, y_data, epoch):
        """Run one training step on a single sample.

        Relies on the module-level ``optimizer`` to apply the update.

        Args:
            x_data: Array with two elements; reshaped to (1, 2) float32.
            y_data: Array holding the label; cast to int32.
            epoch: Current epoch number, used only to decide whether to
                print the result (every 100th epoch).
        """
        x = chainer.Variable(x_data.reshape(1,2).astype(numpy.float32))
        y = chainer.Variable(y_data.astype(numpy.int32))
        h = self._forward(x)

        self.zerograds()
        # The accuracy value was computed on every step but never used,
        # so that computation has been dropped.
        error = chainer.functions.softmax_cross_entropy(h, y)
        error.backward()
        optimizer.update()
        if epoch%100==0:
            print(' %d & %d = %d (zero:%f one:%f)' % (x.data[0,0], x.data[0,1], h.data.argmax(), h.data[0,0], h.data[0,1]))

class RegressionModel(chainer.Chain):
    """Two-link regressor: ``Linear(2, 2)`` -> sigmoid -> ``Linear(2, 1)``."""

    def __init__(self):
        hidden_link = chainer.links.Linear(2, 2)
        output_link = chainer.links.Linear(2, 1)
        super(RegressionModel, self).__init__(l1=hidden_link, l2=output_link)

    def _forward(self, x):
        """Return the output of ``l2`` applied to ``sigmoid(l1(x))``."""
        activated = chainer.functions.sigmoid(self.l1(x))
        return self.l2(activated)

    def train(self, x_data, y_data, epoch):
        """Run one mean-squared-error training step on a single sample.

        Relies on the module-level ``optimizer`` to apply the update.

        Args:
            x_data: Array with two elements; reshaped to (1, 2) float32.
            y_data: Array with one element; reshaped to (1, 1) float32.
            epoch: Current epoch number, used only to decide whether to
                print the input and output (every 100th epoch).
        """
        x = chainer.Variable(x_data.reshape(1, 2).astype(numpy.float32))
        target = chainer.Variable(y_data.reshape(1, 1).astype(numpy.float32))
        h = self._forward(x)
        self.zerograds()
        chainer.functions.mean_squared_error(h, target).backward()
        optimizer.update()
        if epoch % 100 != 0:
            return
        print('x: {}  h: {})'.format(x.data, h.data))


# Pick exactly one model by leaving a single assignment uncommented.
#model = SmallClassificationModel()     # single-link version
model = ClassificationModel()           # two-link version
#model = RegressionModel()              # regression version

# Module-level optimizer; the models' train() methods call optimizer.update().
optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model)

# Each entry is [input pair, expected label] for the named logic operation.
data_xor = [
    [numpy.array([0,0]), numpy.array([0])],
    [numpy.array([0,1]), numpy.array([1])],
    [numpy.array([1,0]), numpy.array([1])],
    [numpy.array([1,1]), numpy.array([0])],
]

data_and = [
    [numpy.array([0,0]), numpy.array([0])],
    [numpy.array([0,1]), numpy.array([0])],
    [numpy.array([1,0]), numpy.array([0])],
    [numpy.array([1,1]), numpy.array([1])],
]

data_or = [
    [numpy.array([0,0]), numpy.array([0])],
    [numpy.array([0,1]), numpy.array([1])],
    [numpy.array([1,0]), numpy.array([1])],
    [numpy.array([1,1]), numpy.array([1])],
]

# Train on the XOR data for 1001 epochs, printing a header every 100th epoch.
for epoch in range(1001):
    if epoch%100==0:
        print("epoch: %d" %epoch)
    for invec, outvec in data_xor:
        model.train(invec, outvec, epoch)


epoch: 0
 0 & 0 = 0 (zero:0.878019 one:0.573771)
 0 & 1 = 0 (zero:0.718681 one:0.384091)
 1 & 0 = 0 (zero:0.658738 one:0.436243)
 1 & 1 = 0 (zero:0.514890 one:0.291071)
epoch: 100
 0 & 0 = 0 (zero:0.767173 one:0.674628)
 0 & 1 = 0 (zero:0.545713 one:0.543038)
 1 & 0 = 0 (zero:0.522334 one:0.517389)
 1 & 1 = 1 (zero:0.348787 one:0.435395)
epoch: 200
 0 & 0 = 0 (zero:0.823804 one:0.578874)
 0 & 1 = 1 (zero:0.498525 one:0.543165)
 1 & 0 = 1 (zero:0.433706 one:0.435473)
 1 & 1 = 1 (zero:0.271064 one:0.451696)
epoch: 300
 0 & 0 = 0 (zero:1.005237 one:0.392361)
 0 & 1 = 1 (zero:0.425922 one:0.555449)
 1 & 0 = 1 (zero:0.292928 one:0.404549)
 1 & 1 = 1 (zero:0.180805 one:0.512586)
epoch: 400
 0 & 0 = 0 (zero:1.377135 one:0.117970)
 0 & 1 = 1 (zero:0.335019 one:0.618482)
 1 & 0 = 1 (zero:0.164425 one:0.467239)
 1 & 1 = 1 (zero:0.105640 one:0.616617)
epoch: 500
 0 & 0 = 0 (zero:1.745432 one:-0.165976)
 0 & 1 = 1 (zero:0.287419 one:0.675842)
 1 & 0 = 1 (zero:0.078467 one:0.554091)
 1 & 1 = 1 (zer