# 第3回 演習課題

## 課題1．単純パーセプトロンの実装と学習

In [1]:
import numpy as np
from sklearn.utils import shuffle
np.random.seed(1234)

1.層をLayerクラスとして定義

In [2]:
class Layer:
    def __init__(self,in_dim,out_dim,function):
        self.W = np.zeros((in_dim,out_dim))
        self.b = np.zeros(out_dim)
        self.function = function

    #forward propagation
    def fprop(self,x):
        u = x.dot(self.W) + self.b
        z = self.function(u)
        return z

2.ステップ関数

ヒント：ステップ関数

* $u\geq0$のとき，$f(u)=+1$
* $u<0$のとき，$f(u)=-1$

In [3]:
def step(x):
    y = np.sign(x)
    y[y==0] = 1
    return y

4.データセットの設定とレイヤーインスタンス

In [4]:
#OR
train_X = np.array([[0,1],[1,0],[0,0],[1,1]])
train_y = np.array([[1],[1],[-1],[1]])
test_X,test_y = train_X,train_y

layer = Layer(2,1,step)

5.train関数とtest関数

ヒント：パーセプトロン学習則

$y_n\neq d_n$のとき
* $w^{(t+1)}=w^{(t)}+\epsilon x_nd_n$　
* $b^{(t+1)}=b^{(t)}+\epsilon d_n$

In [5]:
def train(x,d,eps=1):
    #forward propagation
    y = layer.fprop(x)

    #update parameters
    if y * d != 1:
        layer.W = layer.W + eps*d*x.T
        layer.b = layer.b + eps*d

def test(x):
    y = layer.fprop(x)
    return y

5.パラメータの更新

In [6]:
#epoch
for epoch in range(10):
    #online learning
    for x,y in zip(train_X,train_y):
        train(x[np.newaxis,:],y[np.newaxis,:],eps=1)
pred_y = test(test_X)
print(pred_y)

[[ 1.]
 [ 1.]
 [-1.]
 [ 1.]]


## 課題2．活性化関数とその微分の実装

1.シグモイド関数とその微分

In [2]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))
def diff_sigmoid(x):
    v = sigmoid(x)
    return v * (1.0 - v)

２.ソフトマックス関数とその微分

In [3]:
def softmax(x):
    v = np.exp(x)
    return v / np.sum(v, axis=1)[:, np.newaxis]
def diff_softmax(x):
    v = softmax(x)
    return v * (np.ones(x.shape) - v)

2.tanh関数とその微分

In [4]:
def tanh(x):
    return np.tanh(x)
def diff_tanh(x):
    v = tahnh(x)
    return 1 - v*v

## 課題3．多層パーセプトロンの実装と学習

1.Layerクラス

In [29]:
class Layer:
    def __init__(self,in_dim,out_dim,function,diff_function):
        #Xavier
        self.W = np.random.uniform(
                                    low=-np.sqrt(6./(in_dim+out_dim)), 
                                    high=np.sqrt(6./(in_dim+out_dim)), 
                                    size=(in_dim, out_dim))
        self.b = np.zeros(out_dim)
        self.function = function
        
        self.diff_function = diff_function
        self.u     = None
        self.delta = None

    #forward propagation
    def fprop(self,x):
        self.u = np.dot(x, self.W) + self.b
        z = self.function(self.u)
        return z

    #back propagation
    def bprop(self,delta,W):
        self.delta = self.diff_function(self.u) * delta.dot(W.T)
        return self.delta

2.ネットワーク全体の順伝播

In [30]:
def fprops(layers, x):
    z = x
    for layer in layers:
        z = layer.fprop(z)    
    return z

3.ネットワーク全体の誤差逆伝播

In [31]:
def bprops(layers, delta):
    for i,layer in enumerate(layers[::-1]):
        if i == 0:
            layer.delta = delta
        else:
            delta = layer.bprop(delta, _W)
        _W = layer.W

4.データセットの設定とネットワークの定義

In [32]:
#XOR
train_X = np.array([[0,1],[1,0],[0,0],[1,1]])
train_y = np.array([[1],[1],[0],[0]])
test_X,test_y = train_X,train_y

layers = [Layer(2,3,sigmoid,diff_sigmoid),
          Layer(3,1,sigmoid,diff_sigmoid)]

5.train関数とtest関数

In [33]:
def train(X,d,eps=1):
    #forward propagation
    y = fprops(layers,X)
        
    #cost function & delta
    cost = np.sum(-d * np.log(y) - (1 - d) * np.log(1 - y))
    delta = y - d
    
    #back propagation
    bprops(layers,delta)

    #update parameters
    z = X
    for layer in layers:
        dW = np.dot(z.T, layer.delta)  # P.52
        db = np.dot(np.ones(len(z)),layer.delta)  # P.52

        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db

        z = layer.fprop(z)
        
    #train cost
    y = fprops(layers,X)
    cost = np.sum(-d * np.log(y) - (1 - d) * np.log(1 - y)) # (2.8)
    
    return cost

def test(X,d):
    #test cost
    y = fprops(layers,X)
    cost = np.sum(-d * np.log(y) - (1 - d) * np.log(1 - y))
    return cost,y

6.パラメータの更新

In [41]:
#epoch
for epoch in range(100):
    #online learning
    train_X, train_y = shuffle(train_X, train_y)
    train(train_X, train_y)
    #for x,y in zip(train_X,train_y):
    #    train(x[np.newaxis,:],y[np.newaxis,:])
    n,pred_y = test(test_X,test_y)
print(pred_y)

[[  9.98639156e-01]
 [  9.98644379e-01]
 [  4.48392422e-04]
 [  2.90083746e-03]]


## 宿題．MNISTデータセットを多層パーセプトロンで学習

ヒント
* 出力yはone-of-k表現
* 最終層の活性化関数はsoftmax関数，誤差関数は多クラス交差エントロピー
* 最終層のデルタは教科書参照

In [5]:
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from sklearn.metrics import f1_score

In [6]:
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')

In [31]:
mnist_x, mnist_y = shuffle(mnist.data.astype("float32")/255.0, mnist.target.astype("int32"))
train_x, test_x, train_y, test_y = train_test_split(mnist_x, mnist_y, test_size=0.2, random_state=42)

In [32]:
# one-of-k表現に変換
from sklearn.preprocessing import LabelBinarizer
train_label_y = LabelBinarizer().fit_transform(train_y)
test_label_y = LabelBinarizer().fit_transform(test_y)

### Layerクラス

In [9]:
class Layer:
    def __init__(self, in_dim, out_dim, function, diff_function):
        #Xavier
        self.W = np.random.uniform(low=-np.sqrt(6./(in_dim+out_dim)), 
                                   high=np.sqrt(6./(in_dim+out_dim)), 
                                   size=(in_dim, out_dim)).astype('float32')
        self.b = np.zeros(out_dim)
        self.function = function
        
        self.diff_function = diff_function
        self.u     = None
        self.delta = None

    #forward propagation
    def fprop(self,x):
        self.u = np.dot(x, self.W) + self.b
        z = self.function(self.u)
        return z

    #back propagation
    def bprop(self,delta,W):
        self.delta = self.diff_function(self.u) * np.dot(delta, W.T)
        return self.delta

### ネットワーク全体の順伝播

In [10]:
def fprops(layers, x):
    z = x
    for layer in layers:
        z = layer.fprop(z)    
    return z

### ネットワーク全体の誤差逆伝播

In [11]:
def bprops(layers, delta):
    for i,layer in enumerate(layers[::-1]):
        if i == 0:
            layer.delta = delta
        else:
            delta = layer.bprop(delta, _W)
        _W = layer.W

### ネットワークの定義

In [39]:
layers = [
    Layer(784, 500, sigmoid, diff_sigmoid),
    Layer(500, 500, sigmoid, diff_sigmoid),
    Layer(500, 10, softmax, diff_softmax),
]

### train関数とtest関数

In [40]:
def train(X, d, eps=0.1):
    # forward propagation
    y = fprops(layers, X)

    # 出力層のdelta
    delta = y - d
    
    # back propagation
    bprops(layers, delta)

    # update parameters
    z = X
    for layer in layers:
        dW = np.dot(z.T, layer.delta) / X.shape[0] # P.52
        db = np.dot(np.ones(len(z)), layer.delta) / X.shape[0] # P.52

        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db

        z = layer.fprop(z)
        
    # train cost
    #y = fprops(layers, X)
    #cost = -np.sum(d * np.log(y)) # (2.11)
    #return cost

def test(X, d):
    # test cost
    y = fprops(layers, X)
    cost = -np.sum(d * np.log(y)) # (2.11)
    return cost, y

### パラメータの更新

#### オンライン学習

`train_x`の行毎に学習する

In [None]:
# epoch
for epoch in range(50):
    X, Y = shuffle(train_x, train_label_y)
    for x, y in zip(X, Y):
        train(x[np.newaxis,:], y[np.newaxis,:], 0.01)

    #if True:
    if ((epoch+1) % 10 == 0) or (epoch == 0):
        cost, pred_y = test(test_x, test_label_y)
        pred = np.argmax(pred_y, axis=1)
        print("EPOCH:: {:3d}, Validatioon Cost:: {:.3f}, Validation F1:: {:.3f}".format(epoch+1,
                                                                                     float(cost),
                                                                                     f1_score(test_y, pred, average="micro")))

EPOCH::   1, Validatioon Cost:: 3220.633, Validation F1:: 0.935
EPOCH::  10, Validatioon Cost:: 1379.637, Validation F1:: 0.972
EPOCH::  20, Validatioon Cost:: 1739.872, Validation F1:: 0.976
EPOCH::  30, Validatioon Cost:: 1763.878, Validation F1:: 0.980
EPOCH::  40, Validatioon Cost:: 1862.736, Validation F1:: 0.980
EPOCH::  50, Validatioon Cost:: 1919.475, Validation F1:: 0.980


#### ミニバッチ学習

教科書 3.3節

In [41]:
## Iterate
batch_size = 100
nbatches = train_x.shape[0] // batch_size

for epoch in range(300):
    x, y = shuffle(train_x, train_label_y)
    for i in range(nbatches):
        start = i * batch_size
        end = start + batch_size
        train(x[start:end], y[start:end], 0.01)

    #if True:
    if ((epoch+1) % 10 == 0) or (epoch == 0):
        cost, pred_y = test(test_x, test_label_y)
        pred = np.argmax(pred_y, axis=1)
        print("EPOCH:: {:3d}, Validatioon Cost:: {:.3f}, Validation F1:: {:.3f}".format(epoch+1,
                                                                                     float(cost),
                                                                                     f1_score(test_y, pred, average="micro")))

EPOCH::   1, Validatioon Cost:: 31103.659, Validation F1:: 0.319
EPOCH::  10, Validatioon Cost:: 9508.765, Validation F1:: 0.829
EPOCH::  20, Validatioon Cost:: 5979.770, Validation F1:: 0.883
EPOCH::  30, Validatioon Cost:: 5103.961, Validation F1:: 0.898
EPOCH::  40, Validatioon Cost:: 4694.852, Validation F1:: 0.905
EPOCH::  50, Validatioon Cost:: 4464.803, Validation F1:: 0.910
EPOCH::  60, Validatioon Cost:: 4293.919, Validation F1:: 0.913
EPOCH::  70, Validatioon Cost:: 4161.937, Validation F1:: 0.914
EPOCH::  80, Validatioon Cost:: 4069.268, Validation F1:: 0.917
EPOCH::  90, Validatioon Cost:: 3966.292, Validation F1:: 0.918
EPOCH:: 100, Validatioon Cost:: 3900.359, Validation F1:: 0.919
EPOCH:: 110, Validatioon Cost:: 3788.660, Validation F1:: 0.922
EPOCH:: 120, Validatioon Cost:: 3710.749, Validation F1:: 0.924
EPOCH:: 130, Validatioon Cost:: 3648.865, Validation F1:: 0.925
EPOCH:: 140, Validatioon Cost:: 3536.552, Validation F1:: 0.928
EPOCH:: 150, Validatioon Cost:: 3462.20

In [42]:
# テストデータを用いて予測精度を計算
from sklearn.metrics import confusion_matrix, classification_report
pred = [np.argmax(v) for v in pred_y]
print(confusion_matrix(test_y, pred))
print(classification_report(test_y, pred))

[[1323    0    0    2    3    9   12    3    9    3]
 [   0 1502    7    4    2    5    0    4    6    1]
 [   7    7 1314   16    6    4   13   15   10    2]
 [   5   11   24 1348    3   28    6   10   19    6]
 [   1    3    5    1 1269    0   15    5    6   36]
 [   9    4    1   16    6 1162   13    3   17   11]
 [   7    2    6    0    8   14 1340    2    4    0]
 [   3    8   15    4    6    5    0 1410    0   28]
 [   2   14    8   19    6   17   13    1 1294   11]
 [   8    6    2    8   33    4    0   20   10 1330]]
             precision    recall  f1-score   support

          0       0.97      0.97      0.97      1364
          1       0.96      0.98      0.97      1531
          2       0.95      0.94      0.95      1394
          3       0.95      0.92      0.94      1460
          4       0.95      0.95      0.95      1341
          5       0.93      0.94      0.93      1242
          6       0.95      0.97      0.96      1383
          7       0.96      0.95      0.96  