# 第3回 演習課題

## 課題1．単純パーセプトロンの実装と学習

In [1]:
import numpy as np
from sklearn.utils import shuffle
np.random.seed(1234)

1.層をLayerクラスとして定義

In [2]:
class Layer:
    def __init__(self,in_dim,out_dim,function):
        self.W = np.zeros((in_dim,out_dim))
        self.b = np.zeros(out_dim)
        self.function = function

    #forward propagation
    def fprop(self,x):
        u = x.dot(self.W) + self.b
        z = self.function(u)
        return z

2.ステップ関数

ヒント：ステップ関数

* $u\geq0$のとき，$f(u)=+1$
* $u<0$のとき，$f(u)=-1$

In [3]:
def step(x):
    y = np.sign(x)
    y[y==0] = 1
    return y

4.データセットの設定とレイヤーインスタンス

In [17]:
#OR
train_X = np.array([[0,1],[1,0],[0,0],[1,1]])
train_y = np.array([[1],[1],[-1],[1]])
test_X,test_y = train_X,train_y

layer = Layer(2,1,step)

5.train関数とtest関数

ヒント：パーセプトロン学習則

$y_n\neq d_n$のとき
* $w^{(t+1)}=w^{(t)}+\epsilon x_nd_n$　
* $b^{(t+1)}=b^{(t)}+\epsilon d_n$

In [18]:
def train(x,d,eps=1):
    #forward propagation
    y = layer.fprop(x)

    #update parameters
    if y * d != 1:
        layer.W = layer.W + eps*d*x.T
        layer.b = layer.b + eps*d

def test(x):
    y = layer.fprop(x)
    return y

5.パラメータの更新

In [19]:
#epoch
for epoch in range(10):
    #online learning
    for x,y in zip(train_X,train_y):
        train(x[np.newaxis,:],y[np.newaxis,:],eps=1)
pred_y = test(test_X)
print(pred_y)

[[ 1.]
 [ 1.]
 [-1.]
 [ 1.]]


## 課題2．活性化関数とその微分の実装

1.シグモイド関数とその微分

In [21]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))
def diff_sigmoid(x):
    v = sigmoid(x)
    return v * (1.0 - v)

２.ソフトマックス関数とその微分

In [22]:
def softmax(x):
    v = np.exp(x)
    return v / np.sum(v, axis=1)[:, np.newaxis]
def diff_softmax(x):
    v = softmax(x)
    return v * (np.ones(x.shape) - v)

2.tanh関数とその微分

In [26]:
def tanh(x):
    return np.tanh(x)
def diff_tanh(x):
    v = tahnh(x)
    return 1 - v*v

## 課題3．多層パーセプトロンの実装と学習

1.Layerクラス

In [28]:
class Layer:
    def __init__(self,in_dim,out_dim,function,diff_function):
        #Xavier
        self.W = np.random.uniform(
                                    low=-np.sqrt(6./(in_dim+out_dim)), 
                                    high=np.sqrt(6./(in_dim+out_dim)), 
                                    size=(in_dim, out_dim))
        self.b = np.zeros(out_dim)
        self.function = function
        
        self.diff_function = diff_function
        self.u     = None
        self.delta = None

    #forward propagation
    def fprop(self,x):
        self.u = np.dot(x, self.W) + self.b
        z = self.function(self.u)
        return z

    #back propagation
    def bprop(self,delta,W):
        self.delta = self.diff_function(self.u) * delta.dot(W.T)
        return self.delta

2.ネットワーク全体の順伝播

In [29]:
def fprops(layers, x):
    z = x
    for layer in layers:
        z = layer.fprop(z)    
    return z

3.ネットワーク全体の誤差逆伝播

In [30]:
def bprops(layers, delta):
    for i,layer in enumerate(layers[::-1]):
        if i == 0:
            layer.delta = delta
        else:
            delta = layer.bprop(delta, _W)
        _W = layer.W

4.データセットの設定とネットワークの定義

In [31]:
#XOR
train_X = np.array([[0,1],[1,0],[0,0],[1,1]])
train_y = np.array([[1],[1],[0],[0]])
test_X,test_y = train_X,train_y

layers = [Layer(2,3,sigmoid,diff_sigmoid),
          Layer(3,1,sigmoid,diff_sigmoid)]

5.train関数とtest関数

In [53]:
def train(X,d,eps=1):
    #forward propagation
    y = fprops(layers,X)
        
    #cost function & delta
    cost = np.sum(-d * np.log(y) - (1 - d) * np.log(1 - y))
    delta = y - d
    
    #back propagation
    bprops(layers,delta)

    #update parameters
    z = X
    for layer in layers:
        dW = np.dot(z.T, layer.delta)  # P.52
        db = np.dot(np.ones(len(z)),layer.delta)  # P.52

        layer.W = layer.W - eps*dW
        layer.b = layer.b - eps*db

        z = layer.fprop(z)
        
    #train cost
    y = fprops(layers,X)
    cost = np.sum(-d * np.log(y) - (1 - d) * np.log(1 - y))
    
    return cost

def test(X,d):
    #test cost
    y = fprops(layers,X)
    cost = np.sum(-d * np.log(y) - (1 - d) * np.log(1 - y))
    return cost,y

6.パラメータの更新

In [52]:
#epoch
for epoch in range(1):
    #online learning
    train_X, train_y = shuffle(train_X, train_y)
    for x,y in zip(train_X,train_y):
        train(x[np.newaxis,:],y[np.newaxis,:])
    cost,pred_y = test(test_X,test_y)
print(pred_y)

[[ 0.01333522]]
[[ 0.07146602]]
[[ 0.96116923]]
[[ 0.96263244]]
[[ 0.96631323]
 [ 0.96645907]
 [ 0.01326808]
 [ 0.07223139]]


In [55]:
print(train_X.shape)
print(train_X)
print(layers[0].W.shape)
print(layers[0].W)
print(layers[0].delta)
print(layers[0].delta.shape)
np.dot(train_X, layers[0].W)

(4, 2)
[[0 0]
 [1 1]
 [1 0]
 [0 1]]
(2, 3)
[[-4.15840954 -5.6538648   3.73025822]
 [-4.0622834   3.8685367  -5.57080021]]
[[ 0.00699668 -0.02678873 -0.00020463]]
(1, 3)


array([[ 0.        ,  0.        ,  0.        ],
       [-8.22069294, -1.7853281 , -1.84054199],
       [-4.15840954, -5.6538648 ,  3.73025822],
       [-4.0622834 ,  3.8685367 , -5.57080021]])

## 宿題．MNISTデータセットを多層パーセプトロンで学習

ヒント
* 出力yはone-of-k表現
* 最終層の活性化関数はsoftmax関数，誤差関数は多クラス交差エントロピー