# Multi Layer Perceptron (MLP) の実装

## 0. MNISTデータの読み込み

In [None]:
import sys

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split

### MNISTデータの読み込み

In [None]:
# Fetch the 'MNIST original' dataset (data_home is the current directory),
# divide the pixel values by 255 and cast the labels to integers.
mnist = fetch_mldata('MNIST original', data_home='./')
X = mnist.data / 255.
Y = mnist.target.astype("int")

In [None]:
# Draw ten samples, taken 6500 rows apart, on a 2x5 grid of 28x28 images.
# NOTE(review): the 6500 stride presumably lands on one example per digit
# when the rows are ordered by label -- confirm against the dataset.
for i in range(10):
    plt.subplot(2, 5, i + 1)
    plt.imshow(np.reshape(X[6500 * i], (28, 28)), cmap='gray_r')
    plt.axis("off")

In [None]:
# Hold out 20% of the samples for testing (fixed seed for a repeatable split).
train_x, test_x, train_y, test_y = train_test_split(
    X, Y, test_size=0.2, random_state=2)

# One-hot encode the integer labels: row k of the 10x10 identity matrix is
# the encoding of class k.
train_y, test_y = (np.eye(10)[labels].astype(np.int32)
                   for labels in (train_y, test_y))

# Sample counts, used later for batching and for averaging the metrics.
train_n, test_n = train_x.shape[0], test_x.shape[0]

In [None]:
class Softmax:
    """Row-wise softmax activation.

    Each row of a 2-D input is mapped to non-negative values that sum to 1.
    The latest input and output are cached on the instance.
    """

    def __init__(self):
        self.x = None  # input of the most recent forward call
        self.y = None  # output of the most recent forward call

    def __call__(self, x):
        """Return the softmax of every row of ``x`` (shape: batch x classes)."""
        self.x = x
        # Subtracting the row maximum does not change the result but keeps
        # exp() from overflowing on large inputs.
        exp_x = np.exp(x - x.max(axis=1, keepdims=True))
        y = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        self.y = y
        return y

    def backward(self, x):
        """Return the element-wise derivative dy_i/dx_i = y_i * (1 - y_i).

        This is only the diagonal of the softmax Jacobian; the cross terms
        between classes are not included. The derivative is evaluated on the
        cached *output* of the last forward call, so ``x`` is unused and is
        accepted only to match the signature of the other activations.
        """
        return self.y * (1 - self.y)

In [None]:
class Sigmoid:
    """Element-wise logistic sigmoid activation.

    The output of the latest forward call is cached so that ``backward`` can
    evaluate the derivative without recomputing the exponential.
    """

    def __init__(self):
        # Output of the most recent forward call; None until first used.
        self.y = None

    def __call__(self, x):
        """Return 1 / (1 + exp(-x)) and remember it for ``backward``."""
        self.y = 1 / (1 + np.exp(-x))
        return self.y

    def backward(self, x):
        """Return the derivative y * (1 - y) at the cached output.

        ``x`` is not used; the value stored by the last forward call is.
        """
        cached = self.y
        return cached * (1 - cached)

In [None]:
class ReLU:
    """Rectified linear unit: keeps positive inputs and zeroes the rest."""

    def __call__(self, x):
        """Return ``x`` where it is positive and 0 elsewhere."""
        positive = x > 0
        return x * positive

    def backward(self, x):
        """Return the derivative at ``x``: 1 where ``x > 0``, otherwise 0."""
        positive = x > 0
        # Multiplying by 1 turns the boolean mask into integers.
        return 1 * positive

In [None]:
class Linear:
    """Fully connected layer followed by an activation.

    Forward pass: ``z = activation(x . W + b)``. The input, pre-activation
    and output are cached so that ``backward`` can compute the parameter
    gradients ``dW`` / ``db`` and the gradient with respect to the input.
    """

    def __init__(self, in_dim, out_dim, activation):
        """Create the layer.

        in_dim, out_dim: sizes of the input and output feature axes.
        activation: a class (not an instance); it is instantiated here.
        """
        # Weights drawn uniformly from [-0.08, 0.08).
        # (He initialisation, randn(in_dim, out_dim) * sqrt(2 / in_dim),
        # is a possible alternative.)
        self.W = np.random.uniform(low=-0.08, high=0.08, size=(in_dim, out_dim))
        self.b = np.zeros(out_dim)
        self.activation = activation()
        # Filled in by the forward / backward passes.
        self.delta = self.x = None
        self.dW = self.db = None

    def __call__(self, x):
        """Apply the layer to a batch ``x`` and return the activated output."""
        self.x = x
        pre_activation = np.dot(x, self.W) + self.b
        self.u = pre_activation
        self.z = self.activation(pre_activation)
        return self.z

    def backward(self, dout):
        """Back-propagate ``dout`` (gradient w.r.t. this layer's output).

        Stores ``delta``, ``dW`` and ``db`` on the layer and returns the
        gradient with respect to the layer's input.
        """
        local_grad = self.activation.backward(self.u)
        self.delta = dout * local_grad

        # Gradients are accumulated (summed) over the batch axis.
        batch_ones = np.ones(len(self.x))
        self.dW = np.dot(self.x.T, self.delta)
        self.db = np.dot(batch_ones, self.delta)

        return np.dot(self.delta, self.W.T)

In [None]:
class MLP:
    """Multi-layer perceptron trained with mini-batch gradient descent.

    ``layers`` is an ordered list of layer objects. Each layer is callable
    (forward pass), exposes ``W``, ``b``, ``x``, ``dW``, ``db`` and
    ``delta`` attributes, and every layer except the last provides
    ``backward(dout)``.

    After ``train`` or ``test`` the network output is available as
    ``self.y`` and the mean cross-entropy loss as ``self.loss``.
    """

    def __init__(self, layers):
        self.layers = layers

    def _forward(self, x, t):
        """Run the forward pass; store and return the mean cross-entropy loss."""
        self.y = x
        for layer in self.layers:
            self.y = layer(self.y)
        # The small constant keeps log() away from log(0).
        self.loss = np.sum(-t * np.log(self.y + 1e-7)) / len(x)
        return self.loss

    def train(self, x, t, lr):
        """Perform one gradient-descent step on the batch ``(x, t)``.

        x: input batch; t: one-hot targets; lr: learning rate.
        Returns the mean cross-entropy loss computed before the update.
        """
        # 1. Forward pass.
        self._forward(x, t)

        # 2. Back-propagation.
        # Output layer: the error is taken directly as (y - t), so the last
        # layer's own backward() is not called.
        # NOTE: delta is not divided by the batch size, so the gradients are
        # sums over the batch while the reported loss is a mean; the
        # effective step therefore grows with the batch size.
        out_layer = self.layers[-1]
        out_layer.delta = self.y - t
        out_layer.dW = np.dot(out_layer.x.T, out_layer.delta)
        out_layer.db = np.dot(np.ones(len(out_layer.x)), out_layer.delta)
        dout = np.dot(out_layer.delta, out_layer.W.T)

        # Hidden layers, from the last hidden layer back to the first.
        for layer in self.layers[-2::-1]:
            dout = layer.backward(dout)

        # 3. Update the parameters of every layer in place.
        for layer in self.layers:
            layer.W -= lr * layer.dW
            layer.b -= lr * layer.db

        return self.loss

    def test(self, x, t):
        """Evaluate the batch ``(x, t)`` without updating any parameter.

        Returns the mean cross-entropy loss; predictions are left in
        ``self.y``.
        """
        return self._forward(x, t)

In [None]:
# Discard a model left over from an earlier run so the next cell starts from
# freshly initialised weights. In a fresh session no model exists yet, so a
# missing name is not an error.
try:
    del model
except NameError:
    pass

In [None]:
# Three-layer network: 784 inputs -> 1000 -> 1000 hidden units (ReLU)
# -> 10 outputs (softmax).
model = MLP([
    Linear(784, 1000, ReLU),
    Linear(1000, 1000, ReLU),
    Linear(1000, 10, Softmax),
])

In [None]:
# Training hyper-parameters.
n_epoch = 20
batchsize = 100
lr = 0.01

# Per-epoch history of the mean loss and the accuracy on both splits.
losses = {'train': [], 'test': []}
accuracies = {'train': [], 'test': []}
for epoch in range(n_epoch):
    # end=' ' keeps the epoch, train and test reports on a single line
    # (a trailing comma inside print() has no effect in Python 3).
    print('epoch %d |' % epoch, end=' ')

    # Training: one pass over the training set in a fresh random order.
    sum_loss = 0
    pred_y = []
    perm = np.random.permutation(train_n)

    for i in range(0, train_n, batchsize):
        x = train_x[perm[i:i+batchsize]]
        t = train_y[perm[i:i+batchsize]]

        # model.train returns the batch mean; weight it by the batch size so
        # that a shorter final batch is averaged correctly.
        sum_loss += model.train(x, t, lr) * len(x)
        pred_y.extend(np.argmax(model.y, axis=1))

    loss = sum_loss / train_n
    losses['train'].append(loss)
    # Predictions were collected in shuffled order, so compare them against
    # the targets in the same order. The product of the one-hot prediction
    # and the one-hot target is 1 only where the two agree.
    accuracy = np.sum(np.eye(10)[pred_y] * train_y[perm]) / train_n
    accuracies['train'].append(accuracy)
    print('Train loss %.3f, accuracy %.4f |' % (loss, accuracy), end=' ')

    # Testing: forward passes only, in the stored order.
    sum_loss = 0
    pred_y = []

    for i in range(0, test_n, batchsize):
        x = test_x[i: i+batchsize]
        t = test_y[i: i+batchsize]

        sum_loss += model.test(x, t) * len(x)
        pred_y.extend(np.argmax(model.y, axis=1))

    loss = sum_loss / test_n
    losses['test'].append(loss)
    accuracy = np.sum(np.eye(10)[pred_y] * test_y) / test_n
    accuracies['test'].append(accuracy)
    print('Test loss %.3f, accuracy %.4f' % (loss, accuracy))