# 第2回講義 演習

In [None]:
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from keras.datasets import mnist

import numpy as np

np.random.seed(34)

## 目次

課題1. ロジスティック回帰の実装と学習 (OR)
1. シグモイド関数
2. データセットの設定と重みの定義
3. train関数とvalid関数
4. 学習

課題2. ソフトマックス回帰の実装と学習 (MNIST)
1. ソフトマックス関数
2. データセットの設定と重みの定義
3. train関数とvalid関数
4. 学習

## 課題1. ロジスティック回帰の実装と学習 (OR)

### 1. シグモイド関数
$$
    \sigma({\bf x}) = \frac{1}{1 + \exp(-{\bf x})} = \frac{\exp({\bf x})}{1 + \exp({\bf x})}
$$

In [None]:
def sigmoid(x):
    """Element-wise logistic sigmoid, written so that ``exp`` cannot overflow.

    Both exponentials only ever receive non-positive arguments:

    * x >= 0: exp(0) / (1 + exp(-x)) = 1 / (1 + exp(-x))
    * x <  0: exp(x) / (1 + exp(x))

    :param x: scalar or np.ndarray of pre-activation values
    :return: sigmoid(x), element-wise
    """
    numerator = np.exp(np.minimum(x, 0))
    denominator = 1 + np.exp(-np.abs(x))
    return numerator / denominator

### 2. データセットの設定と重みの定義

In [None]:
# OR truth table: each row of x is an input pair, the same row of y is its OR.
x_train_or = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
y_train_or = np.array([[1], [1], [0], [1]])

# Validation and test simply reuse the training arrays (same objects).
x_valid_or = x_test_or = x_train_or
y_valid_or = y_test_or = y_train_or

# Parameters (input dimension: 2, output dimension: 1).
# W starts as small uniform noise in [-0.08, 0.08); b starts at zero.
W_or = np.random.uniform(-0.08, 0.08, (2, 1)).astype('float32')
b_or = np.zeros(1, dtype='float32')

### 3. train関数とvalid関数

#### 3.1. 目的関数（2クラス交差エントロピー誤差関数）

$$ E ({\bf x}, {\bf y}; {\bf W}, {\bf b} ) =  -\frac{1}{N}\sum^N_{i=1} \left[ {\bf y}_i \log {\bf \hat{y}}_i ({\bf x}_i; {\bf W}, {\bf b}) + (1 - {\bf y}_i) \log \{ 1 - {\bf \hat{y}}_i ({\bf x}_i; {\bf W}, {\bf b}) \}\right] $$

#### 3.2. モデルの推論
$$
    {\bf \hat{y}}_i = \sigma({\bf W}^{\mathrm{T}} {\bf x}_i + {\bf b})
$$

#### 3.3. モデルの学習
\begin{align*}
    \delta_i &= {\bf \hat{y}}_i - {\bf y}_i \\
    \nabla_{\bf W} E &= \frac{1}{N}\sum^N_{i=1} {\bf x}_i \delta_i^{\mathrm{T}} \\
    \nabla_{\bf b} E &= \frac{1}{N}\sum^N_{i=1}\delta_i  \\
    {\bf W} &\leftarrow {\bf W} - \epsilon \nabla_{\bf W} E \\
    {\bf b} &\leftarrow {\bf b} - \epsilon \nabla_{\bf b} E
\end{align*}

In [None]:
def np_log(x):
    """Natural logarithm with the argument clamped to [1e-10, 1e+10].

    The lower clamp keeps a zero (or negative) argument from reaching
    ``np.log``, so the result is always finite.

    :param x: scalar or np.ndarray
    :return: log of the clamped values, element-wise
    """
    clamped = np.clip(x, 1e-10, 1e+10)
    return np.log(clamped)

In [None]:
def train_or(x, y, eps=1.0):
    """Perform one gradient-descent step of logistic regression on (x, y).

    The module-level parameters ``W_or`` and ``b_or`` are updated in place.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param y: np.ndarray, target labels, shape=(batch_size, output_dim)
    :param eps: float, learning rate
    :return: binary cross-entropy averaged over all elements, evaluated
        with the parameters as they were before this step's update
    """
    global W_or, b_or

    n_samples = x.shape[0]

    # Forward pass.
    y_hat = sigmoid(x @ W_or + b_or)  # shape: (batch_size, output_dim)

    # Binary cross-entropy, split into the y=1 and y=0 contributions.
    positive_term = y * np_log(y_hat)
    negative_term = (1 - y) * np_log(1 - y_hat)
    cost = (-positive_term - negative_term).mean()

    # Per-sample error signal used by both gradients.
    delta = y_hat - y  # shape: (batch_size, output_dim)

    # Batch-averaged gradients.
    grad_W = x.T @ delta / n_samples  # shape: (input_dim, output_dim)
    grad_b = np.ones(shape=(n_samples,)) @ delta / n_samples  # shape: (output_dim,)

    # Gradient-descent update.
    W_or -= eps * grad_W
    b_or -= eps * grad_b

    return cost

def valid_or(x, y):
    """Evaluate the current OR model on (x, y) without updating parameters.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param y: np.ndarray, target labels, shape=(batch_size, output_dim)
    :return: tuple ``(cost, y_hat)`` - the binary cross-entropy averaged
        over all elements, and the predicted probabilities
    """
    y_hat = sigmoid(x @ W_or + b_or)
    positive_term = y * np_log(y_hat)
    negative_term = (1 - y) * np_log(1 - y_hat)
    return (-positive_term - negative_term).mean(), y_hat

### 4. 学習

In [None]:
# Train on the OR data for 1000 epochs; every call passes the whole
# dataset to train_or as a single batch.
for epoch in range(1000):
    # Reorder the samples; inputs and labels are shuffled together.
    x_train_or, y_train_or = shuffle(x_train_or, y_train_or)
    # The training cost is overwritten by the validation cost on the next line.
    cost = train_or(x_train_or, y_train_or)
    cost, y_pred = valid_or(x_valid_or, y_valid_or)

# Predicted probabilities from the final epoch's validation pass.
print(y_pred)

## 課題2. ソフトマックス回帰の実装と学習 (MNIST)

### 1. ソフトマックス関数
$$
    \mathrm{softmax}({\bf x})_k = \frac{\exp({\bf x}_k)}{\sum^K_{k'=1} \exp({\bf x}_{k'})} \hspace{10mm} \text{for} \, k=1,\ldots, K
$$

In [None]:
def softmax(x, axis=1):
    """Numerically stable softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating so that
    ``exp`` cannot overflow; this shift does not change the result.
    The input array is left untouched (the shift is applied to a copy,
    not in place).

    :param x: array-like of scores, e.g. shape=(batch_size, num_classes)
    :param axis: int, axis that is normalised to sum to 1 (default 1,
        i.e. one distribution per row of a 2-D input)
    :return: np.ndarray of the same shape as ``x`` with non-negative
        entries summing to 1 along ``axis``
    """
    x = np.asarray(x)
    # Out-of-place subtraction: `x -= ...` would modify the caller's array.
    shifted = x - x.max(axis=axis, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

### 2. データセットの設定と重みの定義

In [None]:
# Load both splits returned by Keras and pool them into a single dataset,
# which is re-split below.
(x_mnist_1, y_mnist_1), (x_mnist_2, y_mnist_2) = mnist.load_data()

x_mnist = np.concatenate([x_mnist_1, x_mnist_2], axis=0)
y_mnist = np.concatenate([y_mnist_1, y_mnist_2], axis=0)

# Cast to float32, divide by 255, and flatten everything after the sample axis.
x_mnist = (x_mnist.astype('float32') / 255.).reshape(x_mnist.shape[0], -1)

# One-hot encode the integer labels by selecting rows of a 10x10 identity matrix.
y_mnist = np.eye(N=10)[y_mnist.astype('int32').flatten()]

# Hold out 10000 samples for test, then 10000 of the remainder for validation.
x_train_mnist, x_test_mnist, y_train_mnist, y_test_mnist = train_test_split(
    x_mnist, y_mnist, test_size=10000
)
x_train_mnist, x_valid_mnist, y_train_mnist, y_valid_mnist = train_test_split(
    x_train_mnist, y_train_mnist, test_size=10000
)

In [None]:
# Parameters (input dimension: 784, output dimension: 10).
# W starts as small uniform noise in [-0.08, 0.08); b starts at zero.
W_mnist = np.random.uniform(-0.08, 0.08, (784, 10)).astype('float32')
b_mnist = np.zeros(10, dtype='float32')

### 3. train関数とvalid関数

#### 3.1. 目的関数（多クラス交差エントロピー誤差関数）

$$ E ({\bf x}, {\bf y}; {\bf W}, {\bf b} ) =  -\frac{1}{N}\sum^N_{i=1} \sum^K_{k=1} {\bf y}_{i, k} \log {\bf \hat{y}}_{i, k} ({\bf x}_i; {\bf W}, {\bf b}) $$

#### 3.2. モデルの推論
$$
    {\bf \hat{y}}_i = \mathrm{softmax}({\bf W}^{\mathrm{T}}{\bf x}_i + {\bf b})
$$

#### 3.3. モデルの学習
\begin{align*}
    \delta_i &= {\bf \hat{y}}_i - {\bf y}_i \\
    \nabla_{\bf W} E &= \frac{1}{N}\sum^N_{i=1} {\bf x}_i \delta_i^{\mathrm{T}} \\
    \nabla_{\bf b} E &= \frac{1}{N}\sum^N_{i=1}\delta_i  \\
    {\bf W} &\leftarrow {\bf W} - \epsilon \nabla_{\bf W} E \\
    {\bf b} &\leftarrow {\bf b} - \epsilon \nabla_{\bf b} E
\end{align*}

In [None]:
def train_mnist(x, y, eps=1.0):
    """Perform one gradient-descent step of softmax regression on (x, y).

    The module-level parameters ``W_mnist`` and ``b_mnist`` are updated
    in place.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param y: np.ndarray, target labels, shape=(batch_size, output_dim)
    :param eps: float, learning rate
    :return: cross-entropy summed over classes and averaged over the batch,
        evaluated with the parameters as they were before this step's update
    """
    global W_mnist, b_mnist

    n_samples = x.shape[0]

    # Forward pass.
    y_hat = softmax(x @ W_mnist + b_mnist)  # shape: (batch_size, output_dim)

    # Multi-class cross-entropy: sum over classes, then mean over samples.
    weighted_log = y * np_log(y_hat)
    cost = (-weighted_log).sum(axis=1).mean()

    # Per-sample error signal used by both gradients.
    delta = y_hat - y  # shape: (batch_size, output_dim)

    # Batch-averaged gradients.
    grad_W = x.T @ delta / n_samples  # shape: (input_dim, output_dim)
    grad_b = np.ones(shape=(n_samples,)) @ delta / n_samples  # shape: (output_dim,)

    # Gradient-descent update.
    W_mnist -= eps * grad_W
    b_mnist -= eps * grad_b

    return cost

def valid_mnist(x, y):
    """Evaluate the current MNIST model on (x, y) without updating parameters.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param y: np.ndarray, target labels, shape=(batch_size, output_dim)
    :return: tuple ``(cost, y_hat)`` - the cross-entropy summed over classes
        and averaged over the batch, and the predicted class probabilities
    """
    y_hat = softmax(x @ W_mnist + b_mnist)
    weighted_log = y * np_log(y_hat)
    return (-weighted_log).sum(axis=1).mean(), y_hat

### 4. 学習

In [None]:
# Train for 100 epochs; every call passes the whole training split to
# train_mnist as a single batch.
for epoch in range(100):
    # Reorder the samples; inputs and labels are shuffled together.
    x_train_mnist, y_train_mnist = shuffle(x_train_mnist, y_train_mnist)
    # The training cost is overwritten by the validation cost on the next line.
    cost = train_mnist(x_train_mnist, y_train_mnist)
    cost, y_pred = valid_mnist(x_valid_mnist, y_valid_mnist)
    # Report after the 1st epoch and then after every 10th (10, 20, ..., 100).
    if epoch % 10 == 9 or epoch == 0:
        print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(
            epoch + 1,
            cost,
            # argmax turns one-hot labels and predicted probabilities into class indices.
            accuracy_score(y_valid_mnist.argmax(axis=1), y_pred.argmax(axis=1))
        ))