# 学習

ステップ１で作った関数をもう一度実装

In [2]:
# numpyを導入
import numpy as np

# モデル作成
def create_model():    
    model = {
        # 荷重を -5 ~ 5 の乱数で初期化
        "weights" : np.random.uniform(-5, 5, 2),    
        # バイアスも！
        "bias"    : np.random.uniform(-5, 5, 1)}
    return model

# 推論
def predict(model, activation, x):
    
    # 足し算を計算し…
    y = model["weights"][0] * x[0] + model["weights"][1] * x[1] + model["bias"]
    
    # 活性化で処理し、その結果を返す
    y = activation(y)
    return y

# 線形活性化関数
def linear_activation(x):
    return x

# ステップ活性化関数
def step_activation(x):
    if x >= 0.5:
        return 1
    else:
        return 0    

## 誤差を計算

正しい答え（ラベル）と推論した答えの差分を用い、学習させる。

まず、「AND」のラベルを準備しよう：

In [3]:
# 入力
x_list = np.array([
    [0, 0], 
    [0, 1], 
    [1, 0], 
    [1, 1]
], dtype = float)
print(x_list.shape)

#期待してる出力（ラベル）
y_true = np.array([
    [0], 
    [0], 
    [0], 
    [1]
], dtype = float)
print(y_true.shape)

(4, 2)
(4, 1)


誤差（損失）関数を実装しよう。

課題により、適切な関数を使うべきが、今回の入門課題はただの「差分」にしよう。

In [4]:
# 損失関数
def error(y_true, y_pred):
    return y_true - y_pred

In [5]:
# 推論とラベルの誤差は：
model = create_model()

In [None]:
# 結果を表示する
def print_results(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]

    for i in range(data_size):
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)
        err = error(y_t, y_p)
        print(x, y_t, "->", y_p, "err:", err)

In [7]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> [3.98567413] err: [-3.98567413]
[0. 1.] [0.] -> [0.73090718] err: [-0.73090718]
[1. 0.] [0.] -> [8.11939151] err: [-8.11939151]
[1. 1.] [1.] -> [4.86462457] err: [-3.86462457]


In [8]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 1 err: [-1.]
[0. 1.] [0.] -> 1 err: [-1.]
[1. 0.] [0.] -> 1 err: [-1.]
[1. 1.] [1.] -> 1 err: [0.]


## 学習
誤差にて、荷重を調整しよう。ただ、「入力」は「０」であると、出力に影響がないため、入力は「１」のときだけに荷重を調整する、つまり：

$$ w_i' = w_i + x_i \cdot error(y_{true}, y_{pred}) $$ 


In [None]:
# 荷重を更新する関数
def update_weight(w, x, err):
    return w + x * err

# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]

    # 誤差の平均
    mse = 0

    # さて、１個ずつを処理しよう
    for i in range(data_size):

        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)

        # 誤差を計算
        err  = error(y_t, y_p)
        mse += err * err

        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]

        w0   = update_weight(w0, x[0], err)
        w1   = update_weight(w1, x[1], err)
        bias = update_weight(bias, 1 , err)

        model["weights"][0] = w0[0]
        model["weights"][1] = w1[0]
        model["bias"] = bias[0]

    #誤差（損失）としては、平均値を返す
    return mse / data_size

In [10]:
loss = fit_single_step(model, linear_activation, x_list, y_true)
print("loss:", loss)

loss: [37.85886901]


In [11]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> 4.254766942751925 err: [-4.25476694]
[0. 1.] [0.] -> 12.643251269366607 err: [-12.64325127]
[1. 0.] [0.] -> 9.388484326614682 err: [-9.38848433]
[1. 1.] [1.] -> 17.776968653229364 err: [-16.77696865]


In [12]:
def fit(model, activation, x_list, y_true, epochs):
    
    for i in range(epochs):
        loss = fit_single_step(model, activation, x_list, y_true)
        print("epoch:", i, "loss:", loss)

In [13]:
fit(model, linear_activation, x_list, y_true, 100)

epoch: 0 loss: [26.03679821]
epoch: 1 loss: [54.54599546]
epoch: 2 loss: [26.34888996]
epoch: 3 loss: [67.00133848]
epoch: 4 loss: [28.66098171]
epoch: 5 loss: [81.4566815]
epoch: 6 loss: [32.97307346]
epoch: 7 loss: [97.91202452]
epoch: 8 loss: [39.28516521]
epoch: 9 loss: [116.36736754]
epoch: 10 loss: [47.59725696]
epoch: 11 loss: [136.82271056]
epoch: 12 loss: [57.90934871]
epoch: 13 loss: [159.27805358]
epoch: 14 loss: [70.22144046]
epoch: 15 loss: [183.73339659]
epoch: 16 loss: [84.53353221]
epoch: 17 loss: [210.18873961]
epoch: 18 loss: [100.84562396]
epoch: 19 loss: [238.64408263]
epoch: 20 loss: [119.1577157]
epoch: 21 loss: [269.09942565]
epoch: 22 loss: [139.46980745]
epoch: 23 loss: [301.55476867]
epoch: 24 loss: [161.7818992]
epoch: 25 loss: [336.01011169]
epoch: 26 loss: [186.09399095]
epoch: 27 loss: [372.46545471]
epoch: 28 loss: [212.4060827]
epoch: 29 loss: [410.92079772]
epoch: 30 loss: [240.71817445]
epoch: 31 loss: [451.37614074]
epoch: 32 loss: [271.0302662]
epoch

## 学習率とは

上記のように、差分だけを直そうとすると、平均的な誤差がお大きくなってしまう。その理由は、確か、勾配の方向が正しいが、ステップが大きいすぎる。つまり、最適な数値から大幅に超えてしまい、段々離れてしまう。

「学習率」という係数で、ステップの大きさを小さくし、少しずつ最適な数値に近づくようにする。

In [None]:
# 学習は「fit」とよく言われる
def fit_single_step(model, activation, x_list, y_true):

    # データセットのサイズは入力のshapeから求める
    data_size = x_list.shape[0]

    # 誤差の平均
    mse = 0

    # さて、１個ずつを処理しよう
    for i in range(data_size):

        # 推論
        x   = x_list[i]
        y_t = y_true[i]
        y_p = predict(model, activation, x)

        # 誤差を計算
        err = error(y_t, y_p)
        mse = err * err

        # 学習率
        learning_rate = 0.01

        # 荷重を更新
        w0   = model["weights"][0]
        w1   = model["weights"][1]
        bias = model["bias"]

        w0   = update_weight(w0, x[0], err * learning_rate)
        w1   = update_weight(w1, x[1], err * learning_rate)
        bias = update_weight(bias, 1 , err * learning_rate)

        model["weights"][0] = w0[0]
        model["weights"][1] = w1[0]
        model["bias"] = bias[0]

    return mse / data_size

In [15]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [3351.1425915]
epoch: 1 loss: [3104.55633317]
epoch: 2 loss: [2880.94456366]
epoch: 3 loss: [2677.94607308]
epoch: 4 loss: [2493.45736193]
epoch: 5 loss: [2325.60336468]
epoch: 6 loss: [2172.71158543]
epoch: 7 loss: [2033.28924186]
epoch: 8 loss: [1906.00306165]
epoch: 9 loss: [1789.6614182]
epoch: 10 loss: [1683.19852984]
epoch: 11 loss: [1585.66047942]
epoch: 12 loss: [1496.19284044]
epoch: 13 loss: [1414.02972104]
epoch: 14 loss: [1338.48405973]
epoch: 15 loss: [1268.93902647]
epoch: 16 loss: [1204.84040007]
epoch: 17 loss: [1145.68980813]
epoch: 18 loss: [1091.03872928]
epoch: 19 loss: [1040.48316927]
epoch: 20 loss: [993.65893286]
epoch: 21 loss: [950.23742286]
epoch: 22 loss: [909.92190552]
epoch: 23 loss: [872.44418864]
epoch: 24 loss: [837.56166533]
epoch: 25 loss: [805.05468147]
epoch: 26 loss: [774.72419015]
epoch: 27 loss: [746.38966045]
epoch: 28 loss: [719.88721185]
epoch: 29 loss: [695.06794877]
epoch: 30 loss: [671.79647289]
epoch: 31 loss: [649.94955325]


In [16]:
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> -0.38812046755534374 err: [0.38812047]
[0. 1.] [0.] -> 0.22664478763054247 err: [-0.22664479]
[1. 0.] [0.] -> 0.23053542483961215 err: [-0.23053542]
[1. 1.] [1.] -> 0.8453006800254983 err: [0.15469932]


In [17]:
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [0.] -> 0 err: [0.]
[1. 0.] [0.] -> 0 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]


## 練習:「OR」を学習させる

In [None]:
#期待してる出力（ラベル）
y_true = np.array([
    [0],
    [1],
    [1],
    [1]
], dtype = float)
print(y_true.shape)

(4, 1)


In [19]:
# 推論とラベルの誤差は：
model = create_model()

In [20]:
fit(model, linear_activation, x_list, y_true, 1000)

epoch: 0 loss: [12.32774644]
epoch: 1 loss: [11.16905281]
epoch: 2 loss: [10.1329487]
epoch: 3 loss: [9.20573876]
epoch: 4 loss: [8.37530849]
epoch: 5 loss: [7.63093808]
epoch: 6 loss: [6.96313848]
epoch: 7 loss: [6.36350693]
epoch: 8 loss: [5.82459971]
epoch: 9 loss: [5.33982001]
epoch: 10 loss: [4.90331904]
epoch: 11 loss: [4.50990896]
epoch: 12 loss: [4.154986]
epoch: 13 loss: [3.83446281]
epoch: 14 loss: [3.54470862]
epoch: 15 loss: [3.28249663]
epoch: 16 loss: [3.04495748]
epoch: 17 loss: [2.82953824]
epoch: 18 loss: [2.63396617]
epoch: 19 loss: [2.45621678]
epoch: 20 loss: [2.29448556]
epoch: 21 loss: [2.14716309]
epoch: 22 loss: [2.01281297]
epoch: 23 loss: [1.89015233]
epoch: 24 loss: [1.77803465]
epoch: 25 loss: [1.67543453]
epoch: 26 loss: [1.5814342]
epoch: 27 loss: [1.49521159]
epoch: 28 loss: [1.41602983]
epoch: 29 loss: [1.34322786]
epoch: 30 loss: [1.27621214]
epoch: 31 loss: [1.21444937]
epoch: 32 loss: [1.15745994]
epoch: 33 loss: [1.10481216]
epoch: 34 loss: [1.056117

In [21]:
# 生の出力（活性化なしで）
print_results(model, linear_activation, x_list, y_true)

[0. 0.] [0.] -> 0.24858386288124149 err: [-0.24858386]
[0. 1.] [1.] -> 0.7493469395704597 err: [0.25065306]
[1. 0.] [1.] -> 0.7468098764553397 err: [0.25319012]
[1. 1.] [1.] -> 1.247572953144558 err: [-0.24757295]


In [22]:
# ステップ関数を適用した後：
print_results(model, step_activation, x_list, y_true)

[0. 0.] [0.] -> 0 err: [0.]
[0. 1.] [1.] -> 1 err: [0.]
[1. 0.] [1.] -> 1 err: [0.]
[1. 1.] [1.] -> 1 err: [0.]
