<a href="https://colab.research.google.com/github/guten-morgen3776/NN-numpy/blob/main/multi_layer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np

class Affine:
  """Fully connected layer computing ``Z = np.dot(W, x) + b``.

  Samples are laid out column-wise: ``forward`` multiplies ``W`` from the
  left, and ``backward`` takes the batch size from ``x.shape[1]``.

  Attributes:
    W: weight matrix, used as the left operand of the product with the input.
    b: bias added to that product.
    x: input cached by the most recent ``forward`` call.
    dW, db: gradients stored by the most recent ``backward`` call.
  """

  def __init__(self, W, b):
    self.W, self.b = W, b
    # Placeholders only; forward()/backward() fill these in later.
    self.x, self.dW, self.db = None, None, None

  def forward(self, x):
    """Cache ``x`` for the backward pass and return ``np.dot(W, x) + b``."""
    self.x = x
    return np.dot(self.W, x) + self.b

  def backward(self, dZ):
    """Store batch-averaged ``dW``/``db`` and return the input gradient.

    Args:
      dZ: upstream gradient with respect to this layer's output.

    Returns:
      ``np.dot(W.T, dZ)``. Unlike ``dW`` and ``db``, it is not divided by
      the batch size.
    """
    cached_input = self.x
    n_samples = cached_input.shape[1]  # number of columns of the cached input
    self.dW = np.dot(dZ, cached_input.T) / n_samples
    self.db = np.sum(dZ, axis=1, keepdims=True) / n_samples
    return np.dot(self.W.T, dZ)




In [7]:
# Smoke test: push a random batch through one Affine layer and print the
# shapes of the forward output and of the stored gradients.
n_in, n_out, n_samples = 2, 3, 5

x = np.random.randn(n_in, n_samples)
W = np.random.randn(n_out, n_in)
b = np.zeros((n_out, 1))

layer = Affine(W, b)
out = layer.forward(x)
print(out.shape)

dout = np.random.randn(n_out, n_samples)
dx = layer.backward(dout)
dW, db = layer.dW, layer.db
for grad in (dW, db):
  print(grad.shape)


(3, 5)
(3, 2)
(3, 1)


In [8]:
class Relu:
  """ReLU activation: keeps positive entries and zeroes everything else."""

  def __init__(self):
    # Boolean array marking where the last forward input was <= 0. backward()
    # reuses it so the gradient is blocked at exactly the same positions.
    self.mask = None

  def forward(self, x):
    """Return a copy of ``x`` with entries ``<= 0`` set to 0; records the mask."""
    non_positive = x <= 0
    self.mask = non_positive
    activated = x.copy()
    activated[non_positive] = 0
    return activated

  def backward(self, dout):
    """Return a copy of ``dout`` zeroed wherever the forward input was ``<= 0``."""
    grad = dout.copy()
    grad[self.mask] = 0
    return grad




In [9]:
# Smoke test: the negative entry is clipped on the way forward, and feeding the
# forward result back in as the upstream gradient leaves it masked.
relu = Relu()
x = np.array([1, 3, -1])
out = relu.forward(x)
for shown in (out, relu.backward(out)):
  print(shown)

[1 3 0]
[1 3 0]


In [10]:
class sigmoid:
  """Logistic sigmoid activation layer, ``1 / (1 + exp(-x))``.

  The forward output is cached in ``self.out`` because the derivative
  ``out * (1 - out)`` needed by ``backward`` is expressed in terms of it.
  """

  def __init__(self):
    self.out = None  # set by forward(), read by backward()

  def forward(self, x):
    """Apply the sigmoid element-wise, cache the result and return it.

    Evaluated in a numerically stable form: ``exp(-|x|)`` lies in (0, 1], so
    it cannot overflow the way ``exp(-x)`` does for large negative ``x``.
    For ``x >= 0`` the expression is the textbook ``1 / (1 + exp(-x))``; for
    ``x < 0`` the algebraically equal ``exp(x) / (1 + exp(x))`` is used.
    """
    z = np.exp(-np.abs(x))
    # The trailing [()] turns a 0-d result (scalar input) back into a NumPy
    # scalar; for arrays with one or more dimensions it leaves the values as is.
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))[()]
    self.out = out
    return out

  def backward(self, dout):
    """Scale the upstream gradient ``dout`` by ``out * (1 - out)``.

    Uses the output cached by the latest ``forward`` call.
    """
    dx = dout * (1 - self.out) * self.out
    return dx



In [12]:
from collections import OrderedDict

class MultiLayerNet:
  """Fully connected network: Affine -> Relu per hidden layer, Affine -> sigmoid output.

  Layers are stored in forward order in ``self.layers`` under the keys
  ``'Affine<i>'``, ``'Relu<i>'`` and ``'Sigmoid<i>'`` (``i`` is 1-based).
  ``self.params`` maps ``'W<i>'`` / ``'b<i>'`` to the same array objects that
  are handed to the matching Affine layer, using the same keys as the dict
  returned by ``gradient``.
  """

  def __init__(self, input_size, hidden_size_list, output_size):
    """Build and initialise all layers.

    Args:
      input_size: number of input features.
      hidden_size_list: sizes of the hidden layers, in order (any sequence).
      output_size: number of output units.
    """
    self.params = {}
    self.layers = OrderedDict()
    # list(...) lets callers pass a tuple as well as a list.
    all_size_list = [input_size] + list(hidden_size_list) + [output_size]
    layer_num = len(all_size_list) - 1

    # Per-layer setup: small random weights, zero biases.
    for idx in range(1, layer_num + 1):
      input_dim = all_size_list[idx - 1]
      output_dim = all_size_list[idx]
      W = np.random.randn(output_dim, input_dim) * 0.01
      b = np.zeros((output_dim, 1))

      suffix = str(idx)
      # Register the arrays under the same keys gradient() uses.
      self.params['W' + suffix] = W
      self.params['b' + suffix] = b

      self.layers['Affine' + suffix] = Affine(W, b)
      if idx == layer_num:
        # The last Affine layer is followed by a sigmoid, the others by ReLU.
        self.layers['Sigmoid' + suffix] = sigmoid()
      else:
        self.layers['Relu' + suffix] = Relu()

  def predict(self, x):
    """Run ``x`` through every layer in order and return the final output."""
    for layer in self.layers.values():
      x = layer.forward(x)
    # The return sits after the loop so that all layers are applied, not
    # just the first one.
    return x

  def gradient(self, x, t):
    """Back-propagate through all layers and collect the Affine gradients.

    The backward pass is seeded with ``y - t`` and then sent through every
    layer, including the final sigmoid. ``y - t`` is the derivative of the
    squared error ``0.5 * (y - t) ** 2`` with respect to the output ``y``.
    NOTE(review): if a cross-entropy loss was intended, ``y - t`` is already
    the gradient at the sigmoid's input and the sigmoid backward step would
    have to be skipped - confirm the intended loss.

    Args:
      x: input batch, passed to ``predict``.
      t: targets, must be subtractable from the network output.

    Returns:
      dict mapping ``'W<i>'`` / ``'b<i>'`` to the ``dW`` / ``db`` stored by
      the i-th Affine layer during this backward pass.
    """
    y = self.predict(x)
    dout = y - t

    for layer in reversed(list(self.layers.values())):
      dout = layer.backward(dout)

    grads = {}
    for name, layer in self.layers.items():
      if isinstance(layer, Affine):
        idx = name[len('Affine'):]  # numeric suffix of the layer key
        grads['W' + idx] = layer.dW
        grads['b' + idx] = layer.db
    return grads




In [13]:
import keras
from keras.datasets import mnist

# Load MNIST through Keras. Each split comes back as an (images, labels) pair;
# the shapes printed in the next cell are (N, 28, 28) for images and (N,) for labels.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [15]:
# Sanity check: show the shape of every array returned by the loader.
for split in (train_X, train_y, test_X, test_y):
  print(split.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [17]:
# Flatten each image into one row and normalise the pixel values by 1/255.
from tensorflow.keras.utils import to_categorical

# Sample counts come from the arrays and -1 lets NumPy infer the flattened
# length, so nothing here is tied to the 60000 / 10000 / 784 MNIST sizes.
x_train = train_X.reshape(train_X.shape[0], -1) / 255
x_test = test_X.reshape(test_X.shape[0], -1) / 255

# One-hot encode the labels over 10 classes.
t_train = to_categorical(train_y, 10)
t_test = to_categorical(test_y, 10)


In [None]:
# Mini-batch sampling setup.
# train_size: number of rows (samples) in x_train.
# learning_rate is not used in the lines visible here - presumably the SGD step size.
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
iters_num = 10000

for i in range(iters_num):
  # Draw batch_size random sample indices. np.random.choice samples with
  # replacement by default, so an index may repeat within a batch.
  batch_mask = np.random.choice(train_size, batch_size)
  # Row indexing keeps samples on axis 0, e.g. x_batch is (batch_size, 784).
  # NOTE(review): Affine.forward computes np.dot(W, x) with W shaped
  # (output_dim, input_dim), which needs samples on axis 1. These batches
  # probably have to be transposed before being fed to MultiLayerNet - confirm.
  x_batch = x_train[batch_mask]
  t_batch = t_train[batch_mask]
