<a href="https://colab.research.google.com/github/YunSeoHwan/DNN_Base/blob/main/CH04/Gradient_TwoLayerNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **TwoLayerNet**

## **Define**

In [1]:
import sys, os
import numpy as np
sys.path.append('/content/drive/MyDrive/Colab Notebooks/DNN_base/common/')
from functions import *
from gradient import numerical_gradient

In [2]:
class TwoLayerNet:
    """Fully connected network with a single hidden layer.

    All learnable arrays live in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``.  The forward pass relies on ``sigmoid``,
    ``softmax`` and ``cross_entropy_error``, which are not defined in this
    class (presumably supplied by the star-import of ``functions`` -- confirm).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Allocate the weight and bias arrays.

        Args:
            input_size: Number of rows of ``W1``.
            hidden_size: Number of columns of ``W1``, rows of ``W2`` and
                length of ``b1``.
            output_size: Number of columns of ``W2`` and length of ``b2``.
            weight_init_std: Factor applied to the standard-normal samples
                used for the two weight matrices.
        """
        # A dict literal is evaluated top to bottom, so W1 is sampled from the
        # global NumPy random state before W2. Biases start at zero.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass and return the output of ``softmax``.

        Args:
            x: Input array whose last dimension must be compatible with
                ``np.dot(x, W1)``.

        Returns:
            Whatever ``softmax`` returns for the second affine layer's output.
        """
        p = self.params

        hidden_pre = np.dot(x, p['W1']) + p['b1']
        hidden_act = sigmoid(hidden_pre)
        scores = np.dot(hidden_act, p['W2']) + p['b2']

        return softmax(scores)

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between ``predict(x)`` and ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose predicted index matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1, so ``t`` has to be two-dimensional (e.g. one-hot rows).
        The count of matches is divided by ``x.shape[0]``.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)

        n_correct = np.sum(predicted == expected)
        return n_correct / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Compute a gradient entry for every array in ``self.params``.

        Inside this method the bare name ``numerical_gradient`` resolves to
        the module-level function imported by the notebook, not to this
        method, because method names are not in a function's lookup scope.

        Returns:
            dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'`` holding the
            helper's result for the corresponding parameter array.
        """
        def loss_W(W):
            # The argument is ignored: the loss is always recomputed from
            # self.params. Presumably the gradient helper perturbs the
            # parameter array in place -- confirm against gradient.py.
            return self.loss(x, t)

        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'b1', 'W2', 'b2')
        }

In [7]:
# Small demo of argmax along axis 1: one result per row.
x = np.array([[1, 2], [2, 4], [3, 6]])
x.argmax(axis=1)  # column index of the largest value in each row

array([1, 1, 1])

## **Return**

In [3]:
# Build a 784-100-10 network and print the shape of each parameter array.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

for name in ('W1', 'b1', 'W2', 'b2'):
    print(f"{name} shape : {net.params[name].shape}")

W1 shape : (784, 100)
b1 shape : (100,)
W2 shape : (100, 10)
b2 shape : (10,)


In [4]:
x = np.random.rand(1, 784)  # dummy input data (one sample)
t = np.random.rand(1, 10)  # dummy target labels (one sample)
y = net.predict(x)

grads = net.numerical_gradient(x, t)  # compute the gradients

# Print the shapes of the computed gradients themselves. Reading them from
# net.params would only repeat the parameter shapes and never check `grads`.
print(f"W1 grad shape : {grads['W1'].shape}")
print(f"b1 grad shape : {grads['b1'].shape}")
print(f"W2 grad shape : {grads['W2'].shape}")
print(f"b2 grad shape : {grads['b2'].shape}")

W1 grad shape : (784, 100)
b1 grad shape : (100,)
W2 grad shape : (100, 10)
b2 grad shape : (10,)
