In [1]:
import sys, os
sys.path.append("..")

In [2]:
from DLFS_book.common.functions import *
from DLFS_book.common.gradient import numerical_gradient

In [3]:
class ThreeLayerNet:
    """Fully connected network: affine -> sigmoid -> affine -> sigmoid -> affine -> softmax.

    All learnable arrays are stored in ``self.params`` under the keys
    ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``, ``"W3"`` and ``"b3"``.

    ``sigmoid``, ``softmax``, ``cross_entropy_error`` and the module-level
    ``numerical_gradient`` are not defined in this class; they are expected to
    be in scope already (presumably from the ``DLFS_book.common`` imports).
    """

    # Order in which gradients are computed and stored: weights first, then biases.
    _PARAM_KEYS = ("W1", "W2", "W3", "b1", "b2", "b3")

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size, weight_init_std=0.01):
        """Create the parameters of the three affine layers.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden1_size: Width of the first hidden layer.
            hidden2_size: Width of the second hidden layer.
            output_size: Number of outputs (columns of ``W3``).
            weight_init_std: Factor applied to the ``np.random.randn`` samples
                used as initial weights. Biases start at zero.
        """
        self.params = {}

        layer_sizes = (input_size, hidden1_size, hidden2_size, output_size)
        # Walk consecutive (fan_in, fan_out) pairs; this keeps the W1, W2, W3
        # sampling order, so a seeded global RNG yields the same weights as
        # writing the three layers out by hand.
        for index, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), start=1):
            self.params["W{}".format(index)] = weight_init_std * np.random.randn(fan_in, fan_out)
            self.params["b{}".format(index)] = np.zeros(fan_out)

    def predict(self, x):
        """Run the forward pass and return ``softmax`` of the last affine layer.

        Args:
            x: Input array whose last axis must match the first axis of ``W1``
                (assumed to be a ``(batch, input_size)`` batch -- confirm with callers).

        Returns:
            Whatever ``softmax`` returns for the final scores.
        """
        hidden1 = sigmoid(x @ self.params["W1"] + self.params["b1"])
        hidden2 = sigmoid(hidden1 @ self.params["W2"] + self.params["b2"])
        scores = hidden2 @ self.params["W3"] + self.params["b3"]

        return softmax(scores)

    def loss(self, x, y):
        """Return ``cross_entropy_error(self.predict(x), y)`` for inputs ``x`` and targets ``y``."""
        return cross_entropy_error(self.predict(x), y)

    def accuracy(self, x, y):
        """Return the fraction of samples whose arg-max prediction equals the label.

        Args:
            x: Batch of inputs; ``x.shape[0]`` is used as the sample count.
            y: Targets, either one-hot / score rows (reduced with
                ``argmax(axis=1)``) or a 1-D array of class indices (used as is).

        Returns:
            Number of correct predictions divided by ``x.shape[0]``.
        """
        predicted = np.argmax(self.predict(x), axis=1)

        labels = np.asarray(y)
        # 1-D targets are already class indices; argmax over axis 1 would fail on them.
        if labels.ndim != 1:
            labels = np.argmax(labels, axis=1)

        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Compute a gradient for every parameter with the module-level ``numerical_gradient``.

        Args:
            x: Batch of inputs passed to ``loss``.
            t: Targets passed to ``loss``.

        Returns:
            Dict mapping ``"W1"``, ``"W2"``, ``"W3"``, ``"b1"``, ``"b2"``, ``"b3"``
            to the value the helper returns for that parameter.
        """
        # The argument is ignored on purpose: the loss is re-evaluated from
        # self.params. This assumes the helper perturbs the array it is given
        # in place -- verify against DLFS_book.common.gradient.
        def loss_W(W):
            return self.loss(x, t)

        grads = {}
        for key in self._PARAM_KEYS:
            # Inside a method the bare name resolves to the imported module-level
            # function, not to this method.
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

In [4]:
# Instantiate the network; arguments are given positionally in constructor order.
net = ThreeLayerNet(
    784,  # input_size
    300,  # hidden1_size
    100,  # hidden2_size
    10,   # output_size
)

In [5]:
# Sanity check: W1 should be (input_size, hidden1_size).
np.shape(net.params["W1"])

(784, 300)

In [6]:
# Smoke-test the network on random dummy data (not a real dataset).
rng = np.random.RandomState(0)  # local, seeded generator so the dummy batch is reproducible
x = rng.rand(100, 784)

# Dummy targets: one random class per sample, one-hot encoded. Each row is then
# a valid label whether the loss consumes one-hot rows directly or reduces them
# with argmax.
t = np.eye(10)[rng.randint(0, 10, size=100)]

y = net.predict(x)

# NOTE: numerical differentiation presumably re-evaluates the loss at least once
# per parameter element, so for a 784-300-100-10 network (266,610 elements) this
# call can run for a very long time. Shrink the batch or the network for a quick check.
grad = net.numerical_gradient(x, t)

# One training step would then update each parameter array separately, because
# both net.params and grad are dicts keyed by parameter name:
#   for key in net.params:
#       net.params[key] -= alpha * grad[key]

KeyboardInterrupt: 

In [None]:
# `grad` is a dict keyed by parameter name, so it has no `.shape` of its own;
# report the shape of each entry instead.
print({key: value.shape for key, value in grad.items()})

In [None]:
# Dump the whole gradient dictionary (parameter name -> value computed for it).
print(str(grad))