In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys, os

from mod import *

In [None]:
import time

# 4장 - 신경망 학습

## 4.2 손실함수

### 4.2.1 평균 제곱 오차

In [None]:
# Example network output: ten class scores, the largest (0.6) at index 2.
y = [.1, .05, .6, 0, .05, .1, 0, .1, 0, 0]

# One-hot float targets built from an index comparison:
# `t` marks index 2 as the correct class, `t1` marks index 3.
t = (np.arange(len(y)) == 2).astype(float)
t1 = (np.arange(len(y)) == 3).astype(float)

In [None]:
# Half the sum of squared differences between the output y and the target t
# (t marks index 2, where y has its largest score).
sum((y-t)**2) / 2

In [None]:
# Same error measure against t1, which marks index 3 (where y is 0) as correct.
sum((y-t1)**2) / 2

In [None]:
def mse(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Args:
        y: predicted values; any array-like (list, tuple or ndarray).
        t: target values; array-like broadcastable against ``y``.

    Returns:
        The scalar ``sum((y - t) ** 2) / 2``.
    """
    # Coerce both sides so that two plain Python lists can be subtracted too.
    y = np.asarray(y)
    t = np.asarray(t)
    return np.sum((y - t) ** 2) / 2

In [None]:
# Evaluate the function form on the same y and t as the inline expression above.
mse(y, t)

### 4.2.2 교차엔트로피 오차

In [None]:
def _cee(y, t):
    delta = 1e-7
    arr = np.log(np.array(y) + delta)
    return -np.sum(arr * t)

### 4.2.3 미니배치 학습

In [None]:
# Remember the directory the notebook started in and add it to the import path,
# then switch the working directory to the book's repository folder.
# NOTE(review): the chdir target is relative, so running this cell a second time
# looks for the folder inside itself; restart the kernel before re-running.
cwd = os.getcwd()
sys.path.append(os.getcwd())
os.chdir('./deep-learning-from-scratch-master')

# presumably resolved relative to the new working directory — TODO confirm
from dataset.mnist import load_mnist

In [None]:
# Load the train/test splits; the flags request normalised inputs and one-hot labels
# (exact semantics are defined by load_mnist, which is not shown in this notebook).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

In [None]:
# `prints` is not defined in this notebook; presumably it comes from
# `from mod import *` and summarises an array — TODO confirm.
prints(x_train)
prints(t_train)

In [None]:
# Draw a random mini-batch of `batch_size` samples from the training set.
batch_size = 10
train_size = x_train.shape[0]

# Indices are drawn with replacement (np.random.choice default),
# so the same sample can appear more than once in a batch.
batch_mask = np.random.choice(train_size, batch_size)
x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

### 4.2.4 (배치용) 교차 엔트로피 오차 구현하기

In [None]:
def cee(y, t):
    """Mean cross-entropy error over a batch.

    Args:
        y: ndarray of predicted probabilities, shape (classes,) or (batch, classes).
        t: ndarray of targets, either one-hot (same number of elements as ``y``)
           or integer class indices (one per sample).

    Returns:
        The negated sum of ``log(p + 1e-7)`` over the probabilities of the correct
        classes, divided by the batch size.
    """
    # A single sample is promoted to a batch of one.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # Equal element counts mean t is one-hot: collapse it to class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    # Fancy indexing picks, for each row, the probability of its correct class.
    picked = y[np.arange(n_samples), t]
    return -np.sum(np.log(picked + 1e-7)) / n_samples

In [None]:
import random
# Scratch cell: builds an array of random integers and inspects index helpers.
# random.randint is inclusive on both ends, so values lie in [1, batch_size].
_randint = [random.randint(1, batch_size) for _ in range(batch_size)]
randint = np.array(_randint)
# The reshaped result is not assigned; `randint` itself keeps its 1-D shape.
randint.reshape(1, randint.size)
# Last expression of the cell: a list wrapping the row-index array 0..batch_size-1.
[np.arange(batch_size)]

## 4.3 수치 미분

### 4.3.1 미분

In [None]:
def num_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: callable of one argument.
        x: point (or array of points) at which to differentiate.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.
    """
    h = 1e-4
    ahead, behind = f(x + h), f(x - h)
    return (ahead - behind) / (2 * h)

In [None]:
def function_1(x):
    """Example quadratic ``0.01 * x**2 + 0.1 * x``; works on scalars and arrays."""
    quadratic_term = 0.01*x**2
    linear_term = 0.1*x
    return quadratic_term + linear_term

In [None]:
def gradient_graph(f, x):
    """Plot the tangent line of ``f`` at the point ``x`` on the current axes.

    Args:
        f: callable of one argument; it is differentiated with ``num_diff``.
        x: scalar point of tangency. The line is drawn over [x - 10, x + 10).
    """
    slope = num_diff(f, x)
    # The intercept has to come from f itself (not a fixed function),
    # otherwise the line does not touch f's curve for any other f.
    intercept = f(x) - slope * x
    range_x = np.arange(x-10, x+10, 0.1)
    plt.plot(range_x, slope * range_x + intercept)

In [None]:
# Sample function_1 on [0, 20) in steps of 0.1 for the curve.
x = np.arange(0, 20, .1)
y = function_1(x)

# Draw the tangent at x_value first, then the curve, then mark the point of tangency.
x_value = 10
gradient_graph(function_1, x_value)
plt.plot(x, y)
plt.scatter(x_value, function_1(x_value))

plt.show()

In [None]:
# Numerical derivative of function_1 at x = 10; the analytic value is 0.02*10 + 0.1 = 0.3.
num_diff(function_1, 10)

### 4.3.3 편미분

In [None]:
def function_2(x):
    """Return the sum of the squares of the first two components of ``x``."""
    first_sq = x[0]**2
    second_sq = x[1]**2
    return first_sq + second_sq

# Quick check: 1**2 + 2**2 == 5.
assert function_2([1, 2]) == 5

In [None]:
def function_tmp1(x0):
    """Two-variable sum of squares with the second variable held fixed at 4.0."""
    fixed_x1 = 4.0
    return x0**2 + fixed_x1**2.0
num_diff(function_tmp1, 3.0)

In [None]:
def function_tmp2(x1):
    """Two-variable sum of squares with the first variable held fixed at 3.0."""
    fixed_x0 = 3.0
    return fixed_x0**2 + x1**2.0
num_diff(function_tmp2, 4.0)

## 4.4 기울기

In [None]:
# Finite-difference step size kept at module level.
h = 1e-4

def f(x):
    """Return ``x[0]**2 + x[1]**2`` for any indexable ``x``."""
    x0, x1 = x[0], x[1]
    return x0**2 + x1**2

# Quick check: 1**2 + 2**2 == 5.
assert f([1, 2]) == 5

In [None]:
def num_grad(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Args:
        f: callable that takes the array ``x`` and returns a scalar.
        x: float ndarray of any shape. Each element is perturbed in place and
           restored to its exact original value before moving on.

    Returns:
        A float ndarray with the same shape as ``x`` holding the partial derivatives.
    """
    step = 1e-4  # kept local so the function does not rely on a global defined elsewhere
    grad = np.zeros_like(x, dtype=float)

    # np.ndindex visits every element, so 1-D and multi-dimensional inputs both work.
    for idx in np.ndindex(x.shape):
        original = x[idx]  # scalar copy of the current element

        # f(x + step)
        x[idx] = original + step
        f_plus = f(x)

        # f(x - step)
        x[idx] = original - step
        f_minus = f(x)

        grad[idx] = (f_plus - f_minus) / (2 * step)
        x[idx] = original  # restore

    return grad

assert all(num_grad(f, np.array([3., 4.])).round() == np.array([6., 8.]))

### 4.4.1 경사법(경사하강법)

In [None]:
def grad_descent(f, init_x, lr=.01, step_num=100):
    """Minimise ``f`` by gradient descent and scatter-plot every iterate.

    Args:
        f: callable taking a 2-element array and returning a scalar.
        init_x: starting point with two components (each step is unpacked into
            two plot coordinates). It is copied, so the caller's array is left
            untouched and repeated calls start from the same point.
        lr: learning rate applied to the numerical gradient.
        step_num: number of update steps.

    Returns:
        The final point as a float ndarray.
    """
    x = np.array(init_x, dtype=float)  # work on a copy: the update below is in place
    a, b = x
    plt.scatter(a, b)
    for step in range(step_num):
        x -= lr * num_grad(f, x)
        a, b = x
        plt.scatter(a, b)
        plt.text(a, b, step + 1)  # label each point with its 1-based step number
    return x

In [None]:
# With the sum-of-squares f defined above this displays 3**2 + 4**2 = 25.
f([3, 4])

In [None]:
# Settings for the gradient-descent run: starting point, learning rate, number of steps.
init_x = np.array([-3., 4.])
lr = .1
step_num = 100

In [None]:
# Run the descent with the settings above; the returned final point is the cell output.
grad_descent(f=f, init_x=init_x, lr=lr, step_num=step_num)

In [None]:
#todaytime = 1610353965.7306166
#page = 131
#next_thing_to-do = "matplotlib을 이용하여 변화를 추적한다"

In [None]:
import datetime

def getstartTime():
    """Print the current local time in a Korean date format and return it.

    Returns:
        The ``datetime.datetime`` captured when the function was called.
    """
    now = datetime.datetime.today()
    stamp = now.strftime("%Y년 %m월 %d일 %A, %H시 %M분")
    print(stamp)
    return now

In [None]:
# Record (and print) the moment the study session starts.
startTime = getstartTime()

In [None]:
# Elapsed study time since startTime.
endTime = datetime.datetime.today()
# Despite its name this is a timedelta object, not a number of seconds.
studyingSeconds = endTime - startTime
# Elapsed time in minutes, as a float.
studyingTimeint = (studyingSeconds.total_seconds() / 60)

### 4.4.2 신경망에서의 기울기

In [None]:
# 2021년 01월 14일 Thursday, 16시 16분
# startTime = startTime()

In [None]:
from mod import softmax

In [None]:
class simpleNet:
    """Single-layer network with a 2x3 weight matrix and no bias."""

    def __init__(self):
        # Weights are drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores ``x . W``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy error of softmax(predict(x)) against ``t``."""
        return cee(softmax(self.predict(x)), t)

In [None]:
# Toy problem for simpleNet: one 2-feature input and a one-hot target for class index 2.
lr = 1
x = np.array([.6, .9])
t = np.array([0, 0, 1])
net = simpleNet()

In [None]:
def f(W):
    """Return the loss of the global ``net`` on the global sample ``(x, t)``.

    The argument ``W`` is not used: the loss is computed from ``net`` directly,
    so it only reflects changes made to ``net.W`` itself.
    """
    current_loss = net.loss(x, t)
    return current_loss

In [None]:
def num_grad(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Args:
        f: callable returning a scalar. It is called with ``x``; it may equally
           ignore its argument and read the same array through a closure,
           because the perturbation is applied to ``x`` in place.
        x: float ndarray of any shape. Every element is restored to its exact
           original value before the function returns.

    Returns:
        A float ndarray with the same shape as ``x`` holding the central-difference
        partial derivatives.
    """
    step = 1e-4  # kept local so the function does not rely on a global defined elsewhere
    grad = np.zeros_like(x, dtype=float)

    for idx in np.ndindex(x.shape):
        # Full multi-index -> scalar copy. A row view here would alias x, so the
        # "minus" evaluation would start from the already shifted value.
        saved = x[idx]

        # f(x + step)
        x[idx] = saved + step
        f_plus = f(x)

        # f(x - step)
        x[idx] = saved - step
        f_minus = f(x)

        grad[idx] = (f_plus - f_minus) / (2 * step)
        x[idx] = saved  # restore

    return grad

# assert all(num_grad(f, np.array([3., 4.])).round() == np.array([6., 8.]))

In [None]:
# One gradient-descent step on net.W (in place), then show the loss after the update.
net.W -= (lr * num_grad(f, net.W))
net.loss(x, t)

In [None]:
# Display the weight matrix after the update step.
net.W

## 4.5 학습 알고리즘 구현하기

In [None]:
from common.gradient import numerical_gradient
from common.functions import *

In [None]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise to arrays."""
    return 1/(1+np.exp(-x))

In [None]:
def softmax(x):
    """Softmax of a score vector, or of each row of a 2-D batch of scores.

    Args:
        x: array-like of scores. A 2-D input is normalised row by row; any other
           dimensionality is normalised over all of its elements.

    Returns:
        ndarray of the same shape as the input with non-negative entries that sum
        to one (per row for 2-D input).
    """
    scores = np.array(x)

    if scores.ndim != 2:
        shifted = scores - np.max(scores)  # subtract the max to avoid overflow in exp
        return np.exp(shifted) / np.sum(np.exp(shifted))

    # Batch case: work on the transpose so each sample is a column.
    columns = scores.T
    columns = columns - np.max(columns, axis=0)
    probs = np.exp(columns) / np.sum(np.exp(columns), axis=0)
    return probs.T

In [None]:
def cross_entropy_error(y, t):
    """Batch-averaged cross-entropy error.

    Args:
        y: ndarray of predicted probabilities, 1-D (one sample) or 2-D (batch, classes).
        t: ndarray of targets; one-hot with as many elements as ``y``, or integer
           class indices.

    Returns:
        ``-sum(log(p_correct + 1e-7)) / batch_size``.
    """
    if y.ndim == 1:
        # Treat a single sample as a batch of one.
        t, y = t.reshape(1, t.size), y.reshape(1, y.size)

    if t.size == y.size:
        # One-hot targets: convert to the index of the correct class.
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    rows = np.arange(batch_size)
    log_likelihood = np.log(y[rows, t] + 1e-7)
    return -np.sum(log_likelihood) / batch_size

In [None]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All parameters live in ``self.params`` under the keys 'W1', 'b1', 'W2', 'b2'.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create scaled standard-normal weights and biases filled with ones."""
        # W1 is drawn before W2 so the random stream is consumed in a fixed order.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params = {
            'W1': w1,
            'b1': np.ones(hidden_size),
            'W2': w2,
            'b2': np.ones(output_size),
        }

    def _forward(self, x):
        """Run the forward pass and return ``(a1, z1, y)``."""
        p = self.params
        a1 = np.dot(x, p['W1']) + p['b1']
        z1 = sigmoid(a1)
        a2 = np.dot(z1, p['W2']) + p['b2']
        return a1, z1, softmax(a2)

    def predict(self, x):
        """Return the softmax output of the network for the input ``x``."""
        return self._forward(x)[-1]

    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max prediction matches arg-max of ``t``."""
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        return np.sum(predicted == expected) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the gradients of the loss for every parameter via ``numerical_gradient``."""
        # The helper perturbs each parameter array it is given; the lambda ignores
        # its argument and simply re-evaluates the loss on (x, t).
        loss_W = lambda W: self.loss(x, t)
        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    # Fast gradient computation (analytic backward pass)
    def gradient(self, x, t):
        """Return the gradients of the loss for every parameter by backpropagation."""
        W2 = self.params['W2']
        batch_num = x.shape[0]

        # forward
        a1, z1, y = self._forward(x)

        # backward
        dy = (y - t) / batch_num
        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1

        return {
            'W2': np.dot(z1.T, dy),
            'b2': np.sum(dy, axis=0),
            'W1': np.dot(x.T, dz1),
            'b1': np.sum(dz1, axis=0),
        }

In [None]:
# Tiny network and a random 5-sample batch used to exercise TwoLayerNet.
input_size, hidden_size, output_size = 2, 3, 2
net = TwoLayerNet(input_size, hidden_size, output_size)

dataCnt = 5
x = np.random.randn(dataCnt * input_size).reshape(dataCnt, input_size)

# One-hot targets for the five samples.
t = np.array([[0, 1], [1, 0], [0, 1], [1, 0], [0, 1]])

In [None]:
# Prediction, loss, accuracy and numerical gradients on the toy batch.
# loss() and accuracy() run predict() internally, so they must receive the
# network inputs x, not the already computed predictions y.
y = net.predict(x)
loss = net.loss(x, t)
accuracy = net.accuracy(x, t)
grads = net.numerical_gradient(x, t)

### 4.5.2 미니배치 학습 구현하기
2021년 01월 25일 Monday

In [None]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist

In [None]:
# 데이터 읽기
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# 하이퍼파라미터
iters_num = 10000  # 반복 횟수를 적절히 설정한다.
train_size = x_train.shape[0]
batch_size = 100   # 미니배치 크기
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# 1에폭당 반복 수
iter_per_epoch = max(train_size / batch_size, 1)

In [None]:
for i in range(iters_num):
    # Sample a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients by backpropagation
    # (network.numerical_gradient(x_batch, t_batch) is the alternative that was tried)
    grad = network.gradient(x_batch, t_batch)

    # Update every parameter in place
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch, measured after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Accuracy is only evaluated on iterations that start an epoch
    if i % iter_per_epoch != 0:
        continue

    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print("train acc, test acc | " + ", ".join((str(train_acc), str(test_acc))))

In [None]:
# 그래프 그리기
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()