<a href="https://colab.research.google.com/github/eventia/mnistcode/blob/master/aicode12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 고등학교 수학과 파이썬으로 배우는 인공지능 소스코드  **Ch 12**

In [0]:
# 12.1. MNIST 데이터 입력

import numpy as np 
from keras.datasets import mnist
(x_train, t_train), (x_test, t_test) = mnist.load_data()
# Keep references to the original integer class labels; t_train / t_test are
# rebound to their one-hot encodings further down.
t_trainlbl, t_testlbl = t_train, t_test

# Flatten each image into one row vector (28x28 -> 784 per the original note).
# The sample count is read from the array instead of being hard-coded.
x_train = x_train.reshape(x_train.shape[0], -1)    # note (1)
x_test = x_test.reshape(x_test.shape[0], -1)

# One-hot labels: one row per sample, one column per digit class.
T0 = np.zeros((t_train.size, 10))    # all zeros to start with
T1 = np.zeros((t_test.size, 10))

# Set column `label` of row `i` to 1 for every sample in a single vectorized
# assignment (replaces an element-by-element Python loop).
T0[np.arange(t_train.size), t_train] = 1    # note (3)
T1[np.arange(t_test.size), t_test] = 1

t_train, t_test = T0, T1

# Normalize pixel values to the range 0.0 ~ 1.0.
x_train = x_train / 255
x_test = x_test / 255

print('MNIST DataSets 준비 완료')


Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
MNIST DataSets 준비 완료


In [0]:
# 12.2. 함수정의

# 미분함수 
def numerical_diff(f, x, h=1e-4):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses the central difference ``(f(x+h) - f(x-h)) / (2*h)`` one element at a
    time. ``f`` takes no arguments: it must read ``x`` itself (for example
    through a closure), because ``x`` is perturbed in place while ``f`` runs.

    Args:
        f: Zero-argument callable returning a scalar that depends on ``x``.
        x: NumPy array to differentiate with respect to. It is modified
            temporarily and restored to its original values before returning,
            even if ``f`` raises.
        h: Perturbation step size. Defaults to 1e-4.

    Returns:
        An array created with ``np.zeros_like(x)`` holding the estimated
        partial derivative for each element. Empty input yields an empty array.
    """
    nd_coef = np.zeros_like(x)
    # np.ndindex visits every index tuple and simply yields nothing for
    # zero-size arrays.
    for index in np.ndindex(x.shape):
        tmp = x[index]
        try:
            x[index] = tmp + h
            fxh2 = f()    # f(x+h)
            x[index] = tmp - h
            fxh1 = f()    # f(x-h)
        finally:
            # Always put the original value back so a failing f() cannot leave
            # the caller's array perturbed.
            x[index] = tmp
        nd_coef[index] = (fxh2 - fxh1) / (2*h)
    return nd_coef

# 시그모이드
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Evaluated as ``exp(-log(1 + exp(-x)))``. ``np.logaddexp(0, -x)`` computes
    the inner ``log(1 + exp(-x))`` without forming ``exp(-x)`` directly, so
    large negative inputs no longer overflow.

    Args:
        x: Scalar or NumPy array.

    Returns:
        Sigmoid of ``x`` with the same shape as the input.
    """
    return np.exp(-np.logaddexp(0, -x))

# 소프트맥스
def softmax(x):
    """Compute the softmax over the last axis of ``x``.

    A 1-D input is treated as a single score vector and a 2-D input as a batch
    of score vectors, one per row. Higher-rank inputs are normalized along
    their last axis.

    Args:
        x: Array-like of scores with at least one dimension.

    Returns:
        Array of the same shape as ``x`` whose entries along the last axis are
        non-negative and sum to 1.

    Raises:
        ValueError: If ``x`` is a 0-dimensional (scalar) input.
    """
    x = np.asarray(x)
    if x.ndim == 0:
        raise ValueError("softmax expects an array with at least one dimension")
    # Subtracting the per-vector maximum leaves the result unchanged but keeps
    # exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)    # computed once and reused for the normalizer
    return exps / np.sum(exps, axis=-1, keepdims=True)

# 크로스엔트로피오차
def cee(y, t):
    """Cross-entropy error between outputs ``y`` and targets ``t``.

    Computes ``-sum(t * log(y + 1e-7))`` divided by the number of rows of
    ``y``. A 1-D ``y`` is treated as a batch containing a single sample.

    Args:
        y: Network outputs, 1-D or 2-D array.
        t: Targets multiplied element-wise with ``log(y + 1e-7)``.

    Returns:
        The summed cross-entropy divided by ``y.shape[0]``.
    """
    is_single_sample = (y.ndim == 1)
    if is_single_sample:
        # Promote both vectors to 1xN matrices so the row count below is 1.
        y = np.reshape(y, (1, y.size))
        t = np.reshape(t, (1, t.size))

    rows = y.shape[0]
    # The 1e-7 offset keeps log() finite when an output is exactly zero.
    log_outputs = np.log(y + 1e-7)
    total = np.sum(t * log_outputs)
    return -total / rows


In [0]:
# 12.3. 프로세스별 클래스 생성

class Relu:
    """ReLU activation layer (forward pass only)."""

    def __init__(self):
        # Boolean array flagging the non-positive inputs of the most recent
        # forward() call; None until forward() has run.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0.

        The input array is left untouched; the positions that were zeroed are
        remembered in ``self.mask``.
        """
        non_positive = (x <= 0)
        self.mask = non_positive

        activated = x.copy()
        activated[non_positive] = 0
        return activated


class Sigmoid:
    """Sigmoid activation layer (forward pass only)."""

    def __init__(self):
        # Output of the most recent forward() call; None until forward() runs.
        self.out = None

    def forward(self, x):
        """Apply the module-level ``sigmoid`` to ``x``, cache and return it."""
        activated = sigmoid(x)
        self.out = activated
        return activated


class Affine:
    """Affine (fully connected) layer computing ``x . W + b``."""

    def __init__(self, W, b):
        # Parameters are stored by reference, not copied, so in-place updates
        # made by the owner of W / b are seen by this layer.
        self.W, self.b = W, b    # weights (W0, W1) and biases (b0, b1)
        # Input of the most recent forward() call.
        self.x = None
        # Slots for the gradients of W and b; nothing in this class fills them.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Remember ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        projected = np.dot(x, self.W)
        return projected + self.b


class SoftmaxWithLoss:
    """Output layer: softmax followed by cross-entropy error (forward only)."""

    def __init__(self):
        self.y = None    # softmax output of the latest forward() call
        self.t = None    # target labels passed to the latest forward() call

    def forward(self, x, t):
        """Store ``t`` and ``softmax(x)``, then return their cross-entropy.

        Args:
            x: Raw scores passed to the module-level ``softmax``.
            t: Target labels passed to the module-level ``cee``.

        Returns:
            The value of ``cee(softmax(x), t)``.
        """
        self.t = t
        probabilities = softmax(x)
        self.y = probabilities
        return cee(probabilities, t)


In [0]:
# 12.4. 네트워크클래스 생성

class SimpleNetwork:
    """Two-layer network: Affine -> ReLU -> Affine, with a softmax loss layer."""

    def __init__(self, inputx, hidden, outy, weight):
        """Create the parameters and the layers that share them.

        Args:
            inputx: Number of input features.
            hidden: Number of hidden units.
            outy: Number of output classes.
            weight: Scale applied to the standard-normal initial weights.
        """
        # Parameters: scaled Gaussian weights, zero biases.
        self.netMat = {
            'W0': weight * np.random.randn(inputx, hidden),
            'b0': np.zeros(hidden),
            'W1': weight * np.random.randn(hidden, outy),
            'b1': np.zeros(outy),
        }

        # Layers receive the very same array objects held in netMat, so
        # in-place updates to netMat entries are seen by the layers.
        self.netLayers = {
            'Affine1': Affine(self.netMat['W0'], self.netMat['b0']),
            'Relu1': Relu(),
            'Affine2': Affine(self.netMat['W1'], self.netMat['b1']),
            'Softmax': SoftmaxWithLoss(),
        }

    def predict(self, x):
        """Run ``x`` through Affine1, Relu1 and Affine2; return the raw scores."""
        for layer_name in ('Affine1', 'Relu1', 'Affine2'):
            x = self.netLayers[layer_name].forward(x)
        return x

    def loss(self, x, t):
        """Return the softmax loss for input data ``x`` and target labels ``t``."""
        scores = self.predict(x)
        return self.netLayers['Softmax'].forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax prediction equals ``t``.

        ``t`` may be a 1-D array of class indices; any other rank is reduced
        with ``argmax`` along axis 1 first.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        hits = np.sum(predicted == labels)
        return hits / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return numerically estimated loss gradients for every parameter.

        Returns:
            Dict with keys 'W0', 'b0', 'W1', 'b1' mapping to the result of
            ``numerical_diff`` for the corresponding entry of ``netMat``.
        """
        def lossfunc():
            # Takes no arguments: numerical_diff perturbs the parameter arrays
            # in place and the loss is re-evaluated on the same batch.
            return self.loss(x, t)

        return {
            key: numerical_diff(lossfunc, self.netMat[key])
            for key in ('W0', 'b0', 'W1', 'b1')
        }


In [0]:
# 12.5. 미분을 이용한 학습과 검증

import time    # Using Time Module(시간측정)
t1 = time.time()    # wall-clock start, used for the elapsed-time column

# Training settings
lr = 0.1                 # learning rate
iters_num = 1000         # number of parameter updates
batch_size = 20          # samples drawn per update
iter_per_epoch = 1       # not referenced anywhere in this cell
train_size = x_train.shape[0]    # number of training samples (60000)
iter = 0                 # count of completed updates

network = SimpleNetwork(inputx=784, hidden=50, outy=10, weight = 0.2)

print('loss = _______  time = ________  n = ______ | [TrainAcc] [TestAcc]')

for i in range(iters_num):
    # Draw a random mini-batch of sample indices (note 1).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients by numerical (partial) differentiation.
    grad = network.numerical_gradient(x_batch, t_batch)

    # Gradient-descent step; `-=` updates each parameter array in place.
    for key in ('W0', 'b0', 'W1', 'b1'):
        network.netMat[key] -= lr * grad[key]

    # Loss on the same mini-batch after the update.
    loss = network.loss(x_batch, t_batch)

    # Accuracy over the complete training and test sets.
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    iter += 1

    report = (
        'loss = {:7.4f}  '.format(loss)
        + 'time = {:8.4f}  '.format(time.time()-t1)
        + 'n = {:06d} |  {:6.4f}  {:9.4f}'.format(iter, train_acc, test_acc)
    )
    print(report)


loss = _______  time = ________  n = ______ | [TrainAcc] [TestAcc]
loss =  2.1324  time =  15.1599  n = 000001 |  0.1068     0.1097
loss =  2.5134  time =  30.0661  n = 000002 |  0.1537     0.1625
loss =  1.8259  time =  44.9280  n = 000003 |  0.1613     0.1622
loss =  1.3325  time =  59.7791  n = 000004 |  0.1893     0.1909
loss =  1.8377  time =  74.5542  n = 000005 |  0.2281     0.2304
loss =  1.8078  time =  89.5424  n = 000006 |  0.2567     0.2607
loss =  1.5525  time = 104.8017  n = 000007 |  0.2727     0.2761
loss =  1.6940  time = 119.8533  n = 000008 |  0.3178     0.3295
loss =  1.2581  time = 134.6920  n = 000009 |  0.3338     0.3477
loss =  1.3563  time = 149.8605  n = 000010 |  0.3388     0.3494
loss =  1.3285  time = 164.9146  n = 000011 |  0.3514     0.3616
loss =  0.9924  time = 179.8845  n = 000012 |  0.4269     0.4331
loss =  1.1199  time = 194.6569  n = 000013 |  0.4235     0.4265
loss =  1.0018  time = 209.3665  n = 000014 |  0.4527     0.4634
loss =  1.2897  time = 