### [예제 4] 은닉층 2개 오차역전파 예제

In [1]:
import numpy as np
from datetime import datetime

# sigmoid 함수

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e^(-x))`` of ``x``.

    Works element-wise when ``x`` is a NumPy array.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

In [2]:
# NeuralNetwork Class

class NeuralNetwork:
    """Feed-forward network with two sigmoid hidden layers, trained by backpropagation.

    Layer numbering: layer 1 is the input, layers 2 and 3 are the hidden
    layers and layer 4 is the output. ``W{k}`` / ``b{k}`` are the weights and
    biases feeding layer ``k``; ``Z{k}`` / ``A{k}`` hold the weighted sum and
    the activation of layer ``k`` from the most recent forward pass.

    Every non-input layer uses the module-level ``sigmoid`` function as its
    activation.
    """

    def __init__(self, input_nodes, hidden_nodes_1, hidden_nodes_2, output_nodes, learning_rate):
        """Create the network with randomly initialised parameters.

        Args:
            input_nodes: Number of input features.
            hidden_nodes_1: Number of units in the first hidden layer.
            hidden_nodes_2: Number of units in the second hidden layer.
            output_nodes: Number of output units.
            learning_rate: Step size used by ``train``.
        """
        self.input_nodes = input_nodes
        self.hidden_nodes_1 = hidden_nodes_1
        self.hidden_nodes_2 = hidden_nodes_2
        self.output_nodes = output_nodes

        # Weights: standard-normal samples divided by sqrt(fan_in / 2)
        # (He-style scaling). Biases: uniform samples from [0, 1).
        # Layer 2 (hidden layer 1).
        self.W2 = np.random.randn(self.input_nodes, self.hidden_nodes_1) / np.sqrt(self.input_nodes/2)
        self.b2 = np.random.rand(self.hidden_nodes_1)

        # Layer 3 (hidden layer 2).
        self.W3 = np.random.randn(self.hidden_nodes_1, self.hidden_nodes_2) / np.sqrt(self.hidden_nodes_1/2)
        self.b3 = np.random.rand(self.hidden_nodes_2)

        # Layer 4 (output layer).
        self.W4 = np.random.randn(self.hidden_nodes_2, self.output_nodes) / np.sqrt(self.hidden_nodes_2/2)
        self.b4 = np.random.rand(self.output_nodes)

        # Placeholders for the weighted sums Z and activations A of each
        # layer; they are overwritten by every forward pass.
        self.Z4 = np.zeros([1, output_nodes])
        self.A4 = np.zeros([1, output_nodes])

        self.Z3 = np.zeros([1, hidden_nodes_2])
        self.A3 = np.zeros([1, hidden_nodes_2])

        self.Z2 = np.zeros([1, hidden_nodes_1])
        self.A2 = np.zeros([1, hidden_nodes_1])

        self.Z1 = np.zeros([1, input_nodes])
        self.A1 = np.zeros([1, input_nodes])

        self.learning_rate = learning_rate

    def _activations(self, input_data):
        """Propagate ``input_data`` through the network without touching instance state.

        Returns:
            ``((Z2, A2), (Z3, A3), (Z4, A4))`` - the weighted sum and sigmoid
            activation of layers 2, 3 and 4.
        """
        Z2 = np.dot(input_data, self.W2) + self.b2
        A2 = sigmoid(Z2)

        Z3 = np.dot(A2, self.W3) + self.b3
        A3 = sigmoid(Z3)

        Z4 = np.dot(A3, self.W4) + self.b4
        A4 = sigmoid(Z4)

        return (Z2, A2), (Z3, A3), (Z4, A4)

    def feed_forward(self):
        """Run a forward pass on ``self.input_data`` and return the loss.

        Reads ``self.input_data`` and ``self.target_data`` (both set by
        ``train``) and stores every layer's ``Z``/``A`` on the instance.

        Returns:
            The binary cross-entropy between ``self.A4`` and
            ``self.target_data``, summed over all elements.
        """
        eps = 1e-7    # keeps log() away from log(0)

        # The input layer has no activation: Z1 and A1 are the raw input.
        self.Z1 = self.input_data
        self.A1 = self.input_data

        (self.Z2, self.A2), (self.Z3, self.A3), (self.Z4, self.A4) = self._activations(self.A1)

        y = self.A4
        t = self.target_data

        return -np.sum(t*np.log(y + eps) + (1-t)*np.log((1 - y) + eps))

    def loss_val(self):
        """Return the current loss for the stored input/target pair.

        This is the same computation as ``feed_forward`` (including refreshing
        the stored ``Z``/``A`` values); it is kept as a separate name for
        callers that only want to read the loss.
        """
        return self.feed_forward()

    def predict(self, input_data):
        """Return the index of the largest output activation for ``input_data``.

        Args:
            input_data: 2-D array with one sample per row, e.g. shape
                ``(1, input_nodes)``.

        Returns:
            ``np.argmax`` of the output activations. No axis is given, so the
            index refers to the flattened output; pass a single row to get a
            class index.
        """
        _, _, (_, y) = self._activations(input_data)

        return np.argmax(y)

    def accuracy(self, test_input_data, test_target_data):
        """Measure the fraction of correctly classified samples.

        Args:
            test_input_data: Sequence of raw input vectors; each one is
                rescaled with ``x / 255 * 0.99 + 0.01`` before prediction.
            test_target_data: Sequence of labels, indexable in parallel with
                ``test_input_data``; each label is converted with ``int``.

        Returns:
            ``(accuracy, not_matched_list)`` where ``accuracy`` is in
            ``[0, 1]`` and ``not_matched_list`` holds the indices of the
            misclassified samples. An empty test set yields ``(0.0, [])``.
        """
        total = len(test_input_data)
        if total == 0:
            # Avoid dividing by zero when there is nothing to evaluate.
            return 0.0, []

        not_matched_list = []

        for index, sample in enumerate(test_input_data):

            label = int(test_target_data[index])

            # Same input normalisation as used for training.
            data = (sample / 255.0 * 0.99) + 0.01

            # predict() expects a matrix, so lift the vector to shape (1, n).
            predicted_num = self.predict(np.array(data, ndmin=2))

            if label != predicted_num:
                not_matched_list.append(index)

        accuracy_val = (total - len(not_matched_list)) / total

        return accuracy_val, not_matched_list

    def train(self, input_data, target_data):
        """Perform one gradient-descent step on the given sample(s).

        Args:
            input_data: 2-D array of shape ``(batch, input_nodes)``.
            target_data: 2-D array of shape ``(batch, output_nodes)``.

        The inputs are stored on the instance so that ``loss_val`` can be
        called afterwards.
        """
        self.target_data = target_data
        self.input_data = input_data

        # Forward pass: fills self.A1..self.A4 for the current weights.
        self.feed_forward()

        # Compute every layer's delta BEFORE touching any weight, so that the
        # error is propagated through the same weights that produced the
        # forward pass.
        # NOTE: the output delta below is the gradient of the squared error
        # 0.5 * sum((A4 - t)^2) with respect to Z4, while feed_forward()
        # reports cross-entropy. It is kept unchanged on purpose so that the
        # training dynamics stay the same.
        delta_4 = (self.A4 - self.target_data) * self.A4 * (1 - self.A4)
        delta_3 = np.dot(delta_4, self.W4.T) * self.A3 * (1 - self.A3)
        delta_2 = np.dot(delta_3, self.W3.T) * self.A2 * (1 - self.A2)

        # Weight gradients sum over the batch through the matrix product;
        # bias gradients are summed over the batch axis explicitly so that
        # each bias keeps its 1-D shape.
        self.W4 = self.W4 - self.learning_rate * np.dot(self.A3.T, delta_4)
        self.b4 = self.b4 - self.learning_rate * np.sum(delta_4, axis=0)

        self.W3 = self.W3 - self.learning_rate * np.dot(self.A2.T, delta_3)
        self.b3 = self.b3 - self.learning_rate * np.sum(delta_3, axis=0)

        self.W2 = self.W2 - self.learning_rate * np.dot(self.A1.T, delta_2)
        self.b2 = self.b2 - self.learning_rate * np.sum(delta_2, axis=0)

In [3]:
# Load the training data: images of the digits 0-9, each made of 784 numbers (28x28).

try:

    training_data = np.loadtxt('./mnist_train.csv', delimiter=',', dtype=np.float32)

    print("training_data.shape = ", training_data.shape)
    print("training_data[0,0] = ", training_data[0,0], ", len(training_data[0]) = ", len(training_data[0]))

except Exception as err:

    # Show what actually went wrong (missing file, malformed row, ...)
    # instead of only a generic message.
    print('Exception occur !!', repr(err))

training_data.shape =  (60000, 785)
training_data[0,0] =  5.0 , len(training_data[0]) =  785


### 784 X 30 X 20 X 10 테스트

In [4]:
# Network layout and training hyper-parameters.
input_nodes = 784

hidden_nodes_1 = 30
hidden_nodes_2 = 20

output_nodes = 10
learning_rate = 0.1
epochs = 2

nn = NeuralNetwork(input_nodes, hidden_nodes_1, hidden_nodes_2, output_nodes, learning_rate)

start_time = datetime.now()

for i in range(epochs):

    # One gradient step per row; column 0 is the label, the rest are inputs.
    for step, sample in enumerate(training_data):

        # Target vector: 0.01 everywhere except 0.99 at the label's index.
        target_data = np.full(output_nodes, 0.01)
        target_data[int(sample[0])] = 0.99

        # Rescale the inputs so that 0 maps to 0.01 and 255 maps to 1.0.
        input_data = ((sample[1:] / 255.0) * 0.99) + 0.01

        # train() expects matrices, so add a leading axis of length 1.
        nn.train(input_data[np.newaxis, :], target_data[np.newaxis, :])

        if step % 1000 == 0:
            print("epochs = ", i, ", step = ", step,  ",  loss_val = ", nn.loss_val())

end_time = datetime.now()
print("\nelapsed time = ", end_time - start_time)

epochs =  0 , step =  0 ,  loss_val =  8.564689764083967
epochs =  0 , step =  1000 ,  loss_val =  3.24117147692315
epochs =  0 , step =  2000 ,  loss_val =  3.379551725998059
epochs =  0 , step =  3000 ,  loss_val =  3.775318302970159
epochs =  0 , step =  4000 ,  loss_val =  2.6708777263704753
epochs =  0 , step =  5000 ,  loss_val =  1.8197282358707807
epochs =  0 , step =  6000 ,  loss_val =  1.206203256148244
epochs =  0 , step =  7000 ,  loss_val =  2.2699108483852037
epochs =  0 , step =  8000 ,  loss_val =  0.945958768728595
epochs =  0 , step =  9000 ,  loss_val =  1.3130330122271803
epochs =  0 , step =  10000 ,  loss_val =  0.9174772911387616
epochs =  0 , step =  11000 ,  loss_val =  0.9301654647906228
epochs =  0 , step =  12000 ,  loss_val =  1.285427709800529
epochs =  0 , step =  13000 ,  loss_val =  1.1500843164478054
epochs =  0 , step =  14000 ,  loss_val =  0.6923268410966206
epochs =  0 , step =  15000 ,  loss_val =  1.3007946980044574
epochs =  0 , step =  16000 ,

In [6]:
# Load the test data: images of the digits 0-9, each made of 784 numbers (28x28).

try:
    test_data = np.loadtxt('./mnist_test.csv', delimiter=',', dtype=np.float32)

    # Column 0 holds the label; the remaining columns are the inputs.
    test_input_data = test_data[ : , 1: ]
    test_target_data = test_data[ : , 0 ]

    print("test_data.shape = ", test_data.shape)
    print("test_data[0,0] = ", test_data[0,0], ", len(test_data[0]) = ", len(test_data[0]))

    # measure accuracy
    (acc_ret, false_list) = nn.accuracy(test_input_data, test_target_data)

    print('Accuracy = ', 100*acc_ret)

except Exception as err:

    # Show what actually went wrong (missing file, undefined network, ...)
    # instead of only a generic message.
    print('Exception occur !!', repr(err))

test_data.shape =  (10000, 785)
test_data[0,0] =  7.0 , len(test_data[0]) =  785
Accuracy =  93.58


### 784 X 40 X 40 X 10 테스트

In [7]:
# Network layout and training hyper-parameters.
input_nodes = 784

hidden_nodes_1 = 40
hidden_nodes_2 = 40

output_nodes = 10
learning_rate = 0.1
epochs = 2

nn = NeuralNetwork(input_nodes, hidden_nodes_1, hidden_nodes_2, output_nodes, learning_rate)

start_time = datetime.now()

for i in range(epochs):

    # One gradient step per row; column 0 is the label, the rest are inputs.
    for step, sample in enumerate(training_data):

        # Target vector: 0.01 everywhere except 0.99 at the label's index.
        target_data = np.full(output_nodes, 0.01)
        target_data[int(sample[0])] = 0.99

        # Rescale the inputs so that 0 maps to 0.01 and 255 maps to 1.0.
        input_data = ((sample[1:] / 255.0) * 0.99) + 0.01

        # train() expects matrices, so add a leading axis of length 1.
        nn.train(input_data[np.newaxis, :], target_data[np.newaxis, :])

        if step % 1000 == 0:
            print("epochs = ", i, ", step = ", step,  ",  loss_val = ", nn.loss_val())

end_time = datetime.now()
print("\nelapsed time = ", end_time - start_time)

epochs =  0 , step =  0 ,  loss_val =  11.37364336921664
epochs =  0 , step =  1000 ,  loss_val =  3.1209096533375584
epochs =  0 , step =  2000 ,  loss_val =  3.1599753476383303
epochs =  0 , step =  3000 ,  loss_val =  3.5179254514092353
epochs =  0 , step =  4000 ,  loss_val =  1.8405294678634703
epochs =  0 , step =  5000 ,  loss_val =  1.286411728959947
epochs =  0 , step =  6000 ,  loss_val =  1.0087927039234454
epochs =  0 , step =  7000 ,  loss_val =  1.947120410954809
epochs =  0 , step =  8000 ,  loss_val =  0.7746781628643669
epochs =  0 , step =  9000 ,  loss_val =  1.1394699961014016
epochs =  0 , step =  10000 ,  loss_val =  0.7900309269569629
epochs =  0 , step =  11000 ,  loss_val =  0.7745870585040694
epochs =  0 , step =  12000 ,  loss_val =  0.9586291910462993
epochs =  0 , step =  13000 ,  loss_val =  0.975649787691054
epochs =  0 , step =  14000 ,  loss_val =  0.7099317421594491
epochs =  0 , step =  15000 ,  loss_val =  1.0079426547144976
epochs =  0 , step =  160

In [8]:
# Load the test data: images of the digits 0-9, each made of 784 numbers (28x28).

try:
    test_data = np.loadtxt('./mnist_test.csv', delimiter=',', dtype=np.float32)

    # Column 0 holds the label; the remaining columns are the inputs.
    test_input_data = test_data[ : , 1: ]
    test_target_data = test_data[ : , 0 ]

    print("test_data.shape = ", test_data.shape)
    print("test_data[0,0] = ", test_data[0,0], ", len(test_data[0]) = ", len(test_data[0]))

    # measure accuracy
    (acc_ret, false_list) = nn.accuracy(test_input_data, test_target_data)

    print('Accuracy = ', 100*acc_ret)

except Exception as err:

    # Show what actually went wrong (missing file, undefined network, ...)
    # instead of only a generic message.
    print('Exception occur !!', repr(err))

test_data.shape =  (10000, 785)
test_data[0,0] =  7.0 , len(test_data[0]) =  785
Accuracy =  94.38
