### [예제 4] 은닉층 3개 오차역전파 예제

In [1]:
import numpy as np
from datetime import datetime

# sigmoid 함수

def sigmoid(x):
    return 1 / (1+np.exp(-x))

In [2]:
# NeuralNetwork Class

class NeuralNetwork:
    
    # 생성자
    def __init__(self, input_nodes, hidden_nodes_1, hidden_nodes_2, hidden_nodes_3, output_nodes, learning_rate):
        
        self.input_nodes = input_nodes
        self.hidden_nodes_1 = hidden_nodes_1
        self.hidden_nodes_2 = hidden_nodes_2
        self.hidden_nodes_3 = hidden_nodes_3
        self.output_nodes = output_nodes
        
        ############################### 가중치 / 바이어스 초기화 #############################################
        # 2층 hidden layer unit 
        # Xavier/He 방법으로 self.W2 가중치 초기화
        self.W2 = np.random.randn(self.input_nodes, self.hidden_nodes_1) / np.sqrt(self.input_nodes/2)
        self.b2 = np.random.rand(self.hidden_nodes_1)
        
        # 3층 hidden layer unit 
        # Xavier/He 방법으로 self.W3 가중치 초기화
        self.W3 = np.random.randn(self.hidden_nodes_1, self.hidden_nodes_2) / np.sqrt(self.hidden_nodes_1/2)
        self.b3= np.random.rand(self.hidden_nodes_2)
        
        # 4층 hidden layer unit 
        # Xavier/He 방법으로 self.W4 가중치 초기화
        self.W4 = np.random.randn(self.hidden_nodes_2, self.hidden_nodes_3) / np.sqrt(self.hidden_nodes_2/2)
        self.b4 = np.random.rand(self.hidden_nodes_3)
        
        # 5층 output layer unit 
        # Xavier/He 방법으로 self.W4 가중치 초기화
        self.W5 = np.random.randn(self.hidden_nodes_3, self.output_nodes) / np.sqrt(self.hidden_nodes_3/2)
        self.b5 = np.random.rand(self.output_nodes)
                          
        ############################### 선형회귀 Z / 출력 A 초기화 ##########################################
        # 5층 output layer 가중합 z, 출력 a 정의 (모두 행렬로 표시)
        self.Z5 = np.zeros([1,output_nodes])
        self.A5 = np.zeros([1,output_nodes])
        
        # 4층 hidden layer 3 가중합 z, 출력 a 정의 (모두 행렬로 표시)
        self.Z4 = np.zeros([1,hidden_nodes_3])
        self.A4 = np.zeros([1,hidden_nodes_3])
        
        # 3층 hidden layer 2 가중합 z, 출력 a 정의 (모두 행렬로 표시)
        self.Z3 = np.zeros([1,hidden_nodes_2])
        self.A3 = np.zeros([1,hidden_nodes_2])
        
        # 2층 hidden layer 가중합 z, 출력 a 정의 (모두 행렬로 표시)
        self.Z2 = np.zeros([1,hidden_nodes_1])
        self.A2 = np.zeros([1,hidden_nodes_1])
        
        # 1층 input layer 출력 a 정의 (모두 행렬로 표시)
        self.Z1 = np.zeros([1,input_nodes])    
        self.A1 = np.zeros([1,input_nodes])       
        
        # 학습률 learning rate 초기화
        self.learning_rate = learning_rate
        
    # 손실함수
    def feed_forward(self):
        
        delta = 1e-7    # log 무한대 발산 방지
        
        # 1층 출력 계산, 가중합과 출력은 입력 값과 동일함
        self.Z1 = self.input_data
        self.A1 = self.input_data
        
        # 2층 가중합, 출력 계산    
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = sigmoid(self.Z2)
        
        # 3층 가중합, 출력 계산    
        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        self.A3 = sigmoid(self.Z3)
        
        # 4층 가중합 , 출력 계산
        self.Z4 = np.dot(self.A3, self.W4) + self.b4
        self.A4 = sigmoid(self.Z4)
        
        # 5층 가중합 , 출력 계산
        self.Z5 = np.dot(self.A4, self.W5) + self.b5
        y = self.A5 = sigmoid(self.Z5)        
        
        # cross-entropy 
        return  -np.sum( self.target_data*np.log(y + delta) + (1-self.target_data)*np.log((1 - y)+delta ) )

    
    # 손실 값 계산
    def loss_val(self):
        
        delta = 1e-7    # log 무한대 발산 방지
        
        # 1층 출력 계산, 가중합과 출력은 입력 값과 동일함
        self.Z1 = self.input_data
        self.A1 = self.input_data
        
        # 2층 가중합, 출력 계산    
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = sigmoid(self.Z2)
        
        # 3층 가중합, 출력 계산    
        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        self.A3 = sigmoid(self.Z3)
        
        # 4층 가중합 , 출력 계산
        self.Z4 = np.dot(self.A3, self.W4) + self.b4
        self.A4 = sigmoid(self.Z4)
        
        # 5층 가중합 , 출력 계산
        self.Z5 = np.dot(self.A4, self.W5) + self.b5
        y = self.A5 = sigmoid(self.Z5)        
        
        # cross-entropy 
        return  -np.sum( self.target_data*np.log(y + delta) + (1-self.target_data)*np.log((1 - y)+delta ) )
    
    # query, 즉 미래 값 예측 함수
    def predict(self, input_data):
        
        # input_data 는 행렬로 입력됨 즉, (1, 784) shape 을 가짐
        Z2 = np.dot(input_data, self.W2) + self.b2
        A2 = sigmoid(Z2)
        
        Z3 = np.dot(A2, self.W3) + self.b3
        A3 = sigmoid(Z3)
        
        Z4 = np.dot(A3, self.W4) + self.b4
        A4 = sigmoid(Z4)
        
        Z5 = np.dot(A4, self.W5) + self.b5
        y = A5 = sigmoid(Z5)
        
        predicted_num = np.argmax(y)
    
        return predicted_num
    

    # 정확도 측정함수
    def accuracy(self, test_input_data, test_target_data):
        
        matched_list = []
        not_matched_list = []
        
        for index in range(len(test_input_data)):
                        
            label = int(test_target_data[index])
                        
            # one-hot encoding을 위한 데이터 정규화 (data normalize)
            data = (test_input_data[index] / 255.0 * 0.99) + 0.01
                  
            # predict 를 위해서 vector 을 matrix 로 변환하여 인수로 넘겨줌
            predicted_num = self.predict(np.array(data, ndmin=2)) 
        
            if label == predicted_num:
                matched_list.append(index)
                
            else:
                
                not_matched_list.append(index)
                
        accuracy_val = (len(matched_list)/(len(test_input_data)))
        
        return accuracy_val, not_matched_list
    
            
    # input_data : 784 개,  target_data : 10개
    def train(self, input_data, target_data):  
        
        self.target_data = target_data    
        self.input_data = input_data
        
        # 먼저 feed forward 를 통해서 최종 출력값과 이를 바탕으로 현재의 에러 값 계산
        loss_val = self.feed_forward()      
        
        # 출력층 loss 인 loss_5, 가중치 W5, 바이어스 b5 계산
        loss_5 = (self.A5-self.target_data) * self.A5 * (1-self.A5)    

        W5_diff = np.dot(self.A4.T, loss_5)
        b5_diff = loss_5
        
        self.W5 = self.W5 - self.learning_rate * W5_diff        
        self.b5 = self.b5 - self.learning_rate * b5_diff
        
        # 은닉층 3 loss 인 loss_4, 가중치 W4, 바이어스 b4 계산
        loss_4 = np.dot(loss_5, self.W5.T) * self.A4 * (1-self.A4)    
        
        W4_diff = np.dot(self.A3.T, loss_4)
        b4_diff = loss_4
        
        self.W4 = self.W4 - self.learning_rate * W4_diff        
        self.b4 = self.b4 - self.learning_rate * b4_diff
                
        # 은닉층 2 loss 인 loss_3, 가중치 W3, 바이어스 b3 계산
        loss_3 = np.dot(loss_4, self.W4.T) * self.A3 * (1-self.A3)
        
        W3_diff = np.dot(self.A2.T, loss_3)
        b3_diff = loss_3
        
        self.W3 = self.W3 - self.learning_rate * W3_diff        
        self.b3 = self.b3 - self.learning_rate * b3_diff              
        
        # 은닉층 1 loss 인 loss_2,  가중치 W2, 바이어스 b2 계산
        loss_2 = np.dot(loss_3, self.W3.T) * self.A2 * (1-self.A2)   
        
        W2_diff = np.dot(self.A1.T, loss_2)
        b2_diff = loss_2
                
        self.W2 = self.W2 - self.learning_rate * W2_diff
        
        self.b2 = self.b2 - self.learning_rate * b2_diff

In [3]:
# 0~9 숫자 이미지가 784개의 숫자 (28X28) 로 구성되어 있는 training data 읽어옴

try:
    
    training_data = np.loadtxt('./mnist_train.csv', delimiter=',', dtype=np.float32)

    print("training_data.shape = ", training_data.shape)
    print("training_data[0,0] = ", training_data[0,0], ", len(training_data[0]) = ", len(training_data[0]))
    
except Exception as err:
    
    print('Exception occur !!')

training_data.shape =  (60000, 785)
training_data[0,0] =  5.0 , len(training_data[0]) =  785


#### 784 X 20 X 20 X 20 X 10 테스트

In [4]:
input_nodes = 784

hidden_nodes_1 = 20
hidden_nodes_2 = 20
hidden_nodes_3 = 20

output_nodes = 10
learning_rate = 1e-1
epochs = 2

nn = NeuralNetwork(input_nodes, hidden_nodes_1, hidden_nodes_2, hidden_nodes_3, output_nodes, learning_rate)

start_time = datetime.now()

for i in range(epochs):
    
    for step in range(len(training_data)):  # train
    
        # input_data, target_data normalize
        
        target_data = np.zeros(output_nodes) + 0.01    
        target_data[int(training_data[step, 0])] = 0.99
        #target_data = training_data[step, 0]
    
        input_data = ((training_data[step, 1:] / 255.0) * 0.99) + 0.01
        
    
        nn.train( np.array(input_data, ndmin=2), np.array(target_data, ndmin=2) )
    
        if step % 1000 == 0:
            print("epochs = ", i, ", step = ", step,  ",  loss_val = ", nn.loss_val())
        
end_time = datetime.now() 
print("\nelapsed time = ", end_time - start_time) 

epochs =  0 , step =  0 ,  loss_val =  8.431200682244604
epochs =  0 , step =  1000 ,  loss_val =  3.382768695667797
epochs =  0 , step =  2000 ,  loss_val =  3.264922432675478
epochs =  0 , step =  3000 ,  loss_val =  3.5974743316532987
epochs =  0 , step =  4000 ,  loss_val =  3.3514511205545254
epochs =  0 , step =  5000 ,  loss_val =  3.0415297400665455
epochs =  0 , step =  6000 ,  loss_val =  2.6743809906110165
epochs =  0 , step =  7000 ,  loss_val =  3.169885951123589
epochs =  0 , step =  8000 ,  loss_val =  2.977585053194304
epochs =  0 , step =  9000 ,  loss_val =  2.5195200962656505
epochs =  0 , step =  10000 ,  loss_val =  2.46591096696681
epochs =  0 , step =  11000 ,  loss_val =  1.4164608190286647
epochs =  0 , step =  12000 ,  loss_val =  2.8636427910953715
epochs =  0 , step =  13000 ,  loss_val =  2.1813634533853885
epochs =  0 , step =  14000 ,  loss_val =  1.0277952789961116
epochs =  0 , step =  15000 ,  loss_val =  2.282683674574327
epochs =  0 , step =  16000 ,

In [6]:
# 0~9 숫자 이미지가 784개의 숫자 (28X28) 로 구성되어 있는 test data 읽어옴

try:
    test_data = np.loadtxt('./mnist_test.csv', delimiter=',', dtype=np.float32)

    test_input_data = test_data[ : , 1: ]
    test_target_data = test_data[ : , 0 ]

    print("test_data.shape = ", test_data.shape)
    print("test_data[0,0] = ", test_data[0,0], ", len(test_data[0]) = ", len(test_data[0]))

    # measure accuracy
    (acc_ret, false_list) = nn.accuracy(test_input_data, test_target_data)   

    print('Accuracy = ', 100*acc_ret)
    
except Exception as err:
    
    print('Exception occur !!')

test_data.shape =  (10000, 785)
test_data[0,0] =  7.0 , len(test_data[0]) =  785
Accuracy =  92.27


#### 784 X 10 X 20 X 10 X 10 테스트

In [7]:
input_nodes = 784

hidden_nodes_1 = 10
hidden_nodes_2 = 20
hidden_nodes_3 = 10

output_nodes = 10
learning_rate = 0.1
epochs = 5

nn = NeuralNetwork(input_nodes, hidden_nodes_1, hidden_nodes_2, hidden_nodes_3, output_nodes, learning_rate)

start_time = datetime.now()

for i in range(epochs):
    
    for step in range(len(training_data)):  # train
    
        # input_data, target_data normalize
        
        target_data = np.zeros(output_nodes) + 0.01    
        target_data[int(training_data[step, 0])] = 0.99
        #target_data = training_data[step, 0]
    
        input_data = ((training_data[step, 1:] / 255.0) * 0.99) + 0.01
        #input_data = training_data[step, 1:]
    
        nn.train( np.array(input_data, ndmin=2), np.array(target_data, ndmin=2) )
    
        if step % 1000 == 0:
            print("epochs = ", i, ", step = ", step,  ",  loss_val = ", nn.loss_val())
        
end_time = datetime.now() 
print("\nelapsed time = ", end_time - start_time) 

epochs =  0 , step =  0 ,  loss_val =  10.778385081214399
epochs =  0 , step =  1000 ,  loss_val =  3.408243216027067
epochs =  0 , step =  2000 ,  loss_val =  3.37095294829591
epochs =  0 , step =  3000 ,  loss_val =  3.564574155159197
epochs =  0 , step =  4000 ,  loss_val =  3.316735788554302
epochs =  0 , step =  5000 ,  loss_val =  3.173098888821519
epochs =  0 , step =  6000 ,  loss_val =  2.7845937899217983
epochs =  0 , step =  7000 ,  loss_val =  3.0974395507873633
epochs =  0 , step =  8000 ,  loss_val =  1.9021092288920942
epochs =  0 , step =  9000 ,  loss_val =  2.3358044295590408
epochs =  0 , step =  10000 ,  loss_val =  3.007998174483781
epochs =  0 , step =  11000 ,  loss_val =  1.8830062174621616
epochs =  0 , step =  12000 ,  loss_val =  3.008656810140917
epochs =  0 , step =  13000 ,  loss_val =  2.6069398957639995
epochs =  0 , step =  14000 ,  loss_val =  1.5511009660398822
epochs =  0 , step =  15000 ,  loss_val =  2.8773584669478964
epochs =  0 , step =  16000 ,

epochs =  2 , step =  14000 ,  loss_val =  0.7951930827894477
epochs =  2 , step =  15000 ,  loss_val =  0.7973689298448664
epochs =  2 , step =  16000 ,  loss_val =  0.8869049688931765
epochs =  2 , step =  17000 ,  loss_val =  0.8504232324373351
epochs =  2 , step =  18000 ,  loss_val =  0.8287922960337555
epochs =  2 , step =  19000 ,  loss_val =  0.8837664930545086
epochs =  2 , step =  20000 ,  loss_val =  0.7624598323234433
epochs =  2 , step =  21000 ,  loss_val =  1.0589790966742558
epochs =  2 , step =  22000 ,  loss_val =  0.8085068048278632
epochs =  2 , step =  23000 ,  loss_val =  0.8559062478554056
epochs =  2 , step =  24000 ,  loss_val =  0.8675457292953248
epochs =  2 , step =  25000 ,  loss_val =  0.8521968816754706
epochs =  2 , step =  26000 ,  loss_val =  0.809945363786235
epochs =  2 , step =  27000 ,  loss_val =  0.9490488243406503
epochs =  2 , step =  28000 ,  loss_val =  0.8196620464985863
epochs =  2 , step =  29000 ,  loss_val =  0.8415827403252578
epochs = 

epochs =  4 , step =  27000 ,  loss_val =  0.9105540119368509
epochs =  4 , step =  28000 ,  loss_val =  0.8252078320521992
epochs =  4 , step =  29000 ,  loss_val =  0.805208762214278
epochs =  4 , step =  30000 ,  loss_val =  0.7666157518907013
epochs =  4 , step =  31000 ,  loss_val =  1.0199110820456827
epochs =  4 , step =  32000 ,  loss_val =  1.1255222564530476
epochs =  4 , step =  33000 ,  loss_val =  0.7890343205136221
epochs =  4 , step =  34000 ,  loss_val =  0.830066175376537
epochs =  4 , step =  35000 ,  loss_val =  0.8140420681422329
epochs =  4 , step =  36000 ,  loss_val =  0.8640420539102736
epochs =  4 , step =  37000 ,  loss_val =  0.7896752734674303
epochs =  4 , step =  38000 ,  loss_val =  0.8194089156774156
epochs =  4 , step =  39000 ,  loss_val =  6.269967682692098
epochs =  4 , step =  40000 ,  loss_val =  0.8768217104817292
epochs =  4 , step =  41000 ,  loss_val =  0.80206425851308
epochs =  4 , step =  42000 ,  loss_val =  0.8238475655954681
epochs =  4 ,

In [5]:
# 0~9 숫자 이미지가 784개의 숫자 (28X28) 로 구성되어 있는 test data 읽어옴

try:
    test_data = np.loadtxt('./mnist_test.csv', delimiter=',', dtype=np.float32)

    test_input_data = test_data[ : , 1: ]
    test_target_data = test_data[ : , 0 ]

    print("test_data.shape = ", test_data.shape)
    print("test_data[0,0] = ", test_data[0,0], ", len(test_data[0]) = ", len(test_data[0]))

    # measure accuracy
    (acc_ret, false_list) = nn.accuracy(test_input_data, test_target_data)   

    print('Accuracy = ', 100*acc_ret)
    
except Exception as err:
    
    print('Exception occur !!')

test_data.shape =  (10000, 785)
test_data[0,0] =  7.0 , len(test_data[0]) =  785
Accuracy =  93.67999999999999
