## Define the numerical_derivative and sigmoid functions

In [1]:
import numpy as np
from datetime import datetime

np.random.seed(0)  # fix the global NumPy RNG seed so the np.random.rand() draws below are reproducible

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is perturbed **in place** by ``+delta_x`` and then
    ``-delta_x``, ``f(x)`` is evaluated for each perturbation, and the element
    is restored before moving on.  The in-place update matters: a caller may
    pass an ``f`` that ignores its argument and reads the same array object
    through another reference, so copying ``x`` would hide the perturbation.

    Args:
        f: Callable invoked as ``f(x)`` that returns a scalar.
        x: Floating-point NumPy array.  It is temporarily modified during the
            call and holds its original values again on return, even when
            ``f`` raises.
        delta_x: Half-width of the central difference.  Defaults to ``1e-4``.

    Returns:
        A new array with the same shape and dtype as ``x`` whose entries are
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.
        An empty ``x`` yields an empty gradient.

    Raises:
        TypeError: If ``x`` does not have a floating-point dtype.  The
            perturbation is written back into ``x``, so an integer array
            would truncate it and produce meaningless gradients.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype "
            f"{x.dtype}"
        )

    grad = np.zeros_like(x)

    # np.ndindex visits every multi-index of x (and copes with empty arrays).
    for idx in np.ndindex(x.shape):
        original_value = x[idx]
        try:
            x[idx] = float(original_value) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(original_value) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation so a failing f cannot leave the
            # caller's array corrupted.
            x[idx] = original_value

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [2]:
# The network's final output is y = sigmoid(Wx + b), and the loss is
# expressed as cross-entropy.

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    ``np.exp`` is used, so NumPy arrays are handled element-wise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

## data definition

In [3]:
# XOR truth table: each row of xor_xdata is one two-bit input pattern and the
# matching row of xor_tdata is its target, stored as a (4, 1) column.
xor_xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
xor_tdata = np.array([[0], [1], [1], [0]])

print("xor_xdata.shape = ", xor_xdata.shape, ", xor_tdata.shape = ", xor_tdata.shape)

xor_xdata.shape =  (4, 2) , xor_tdata.shape =  (4, 1)


## initialize weights and bias

In [4]:
# Layer widths of the network: input -> hidden 1 -> hidden 2 -> output.
input_nodes, hidden_nodes_1, hidden_nodes_2, output_nodes = 2, 2, 3, 1

# Weight matrices are (fan_in, fan_out) and biases are (fan_out,); all are
# drawn with np.random.rand.  The draw order (W2, W3, W4, b2, b3, b4) is kept
# so that a fixed seed reproduces the same initial values.
W2 = np.random.rand(input_nodes, hidden_nodes_1)
W3 = np.random.rand(hidden_nodes_1, hidden_nodes_2)
W4 = np.random.rand(hidden_nodes_2, output_nodes)

b2 = np.random.rand(hidden_nodes_1)
b3 = np.random.rand(hidden_nodes_2)
b4 = np.random.rand(output_nodes)

# Show the initial parameters, one layer per line.
for weights, bias in ((W2, b2), (W3, b3), (W4, b4)):
    print(weights, bias)

[[0.5488135  0.71518937]
 [0.60276338 0.54488318]] [0.92559664 0.07103606]
[[0.4236548  0.64589411 0.43758721]
 [0.891773   0.96366276 0.38344152]] [0.0871293  0.0202184  0.83261985]
[[0.79172504]
 [0.52889492]
 [0.56804456]] [0.77815675]


## define loss function and output, y

In [5]:
def loss_func(x, t):
    """Return the summed binary cross-entropy of the network on ``(x, t)``.

    The forward pass reads the module-level parameters ``W2``/``b2``,
    ``W3``/``b3`` and ``W4``/``b4`` and applies ``sigmoid`` after each affine
    layer.  The loss is summed over all entries, not averaged.

    Args:
        x: Input data fed to the first layer.
        t: Target values compared against the network output.
    """
    delta = 1e-7    # keeps np.log away from log(0), which would diverge

    # Forward pass: sigmoid(activation . W + b) for each layer in turn.
    activation = x
    for weights, bias in ((W2, b2), (W3, b3), (W4, b4)):
        activation = sigmoid(np.dot(activation, weights) + bias)
    y = activation

    # Cross-entropy, split into the target == 1 and target == 0 contributions.
    # (An MSE alternative would be np.sum((t - y)**2) / len(x).)
    positive_term = t*np.log(y + delta)
    negative_term = (1-t)*np.log((1 - y)+delta )
    return -np.sum(positive_term + negative_term)

## XOR learning

In [6]:
learning_rate = 1e-1  # changed from the earlier value of 1e-2


def f(x):
    # The argument is ignored: numerical_derivative perturbs the parameter
    # arrays in place, and loss_func reads those same module-level arrays.
    return loss_func(xor_xdata, xor_tdata)


print("Initial loss value = ", loss_func(xor_xdata, xor_tdata))

start_time = datetime.now()

for step in range(30001):

    # Gradient-descent update of every parameter, in the order
    # W2, b2, W3, b3, W4, b4.  "-=" updates each array in place, so the
    # module-level weights change and later gradients see the new values.
    for param in (W2, b2, W3, b3, W4, b4):
        param -= learning_rate * numerical_derivative(f, param)

    # Report progress every 500 steps.
    if step % 500 == 0:
        print("step = ", step, "loss value = ", loss_func(xor_xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  4.849305410095795
step =  0 loss value =  4.1777115118820705
step =  500 loss value =  2.770941283666491
step =  1000 loss value =  2.7664802420290338
step =  1500 loss value =  2.7413533688564256
step =  2000 loss value =  2.3646538550098035
step =  2500 loss value =  1.7515501482281102
step =  3000 loss value =  0.07508785553454016
step =  3500 loss value =  0.02897916600148879
step =  4000 loss value =  0.01732945166866684
step =  4500 loss value =  0.0121942045507772
step =  5000 loss value =  0.009341273402909286
step =  5500 loss value =  0.007538553967937851
step =  6000 loss value =  0.006301679173425055
step =  6500 loss value =  0.005403004252331794
step =  7000 loss value =  0.004721922866118095
step =  7500 loss value =  0.00418877132590911
step =  8000 loss value =  0.0037605897737936662
step =  8500 loss value =  0.0034094897494881186
step =  9000 loss value =  0.0031166011009082107
step =  9500 loss value =  0.002868712383417655
step =  10000 loss v

## evaluate and predict

In [7]:
# After training, predict the output for arbitrary input data.
# Input test_data: NumPy array holding one sample or a batch of samples.
def predict(test_data):
    """Run the network forward on ``test_data`` and threshold at 0.5.

    The forward pass uses the module-level parameters ``W2``/``b2``,
    ``W3``/``b3`` and ``W4``/``b4`` with ``sigmoid`` after each layer.

    Args:
        test_data: NumPy array holding a single sample or a batch with one
            sample per row.

    Returns:
        A tuple ``(y, pred_val)``.  ``y`` is the sigmoid output of the last
        layer.  When ``y`` holds exactly one value, ``pred_val`` is the int
        ``1`` if that value is greater than 0.5 and ``0`` otherwise.  For a
        batch, ``pred_val`` is an integer array shaped like ``y`` with the
        same 0/1 labelling per entry.
    """
    z2 = np.dot(test_data, W2) + b2
    a2 = sigmoid(z2)

    z3 = np.dot(a2, W3) + b3
    a3 = sigmoid(z3)

    z4 = np.dot(a3, W4) + b4
    y = sigmoid(z4)

    if y.size == 1:
        # Single prediction: keep returning a plain Python int.
        pred_val = 1 if y.item() > 0.5 else 0
    else:
        # Batched input: "if y > 0.5" would raise because the truth value of
        # a multi-element array is ambiguous, so threshold element-wise.
        pred_val = (y > 0.5).astype(int)

    return y, pred_val

In [8]:
# Feed every XOR input pattern to the trained network, one row at a time,
# and print the (output, predicted label) tuple that predict returns.
test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for input_data in test_data:
    print(predict(input_data))

(array([0.00019022]), 0)
(array([0.99986065]), 1)
(array([0.99986062]), 1)
(array([0.00017602]), 0)
