In [24]:
import numpy as np
def sigmoid(z):
    """Logistic sigmoid, ``1 / (1 + e**-z)``.

    Accepts anything ``np.exp`` can handle after negation (scalars or
    arrays); arrays are transformed element-wise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is perturbed *in place* by ``+delta_x`` and then
    ``-delta_x``; ``f(x)`` is evaluated for both and the slope
    ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` is stored.  Because
    the perturbation is done in place, ``f`` may ignore its argument and read
    the same array through another reference.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; expected to return a scalar.
    x : numpy.ndarray
        Floating-point array to differentiate with respect to.  Each element
        is restored to its original value after it has been probed, even if
        ``f`` raises.
    delta_x : float, optional
        Half-width of the finite-difference step (default ``1e-4``).

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``x`` holding the partial
        derivatives.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``.  An integer array
        would truncate the in-place perturbation and yield a meaningless
        gradient.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "x must be a floating-point numpy.ndarray so it can be perturbed in place"
        )

    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]  # scalar copy of the element being probed
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation so a failing f() cannot leave x
            # permanently modified.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [28]:
# Inputs shared by the AND / OR / NAND / XOR gates: one row per input pair.
x_data = np.array([[0, 0],
                   [0, 1],
                   [1, 0],
                   [1, 1]])

# Target outputs per gate as (4, 1) column vectors, row-aligned with x_data.
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Test inputs: the same four combinations, held in a separate array.
test_data = x_data.copy()

In [35]:
import numpy as np
from datetime import datetime

def loss_func(x, t):
    """Cross-entropy loss of the two-layer sigmoid network for inputs ``x`` and targets ``t``.

    Reads the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``.
    The per-element terms ``t*log(y) + (1-t)*log(1-y)`` are summed and
    negated, so a single scalar is returned.
    """
    eps = 1e-7  # added inside log() so that log(0) is never evaluated

    hidden = sigmoid(np.dot(x, W2) + b2)
    y = sigmoid(np.dot(hidden, W3) + b3)

    positive_term = t * np.log(y + eps)
    negative_term = (1 - t) * np.log((1 - y) + eps)
    return -np.sum(positive_term + negative_term)

def predict(test_data):
    """Run the network forward on ``test_data`` and threshold the output at 0.5.

    Reads the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``.

    Returns
    -------
    tuple
        ``(y, result)`` where ``y`` is the sigmoid output of the last layer
        and ``result`` is ``1`` when ``y >= 0.5`` and ``0`` otherwise.

    Notes
    -----
    The threshold is a plain truth test on ``y``, so ``y`` has to hold a
    single value; NumPy refuses to truth-test arrays with several elements.
    """
    hidden = sigmoid(np.dot(test_data, W2) + b2)
    y = sigmoid(np.dot(hidden, W3) + b3)

    result = 1 if y >= 0.5 else 0
    return y, result

def accruracy(test_xdata, test_tdata):
    """Return the fraction of samples whose predicted class equals its target.

    Each ``test_xdata[index]`` is passed to ``predict`` and the thresholded
    class it returns is compared with ``test_tdata[index]``.

    Parameters
    ----------
    test_xdata : sequence
        Samples to classify, one per entry.
    test_tdata : sequence
        Expected class for each sample, indexed like ``test_xdata``.

    Returns
    -------
    float
        ``matches / len(test_xdata)``; ``0.0`` when ``test_xdata`` is empty
        (nothing was classified correctly, and dividing by zero is avoided).
    """
    total = len(test_xdata)
    if total == 0:
        return 0.0

    matched = 0
    for index in range(total):
        _, logical_val = predict(test_xdata[index])
        if logical_val == test_tdata[index]:
            matched += 1

    return matched / total
    
def feed_forward(x, t):
    """Forward pass through both sigmoid layers, returning the cross-entropy loss.

    Uses the module-level ``W2``, ``b2``, ``W3`` and ``b3``.  The computation
    is the same as ``loss_func``: the summed, negated
    ``t*log(y) + (1-t)*log(1-y)`` with a small constant added inside each log.
    """
    eps = 1e-7  # guards log() against a zero argument

    hidden_out = sigmoid(np.dot(x, W2) + b2)
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    log_likelihood = np.sum(t * np.log(y + eps) + (1 - t) * np.log((1 - y) + eps))
    return -log_likelihood


# Network layout: 2 inputs -> 2 hidden units -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 2, 1

# Parameters start from np.random.rand draws (uniform on [0, 1)).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

print("W2.shape = ", W2.shape, "b2.shape = ", b2.shape, "W3.shape = ", W3.shape, "b3.shape = ", b3.shape)

learning_rate = 1e-1
iteration_count = 10001

# The lambda ignores its argument: loss_func reads W2/b2/W3/b3 as globals,
# and numerical_derivative perturbs those very arrays in place.
f = lambda x: loss_func(x_data, xor_tdata)

start_time = datetime.now()
for step in range(iteration_count):
    # Updated strictly in the order W2, b2, W3, b3, so each gradient already
    # sees the parameters changed earlier in the same step.  `-=` modifies
    # the arrays in place, which keeps the globals pointing at them.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 1000 == 0:
        print("step =", step, ",error_rate = ", loss_func(x_data, xor_tdata))
end_time = datetime.now()

print("")
print("time_escape : ", end_time - start_time)

W2.shape =  (2, 2) b2.shape =  (2,) W3.shape =  (2, 1) b3.shape =  (1,)
step = 0 ,error_rate =  3.798017876071282
step = 1000 ,error_rate =  1.840047742764368
step = 2000 ,error_rate =  0.15261758093456781
step = 3000 ,error_rate =  0.060226507538031736
step = 4000 ,error_rate =  0.03710890744549497
step = 5000 ,error_rate =  0.026722064873533347
step = 6000 ,error_rate =  0.020844050894732694
step = 7000 ,error_rate =  0.017070079383409778
step = 8000 ,error_rate =  0.01444495145887896
step = 9000 ,error_rate =  0.01251480209856594
step = 10000 ,error_rate =  0.011036654435727732

time_escape :  0:00:04.091974


In [34]:
# Print the raw network output and the thresholded class for every test input.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.0034058] , logical_val =  0
real_val [0.99757929] , logical_val =  1
real_val [0.99758457] , logical_val =  1
real_val [0.00251528] , logical_val =  0


# Hyper-parameter 선택 조건
1. 민감도가 적을 조건 (error_rate)
2. 수행시간이 가장 짧은 조건 (performance)
3. 서버의 백엔드에 최적화시켜야 한다.