In [1]:
import numpy as np

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``.

    ``z`` may be a scalar or a NumPy array; ``np.exp`` is applied
    element-wise, so the result has the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is nudged to ``+delta_x`` and ``-delta_x`` *in
    place*, ``f(x)`` is evaluated at both points, and the element is put
    back. Because the perturbation happens on the caller's array object, the
    estimate is also valid for callables that ignore their argument and read
    that same array from an enclosing scope.

    Parameters
    ----------
    f : callable
        Invoked as ``f(x)``; its result is stored into one gradient slot, so
        it should be a scalar value.
    x : numpy.ndarray or array_like
        Point at which to differentiate. A floating-point ndarray is
        perturbed in place and restored before returning. Any other input
        (integer array, list, scalar) cannot hold a fractional perturbation,
        so a float64 copy is made and that copy is what ``f`` receives.
    delta_x : float, optional
        Half-width of the finite-difference step. Defaults to ``1e-4``.

    Returns
    -------
    numpy.ndarray
        Gradient estimate with the same shape as ``x``.
    """
    # An integer array would silently truncate `value +/- delta_x` on
    # assignment and force an integer gradient buffer, so work on a float copy.
    if not (isinstance(x, np.ndarray) and np.issubdtype(x.dtype, np.floating)):
        x = np.array(x, dtype=float)

    grad = np.zeros_like(x)

    # np.ndindex yields every index tuple of the shape (including the single
    # empty tuple for 0-d arrays, and nothing at all for zero-size arrays).
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation, even if f raised, so the caller's
            # array is never left in a modified state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [2]:
# Truth-table data for the AND, OR, NAND and XOR gates.
# Inputs: one row per combination of the two binary inputs.
x_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

# Targets: one (4, 1) column per gate, row-aligned with x_data.
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Evaluation inputs: the same four rows, held in a separate array.
test_data = x_data.copy()

In [5]:
import numpy as np
from datetime import datetime

def loss_func(x,t):
    """Cross-entropy loss of the two-layer sigmoid network on ``(x, t)``.

    The network parameters are read from the module-level ``W2``, ``b2``,
    ``W3`` and ``b3``; they are not passed in.

    Returns ``-sum(t*log(y + eps) + (1 - t)*log(1 - y + eps))`` where ``y``
    is the network output for ``x``.
    """
    eps = 1e-7  # keeps both log arguments strictly positive when y hits 0 or 1

    # Hidden layer, then output layer.
    hidden = sigmoid(np.dot(x, W2) + b2)
    y = sigmoid(np.dot(hidden, W3) + b3)

    log_likelihood = t * np.log(y + eps) + (1 - t) * np.log(1 - y + eps)
    return -np.sum(log_likelihood)

def feed_f0orward(x,t):
    """Run the forward pass on ``x`` and return the cross-entropy against ``t``.

    Performs the same computation as ``loss_func``: the module-level ``W2``,
    ``b2``, ``W3`` and ``b3`` are used as the network parameters, and the
    scalar loss (not the activations) is returned.
    """
    # NOTE(review): the name looks like a typo of "feed_forward" - confirm
    # before renaming, since callers outside this view may use it as spelled.
    eps = 1e-7  # guards log() against a zero argument

    hidden_out = sigmoid(np.dot(x, W2) + b2)
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    positive_term = t * np.log(y + eps)
    negative_term = (1 - t) * np.log(1 - y + eps)
    return -np.sum(positive_term + negative_term)

def predict(test_data):
    """Run the network on one input and threshold its output at 0.5.

    Uses the module-level ``W2``, ``b2``, ``W3`` and ``b3``. The threshold
    test is a plain ``if`` on ``y``, so ``y`` must reduce to a single truth
    value (i.e. one sample at a time).

    Returns
    -------
    (y, result)
        ``y`` is the raw sigmoid output; ``result`` is ``1`` when
        ``y >= 0.5`` and ``0`` otherwise.
    """
    hidden = sigmoid(np.dot(test_data, W2) + b2)
    y = sigmoid(np.dot(hidden, W3) + b3)

    result = 1 if y >= 0.5 else 0
    return y, result

def accuracy(test_xdata,test_tdata):
    """Fraction of samples whose thresholded prediction equals the target.

    Each ``test_xdata[i]`` is passed to ``predict`` on its own and the
    resulting 0/1 label is compared with ``test_tdata[i]``.
    """
    total = len(test_xdata)
    hits = 0

    for i in range(total):
        _, label = predict(test_xdata[i])
        if label == test_tdata[i]:
            hits += 1

    return hits / total


In [6]:
# Network layout: 2 inputs -> 2 hidden units -> 1 output.
input_nodes = 2
hidden_nodes = 2
output_nodes = 1

# Parameters start as uniform random values in [0, 1).
W2 = np.random.rand(input_nodes, hidden_nodes)
b2 = np.random.rand(hidden_nodes)

W3 = np.random.rand(hidden_nodes, output_nodes)
b3 = np.random.rand(output_nodes)

learning_rate = 1e-1
iteration_count = 10001


def f(x):
    """Loss on the XOR data; ``x`` is ignored.

    ``loss_func`` reads the weights from module scope, and
    ``numerical_derivative`` perturbs those same arrays in place, so the
    perturbation is still seen even though the argument is unused.
    """
    return loss_func(x_data, xor_tdata)


start_time = datetime.now()

for step in range(iteration_count):
    # Update the parameters one after another, in this fixed order; each
    # gradient is therefore computed with the earlier parameters already
    # updated. `-=` on an ndarray modifies it in place, so W2/b2/W3/b3 keep
    # referring to the arrays that loss_func reads.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 400 == 0:
        print("step = ", step, "error_rate = ", loss_func(x_data, xor_tdata))

end_time = datetime.now()
print("")
print("time-escaped : ", end_time - start_time)

step =  0 error_rate =  3.76867353833238
step =  400 error_rate =  2.651188068244778
step =  800 error_rate =  2.251944896810029
step =  1200 error_rate =  2.059977231123156
step =  1600 error_rate =  1.9960374262639076
step =  2000 error_rate =  1.9670528499021156
step =  2400 error_rate =  1.9508355693088129
step =  2800 error_rate =  1.9402728316538091
step =  3200 error_rate =  1.9321882399282566
step =  3600 error_rate =  1.9238120734109978
step =  4000 error_rate =  1.9012179870471981
step =  4400 error_rate =  0.5582926112841422
step =  4800 error_rate =  0.1621916565992422
step =  5200 error_rate =  0.09272297138119875
step =  5600 error_rate =  0.06536141832204585
step =  6000 error_rate =  0.050684297560645214
step =  6400 error_rate =  0.041484260126383135
step =  6800 error_rate =  0.035154568418120306
step =  7200 error_rate =  0.030522124003827134
step =  7600 error_rate =  0.026979413680345638
step =  8000 error_rate =  0.024179448333092494
step =  8400 error_rate =  0.0

In [7]:
# Print the raw network output and its thresholded 0/1 label for every test row.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val = ", real_val, ", logical_val = ", logical_val)

real_val =  [0.00460903] , logical_val =  0
real_val =  [0.99637782] , logical_val =  1
real_val =  [0.99637781] , logical_val =  1
real_val =  [0.00404977] , logical_val =  0
