In [1]:
import numpy as np
from datetime import datetime 

# Seed NumPy's legacy global RNG so that later np.random.* calls draw a
# repeatable sequence when the notebook is executed top-to-bottom on a fresh kernel.
np.random.seed(0)

In [2]:
def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Each element of ``x`` is perturbed *in place* by ``+delta_x`` and
    ``-delta_x`` in turn, ``f(x)`` is evaluated for both, and the element is
    then restored. Because the perturbation is applied to the caller's array
    object itself, ``f`` may read ``x`` through shared state instead of through
    its argument.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; must return a scalar.
    x : numpy.ndarray
        Floating-point array of any shape. It is modified temporarily and is
        restored to its original values before this function returns or
        raises.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``x`` holding
        ``(f(x + d) - f(x - d)) / (2 * d)`` for each element.

    Raises
    ------
    TypeError
        If ``x`` does not have a floating-point dtype. An integer array cannot
        store the +/-1e-4 perturbation (it would be truncated on assignment),
        so the result would be silently wrong.
    """
    if not np.issubdtype(np.asarray(x).dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype "
            + str(np.asarray(x).dtype)
        )

    delta_x = 1e-4
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]  # scalar copy of the original value, used to restore x
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Restore even when f raises, so the caller's array is never left
            # in a perturbed state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)
        it.iternext()

    return grad

In [3]:

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The naive form overflows in ``exp(-z)`` for large negative ``z`` and emits
    a RuntimeWarning. Here the exponential is always taken of a non-positive
    number, so it cannot overflow:

    * ``z >= 0``:  ``1 / (1 + exp(-|z|))``
    * ``z <  0``:  ``exp(-|z|) / (1 + exp(-|z|))``

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Same shape as ``z``; a NumPy scalar when ``z`` is a scalar.
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # always in (0, 1], never overflows
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]


In [4]:
# XOR truth table: each row of xor_xdata is one (x1, x2) input pair and the
# matching row of xor_tdata is its target, stored as a (4, 1) column.
xor_xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
xor_tdata = np.array([[0], [1], [1], [0]])

print("xor_xdata.shape = ", xor_xdata.shape, ", xor_tdata.shape = ", xor_tdata.shape)


xor_xdata.shape =  (4, 2) , xor_tdata.shape =  (4, 1)


In [21]:
# Layer sizes of the 2-2-1 network.
input_nodes, hidden_nodes, output_nodes = 2, 2, 1

# Weights and biases are drawn from np.random.rand. The draw order
# (W2, W3, b2, b3) is kept as-is because it determines which random values
# each parameter receives.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input  -> hidden weights
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights

b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
b3 = np.random.rand(output_nodes)                 # output-layer bias

# Show the hidden-layer parameters first, then the output-layer parameters.
print(W2, b2, W3, b3, sep="\n")

[[0.3595079  0.43703195]
 [0.6976312  0.06022547]]
[0.21038256 0.1289263 ]
[[0.66676672]
 [0.67063787]]
[0.31542835]


In [22]:
def loss_func(x, t):
    """Summed binary cross-entropy of the network output for inputs ``x`` against targets ``t``.

    The forward pass uses the module-level parameters ``W2``, ``b2``, ``W3``
    and ``b3``: a sigmoid hidden layer followed by a sigmoid output layer.

    An MSE loss could be used instead: ``np.sum((t - y)**2) / len(x)``.
    """
    eps = 1e-7  # keeps the log arguments away from zero so log() cannot diverge

    # Forward pass: input -> hidden -> output.
    hidden_out = sigmoid(np.dot(x, W2) + b2)
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    # Cross-entropy, summed over every sample.
    log_likelihood = t * np.log(y + eps) + (1 - t) * np.log((1 - y) + eps)
    return -np.sum(log_likelihood)
    

In [23]:
learning_rate = 1e-2


def f(x):
    # The argument is ignored: the loss reads the module-level parameters,
    # which numerical_derivative is given to perturb.
    return loss_func(xor_xdata, xor_tdata)


print("Initial loss value = ", loss_func(xor_xdata, xor_tdata))

start_time = datetime.now()

for step in range(30001):

    # One gradient-descent step per parameter, applied in place and in the
    # order W2, b2, W3, b3. Each gradient is computed after the preceding
    # parameter has already been updated.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    # Report the current loss every 500 steps.
    if step % 500 == 0:
        print("step = ", step, "loss value = ", loss_func(xor_xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  3.406589444274438
step =  0 loss value =  3.3865512653007213
step =  500 loss value =  2.770698227628081
step =  1000 loss value =  2.7700756834451017
step =  1500 loss value =  2.76919073697333
step =  2000 loss value =  2.7678959188368037
step =  2500 loss value =  2.7659470684103358
step =  3000 loss value =  2.762937445457649
step =  3500 loss value =  2.7581967402030116
step =  4000 loss value =  2.7506590739407524
step =  4500 loss value =  2.738736575951781
step =  5000 loss value =  2.720256647224606
step =  5500 loss value =  2.6924620134809887
step =  6000 loss value =  2.652052278261218
step =  6500 loss value =  2.5956788753128777
step =  7000 loss value =  2.521763797344208
step =  7500 loss value =  2.432924824108364
step =  8000 loss value =  2.3356852179956533
step =  8500 loss value =  2.236570648854533
step =  9000 loss value =  2.13775406456543
step =  9500 loss value =  2.034474078792777
step =  10000 loss value =  1.9138135727351808
step =  10

In [24]:
def predict(test_data):
    """Run the network forward on ``test_data`` and threshold the output at 0.5.

    The forward pass uses the module-level parameters ``W2``, ``b2``, ``W3``
    and ``b3``: a sigmoid hidden layer followed by a sigmoid output layer.

    Parameters
    ----------
    test_data : numpy.ndarray
        Either a single sample or a batch of samples (one per row).

    Returns
    -------
    (y, pred_val)
        ``y`` is the sigmoid output of the network. When ``y`` holds a single
        value, ``pred_val`` is the int ``1`` if ``y > 0.5`` else ``0``. When
        ``y`` holds several values (batch input), ``pred_val`` is an integer
        array of the same shape with the threshold applied element-wise.
    """
    z2 = np.dot(test_data, W2) + b2
    a2 = sigmoid(z2)

    z3 = np.dot(a2, W3) + b3
    y = sigmoid(z3)

    if np.size(y) == 1:
        # Single sample: keep the plain-int label.
        pred_val = 1 if y > 0.5 else 0
    else:
        # Batch: `if y > 0.5` would raise ValueError on a multi-element array,
        # so threshold element-wise instead.
        pred_val = (y > 0.5).astype(int)

    return y, pred_val

In [25]:
# Evaluate the network on each of the four XOR input pairs, one sample at a time.
test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for input_data in test_data:
    # predict returns (network output, thresholded label); print the pair as-is.
    result = predict(input_data)
    print(result)

(array([0.01666301]), 0)
(array([0.98709781]), 1)
(array([0.98711561]), 1)
(array([0.01414677]), 0)


In [27]:
import numpy as np
# np.argmax demo: index of the largest entry of a one-hot style vector.
Y = np.array([0, 0, 0, 1, 0])
hot_index = np.argmax(Y)
print(hot_index)


3
