<a href="https://colab.research.google.com/github/fivetop/python/blob/master/DeepLearning_XOR_Example_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from datetime import datetime 

# Seed NumPy's global random generator so that the np.random.rand() calls
# used later for weight initialisation draw a repeatable sequence when the
# notebook is executed top to bottom on a fresh kernel.
np.random.seed(0)

In [2]:
def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses the central difference ``(f(x + h) - f(x - h)) / (2 * h)`` with
    ``h = 1e-4``, perturbing one element of ``x`` at a time.

    Parameters
    ----------
    f : callable
        Called as ``f(x)`` and expected to return a scalar. It is invoked
        twice per element of ``x``.
    x : numpy.ndarray
        Point at which to differentiate. The array is perturbed IN PLACE
        while ``f`` is evaluated (so ``f`` may read it through another
        reference) and every element is restored before moving on. Because
        the perturbed value is written back into ``x``, ``x`` needs a
        floating-point dtype; an integer array would truncate the +/- h step.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``x`` holding the estimated
        partial derivatives.
    """
    delta_x = 1e-4
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]  # scalar copy of the original element
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation, even when f raises or the cell is
            # interrupted; otherwise x would be left shifted by +/- delta_x.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)
        it.iternext()

    return grad

In [3]:

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` may be a scalar or a NumPy array; np.exp is applied element-wise,
    so the result has the same shape as ``z``.
    """
    exp_neg_z = np.exp(-z)
    denominator = 1 + exp_neg_z
    return 1 / denominator


In [4]:
# The four XOR input pairs, one per row.
xor_xdata = np.array([[0, 0],
                      [0, 1],
                      [1, 0],
                      [1, 1]])

# Matching XOR targets, stored as a column so each row lines up with one input.
xor_tdata = np.array([[0],
                      [1],
                      [1],
                      [0]])

print("xor_xdata.shape = ", xor_xdata.shape, ", xor_tdata.shape = ", xor_tdata.shape)


xor_xdata.shape =  (4, 2) , xor_tdata.shape =  (4, 1)


In [6]:
# Layer sizes: 2 inputs -> 3 hidden units -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 3, 1

# Weights and biases are drawn uniformly from [0, 1) by np.random.rand.
# The draw order (W2, W3, b2, b3) determines which random numbers each
# parameter receives, so it is kept fixed.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input  -> hidden weights
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
b3 = np.random.rand(output_nodes)                 # output-layer bias

# Show the initial parameters: hidden layer first, then output layer.
for initial_param in (W2, b2, W3, b3):
    print(initial_param)

[[0.92559664 0.07103606 0.0871293 ]
 [0.0202184  0.83261985 0.77815675]]
[0.46147936 0.78052918 0.11827443]
[[0.87001215]
 [0.97861834]
 [0.79915856]]
[0.63992102]


In [7]:
def loss_func(x, t):
    """Cross-entropy loss of the two-layer sigmoid network.

    Runs a forward pass of inputs ``x`` through the module-level parameters
    W2, b2 (hidden layer) and W3, b3 (output layer), then returns the summed
    binary cross-entropy between the network output and the targets ``t``.
    """
    eps = 1e-7  # keeps the argument of log() away from 0 (log(0) = -inf)

    # Forward pass: hidden activations, then the network output.
    hidden_out = sigmoid(np.dot(x, W2) + b2)
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    # Cross-entropy, summed over all samples.
    positive_term = t * np.log(y + eps)
    negative_term = (1 - t) * np.log((1 - y) + eps)
    return -np.sum(positive_term + negative_term)

    # Alternative loss that was left disabled here (mean squared error):
    #     np.sum((t - y) ** 2) / len(x)
    

In [8]:
learning_rate = 1e-2


def f(x):
    # The argument is ignored: numerical_derivative perturbs the parameter
    # arrays in place and loss_func reads them as module-level globals.
    return loss_func(xor_xdata, xor_tdata)


print("Initial loss value = ", loss_func(xor_xdata, xor_tdata) )

start_time = datetime.now()

for step in range(30001):

    # Gradient-descent update, one parameter array at a time in the order
    # W2, b2, W3, b3. Each gradient is computed after the previous array has
    # already been updated, and `-=` modifies the arrays in place.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    # Report the current loss every 500 steps.
    if step % 500 == 0:
        print("step = ", step, "loss value = ", loss_func(xor_xdata, xor_tdata) )

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  5.3306531689890555
step =  0 loss value =  5.256152423251162
step =  500 loss value =  2.7725393771912454
step =  1000 loss value =  2.772075003684494
step =  1500 loss value =  2.7715704397692242
step =  2000 loss value =  2.770990268155099
step =  2500 loss value =  2.7702888103739123
step =  3000 loss value =  2.7694035230459946
step =  3500 loss value =  2.768244801127125
step =  4000 loss value =  2.766679796650152
step =  4500 loss value =  2.7645064174871674
step =  5000 loss value =  2.7614115128394627
step =  5500 loss value =  2.756904230920022
step =  6000 loss value =  2.750211329515637
step =  6500 loss value =  2.7401143763877176
step =  7000 loss value =  2.724696357344846
step =  7500 loss value =  2.700957700760961
step =  8000 loss value =  2.664359755467888
step =  8500 loss value =  2.6088842413543665
step =  9000 loss value =  2.529242325445814
step =  9500 loss value =  2.425765897398958
step =  10000 loss value =  2.306340924313462
step =  1

In [9]:
def predict(test_data):
    """Run the trained network on ``test_data`` and threshold the output at 0.5.

    Parameters
    ----------
    test_data : array-like
        Either a single sample (e.g. ``[0, 1]``) or a batch with one sample
        per row. It is multiplied with the module-level weights W2, so its
        last dimension must match W2's first dimension.

    Returns
    -------
    (y, pred_val)
        ``y`` is the sigmoid output of the network. ``pred_val`` is the class
        label obtained from ``y > 0.5``: a plain ``int`` (0 or 1) when the
        output holds a single value, otherwise an integer array of 0/1 labels
        with the same shape as ``y``.
    """
    z2 = np.dot(test_data, W2) + b2
    a2 = sigmoid(z2)

    z3 = np.dot(a2, W3) + b3
    y = sigmoid(z3)

    above_threshold = y > 0.5
    if above_threshold.size == 1:
        # Single sample: keep returning a plain Python int as before.
        pred_val = 1 if above_threshold.item() else 0
    else:
        # Batch: `if y > 0.5` would raise "truth value of an array is
        # ambiguous", so threshold element-wise instead.
        pred_val = above_threshold.astype(int)

    return y, pred_val

In [10]:
# Evaluate the trained network on all four XOR input combinations.
test_data = np.array([[0, 0],
                      [0, 1],
                      [1, 0],
                      [1, 1]])

# predict() is called once per row; each printed line is the
# (network output, thresholded label) tuple it returns.
for input_data in test_data:
    prediction = predict(input_data)
    print(prediction)

(array([0.01223362]), 0)
(array([0.99218744]), 1)
(array([0.97960696]), 1)
(array([0.01730854]), 0)
