In [1]:
import numpy as np

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Each element of ``x`` is perturbed *in place* by ``+delta_x`` and
    ``-delta_x`` and ``f(x)`` is evaluated both times, so ``f`` may read the
    perturbed values either through its argument or through another
    reference to the same array object.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; must return a scalar.
    x : numpy.ndarray
        Writable floating-point array. It is modified temporarily and every
        element is restored before the function returns, including when
        ``f`` raises.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``x`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises
    ------
    TypeError
        If ``x`` does not have a floating-point dtype. Writing
        ``value +/- delta_x`` into an integer array would truncate the
        perturbation and silently yield a wrong gradient.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype "
            + str(x.dtype)
        )

    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]  # scalar copy of the original value
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x) # f(x+delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x) # f(x-delta_x)
        finally:
            # Always undo the perturbation, even if f raised, so the caller's
            # array is never left in a modified state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2*delta_x)
        it.iternext()

    return grad


# The model output is y = sigmoid(Wx + b), and the loss function is cross-entropy.

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [2]:
# XOR truth table: each row of xdata is one input pair and the matching row
# of tdata is its target, stored as a (4, 1) column.
xdata = np.array([[0, 0],
                  [0, 1],
                  [1, 0],
                  [1, 1]])

tdata = np.array([[0], [1], [1], [0]])

# test data
test_data = np.array([[0, 0],
                      [0, 1],
                      [1, 0],
                      [1, 1]])

In [3]:
# Random initial parameters drawn uniformly from [0, 1):
# W is a (2, 1) weight array and b is a length-1 bias array.
W = np.random.rand(2, 1)
b = np.random.rand(1)

print(
    "W = ", W,
    ", W.shape = ", W.shape,
    ", b = ", b,
    ", b.shape = ", b.shape,
)

W =  [[0.73657622]
 [0.438352  ]] , W.shape =  (2, 1) , b =  [0.33801134] , b.shape =  (1,)


In [4]:
def loss_func(x, t):
    """Cross-entropy loss of the sigmoid model on inputs ``x`` and targets ``t``.

    The prediction is ``y = sigmoid(np.dot(x, W) + b)``, where ``W`` and ``b``
    are read from module scope at call time. A small constant is added inside
    both logarithms so that ``log(0)`` is never evaluated.

    Returns the scalar ``-sum(t*log(y + eps) + (1 - t)*log((1 - y) + eps))``.
    """
    eps = 1e-7

    y = sigmoid(np.dot(x, W) + b)

    # cross-entropy
    positive_term = t*np.log(y + eps)
    negative_term = (1-t)*np.log((1 - y)+eps)
    return -np.sum(positive_term + negative_term)

# Function that computes the value of the loss function
# Inputs x, t: numpy arrays
def error_val(x, t):
    """Return the current cross-entropy loss for inputs ``x`` and targets ``t``.

    This is the value reported while training. It is the same quantity as
    ``loss_func(x, t)``, evaluated with the module-level ``W`` and ``b`` as
    they are at call time.
    """
    # The formula used to be a verbatim copy of loss_func; delegating keeps
    # the reported error and the optimised loss from drifting apart.
    return loss_func(x, t)

# After training, predicts the output value for arbitrary input data
# Input x: numpy array
def predict(x):
    """Run the sigmoid model on ``x`` and threshold the output at 0.5.

    Uses the module-level ``W`` and ``b`` as they are at call time.

    Parameters
    ----------
    x : numpy.ndarray
        Either one sample or a batch of samples (presumably one sample per
        row, matching how ``np.dot(x, W)`` is used -- confirm against callers).

    Returns
    -------
    (y, result)
        ``y`` is ``sigmoid(np.dot(x, W) + b)``. When ``y`` has exactly one
        element, ``result`` is the Python int 1 if ``y >= 0.5`` else 0. When
        ``y`` has several elements, ``result`` is an integer array of 0/1
        with the same shape as ``y``.
    """
    z = np.dot(x,W) + b
    y = sigmoid(z)

    if np.size(y) == 1:
        # Single sample: keep the scalar 0/1 result.
        result = 1 if y >= 0.5 else 0
    else:
        # Batch: `if y >= 0.5` would raise ValueError on a multi-element
        # array, so threshold element-wise instead.
        result = (y >= 0.5).astype(int)

    return y, result

In [5]:
# XOR gate verification: gradient-descent training of W and b on xdata / tdata.
learning_rate = 1e-3

# The argument x is deliberately ignored: numerical_derivative perturbs the
# array it is given (W or b) in place, and loss_func reads those same
# module-level arrays, so every call sees the perturbed parameters.
f = lambda x : loss_func(xdata, tdata)

print("Initial error value = ", error_val(xdata, tdata))

for step in  range(30001):

    # The updates must stay in place (-=) so that W and b remain the same
    # array objects that loss_func reads. W is updated first, so the gradient
    # for b is computed with the already-updated W.
    W -= learning_rate * numerical_derivative(f, W)

    b -= learning_rate * numerical_derivative(f, b)

    # Report the loss every 400 steps.
    # NOTE(review): the recorded output plateaus near 2.77 (about 4*ln 2) and
    # the predictions stay near 0.5, which is what a single sigmoid unit
    # would be expected to do on XOR -- confirm this is the intended demo.
    if (step % 400 == 0):
        print("step = ", step, "error value = ", error_val(xdata, tdata))

Initial error value =  3.2606016919249816
step =  0 error value =  3.2593343041160634
step =  400 error value =  2.938560162447528
step =  800 error value =  2.831351193576436
step =  1200 error value =  2.798044383128007
step =  1600 error value =  2.7873250456858982
step =  2000 error value =  2.783293069092736
step =  2400 error value =  2.781297465563989
step =  2800 error value =  2.7799934680244127
step =  3200 error value =  2.778984396098998
step =  3600 error value =  2.7781444094721444
step =  4000 error value =  2.7774260913824453
step =  4400 error value =  2.776805841058068
step =  4800 error value =  2.7762682368852407
step =  5200 error value =  2.7758014167455176
step =  5600 error value =  2.775395591363493
step =  6000 error value =  2.7750424672284835
step =  6400 error value =  2.774734949608994
step =  6800 error value =  2.774466944955148
step =  7200 error value =  2.7742332086618084
step =  7600 error value =  2.7740292202620185
step =  8000 error value =  2.773

In [6]:
# XOR verification: show the raw sigmoid output and the thresholded 0/1 label
# for every row of test_data.
print("XOR Gate verification")

for input_data in test_data:
    real_val, logical_val = predict(input_data)
    print("real_val", real_val, ", logical_val = ", logical_val)

XOR Gate verification
real_val [0.49938269] , logical_val =  0
real_val [0.49988223] , logical_val =  0
real_val [0.49992411] , logical_val =  0
real_val [0.50042366] , logical_val =  1
