In [1]:
import numpy as np
from datetime import datetime

In [2]:
def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses the central difference ``(f(x + h) - f(x - h)) / (2 * h)`` with
    ``h = 1e-4``, perturbing one element at a time.

    ``x`` is perturbed *in place* while ``f`` is evaluated, so ``f`` may read
    the very array object passed here (through a closure or a global) instead
    of using its argument. Each element is put back to its original value
    before the next one is touched, even when ``f`` raises.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; must return a scalar.
    x : numpy.ndarray
        Floating-point array of any shape. An empty array is accepted and
        yields an empty gradient.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``x`` holding the estimated
        partial derivatives.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``. An integer array
        cannot store the +/- 1e-4 perturbation, so the result would be
        meaningless.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError("numerical_derivative requires a floating-point numpy.ndarray")

    delta_x = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # np.ndindex visits every index tuple and, unlike np.nditer, simply
    # yields nothing for a zero-size array.
    for idx in np.ndindex(*x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation so a failing f cannot leave the
            # caller's array silently modified.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated with ``np.exp``."""
    exp_of_negated = np.exp(-x)
    return 1 / (1 + exp_of_negated)

def predict(a1):
    """Run one input through the network and threshold the output at 0.5.

    Reads the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``.

    Parameters
    ----------
    a1 : array-like
        Input fed to ``np.dot(a1, W2)``.

    Returns
    -------
    tuple
        ``(a3, result)`` where ``a3`` is the output-layer sigmoid activation
        and ``result`` is ``1`` when ``a3 >= 0.5`` and ``0`` otherwise.

    Notes
    -----
    ``a3 >= 0.5`` is used directly as an ``if`` condition, so ``a3`` has to
    hold exactly one value for the call to succeed.
    """
    # Hidden layer activation.
    a2 = sigmoid(np.dot(a1, W2) + b2)

    # Output layer activation.
    a3 = sigmoid(np.dot(a2, W3) + b3)

    result = 1 if a3 >= 0.5 else 0  # 1 = True, 0 = False
    return a3, result

In [3]:
def feed_forward(xdata, tdata):
    """Cross-entropy loss of the network output against ``tdata``.

    Reads the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``,
    pushes ``xdata`` through both sigmoid layers and returns

        -sum(t * log(y + eps) + (1 - t) * log((1 - y) + eps))

    with ``eps = 1e-7``.

    Parameters
    ----------
    xdata : array-like
        Inputs fed to ``np.dot(xdata, W2)``.
    tdata : array-like
        Targets combined element-wise with the network output.

    Returns
    -------
    scalar
        The summed cross-entropy.
    """
    eps = 1e-7  # added inside both logarithms so log(0) is never evaluated

    hidden_out = sigmoid(np.dot(xdata, W2) + b2)
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    target_is_one = tdata * np.log(y + eps)
    target_is_zero = (1 - tdata) * np.log((1 - y) + eps)
    return -np.sum(target_is_one + target_is_zero)

def loss_val(xdata, tdata):
    """Return the current loss for reporting.

    The reported value is exactly the quantity computed by ``feed_forward``,
    so this delegates to it rather than keeping a second copy of the same
    forward pass that could drift out of sync.

    Parameters
    ----------
    xdata : array-like
        Inputs, passed through unchanged.
    tdata : array-like
        Targets, passed through unchanged.

    Returns
    -------
    scalar
        Whatever ``feed_forward(xdata, tdata)`` returns.
    """
    return feed_forward(xdata, tdata)

In [4]:
# Inputs: all four combinations of two binary values, one row per sample.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets for each logic gate as (4, 1) column vectors, row-aligned with xdata.
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Rows passed one at a time to predict() after training.
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

## AND

In [5]:
# Layer sizes used to shape W2, b2, W3 and b3.
input_nodes, output_nodes = 2, 1
# Even a single hidden node works here; don't start with a large count.
hidden_nodes = 10

learning_rate = 0.1

In [6]:
# Random initial parameters; np.random.rand draws uniformly from [0, 1).
W2 = np.random.rand(input_nodes, hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)

b2 = np.random.rand(hidden_nodes)
b3 = np.random.rand(output_nodes)


def f(_unused):
    """AND loss; the argument is ignored because feed_forward reads the
    module-level W2/b2/W3/b3 that numerical_derivative perturbs in place."""
    return feed_forward(xdata, and_tdata)


print('\n=================================================')
for _label, _param in (('initial W2 = ', W2), ('initial b2 = ', b2),
                       ('initial W3 = ', W3), ('initial b3 = ', b3)):
    print(_label, _param)
print('=================================================\n')

print("Initial loss value = ", loss_val(xdata, and_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient-descent step on each parameter in turn. The update must be
    # in place (-=) so that f keeps seeing the same array objects.
    for _param in (W2, b2, W3, b3):
        _param -= learning_rate * numerical_derivative(f, _param)

    if step % 1000 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, and_tdata))

end_time = datetime.now()

print('\n=================================================')
for _label, _param in (('updated W2 = ', W2), ('updated b2 = ', b2),
                       ('updated W3 = ', W3), ('updated b3 = ', b3)):
    print(_label, _param)
print("Elapsed Time =>", end_time - start_time)
print('=================================================\n')


initial W2 =  [[0.32515441 0.72843125 0.46404357 0.72137295 0.20715038 0.09932945
  0.7807107  0.55779103 0.0099681  0.72727712]
 [0.13498071 0.18332076 0.08532649 0.44800331 0.06837904 0.89611391
  0.6218476  0.6015282  0.82754099 0.72192399]]
initial b2 =  [0.66702733 0.04252655 0.75554459 0.2042471  0.01284572 0.14328067
 0.04778304 0.43951326 0.66359448 0.89478216]
initial W3 =  [[0.71167068]
 [0.44234522]
 [0.75526878]
 [0.01735013]
 [0.10025782]
 [0.24667322]
 [0.25588348]
 [0.59628768]
 [0.83923151]
 [0.66765153]]
initial b3 =  [0.28888271]

Initial loss value =  10.600792781327598
step =  0   , loss value =  6.350934900298324
step =  1000   , loss value =  0.048606246358022936
step =  2000   , loss value =  0.01685666991839603
step =  3000   , loss value =  0.009705958755954718
step =  4000   , loss value =  0.006687842785622596
step =  5000   , loss value =  0.005051669375750723
step =  6000   , loss value =  0.004034539268448234
step =  7000   , loss value =  0.0033449621321

In [7]:
# Show the raw network output next to the 0/1 decision for every test row.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val =", real_val, ", logical_val =", logical_val)

real_val = [1.0738198e-06] , logical_val = 0
real_val = [0.00060891] , logical_val = 0
real_val = [0.00054656] , logical_val = 0
real_val = [0.99897224] , logical_val = 1


## OR

In [8]:
# Layer sizes used to shape W2, b2, W3 and b3 for the OR gate.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

learning_rate = 0.1

In [9]:
# Random initial parameters; np.random.rand draws uniformly from [0, 1).
W2 = np.random.rand(input_nodes, hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)

b2 = np.random.rand(hidden_nodes)
b3 = np.random.rand(output_nodes)


def f(_unused):
    """OR loss; the argument is ignored because feed_forward reads the
    module-level W2/b2/W3/b3 that numerical_derivative perturbs in place."""
    return feed_forward(xdata, or_tdata)


print('\n=================================================')
for _label, _param in (('initial W2 = ', W2), ('initial b2 = ', b2),
                       ('initial W3 = ', W3), ('initial b3 = ', b3)):
    print(_label, _param)
print('=================================================\n')

print("Initial loss value = ", loss_val(xdata, or_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient-descent step on each parameter in turn. The update must be
    # in place (-=) so that f keeps seeing the same array objects.
    for _param in (W2, b2, W3, b3):
        _param -= learning_rate * numerical_derivative(f, _param)

    if step % 1000 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, or_tdata))

end_time = datetime.now()

print('\n=================================================')
for _label, _param in (('updated W2 = ', W2), ('updated b2 = ', b2),
                       ('updated W3 = ', W3), ('updated b3 = ', b3)):
    print(_label, _param)
print("Elapsed Time =>", end_time - start_time)
print('=================================================\n')


initial W2 =  [[0.12892789]
 [0.06185661]]
initial b2 =  [0.18558689]
initial W3 =  [[0.87071937]]
initial b3 =  [0.57008824]

Initial loss value =  2.229420846210183
step =  0   , loss value =  2.226915543498592
step =  1000   , loss value =  0.07371443534849616
step =  2000   , loss value =  0.031132268626958993
step =  3000   , loss value =  0.0195459324209734
step =  4000   , loss value =  0.014204203634038796
step =  5000   , loss value =  0.011141419473205412
step =  6000   , loss value =  0.009158913612122815
step =  7000   , loss value =  0.0077721265483528
step =  8000   , loss value =  0.006748216018061079
step =  9000   , loss value =  0.005961537038329693
step =  10000   , loss value =  0.00533837333564003

updated W2 =  [[6.46380926]
 [6.46375818]]
updated b2 =  [-3.43851261]
updated W3 =  [[13.88713109]]
updated b3 =  [-6.18511995]
Elapsed Time => 0:00:02.450452



In [10]:
# Show the raw network output next to the 0/1 decision for every test row.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val =", real_val, ", logical_val =", logical_val)

real_val = [0.00316307] , logical_val = 0
real_val = [0.99914115] , logical_val = 1
real_val = [0.99914118] , logical_val = 1
real_val = [0.99954781] , logical_val = 1


## NAND

In [11]:
# Layer sizes used to shape W2, b2, W3 and b3 for the NAND gate.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

learning_rate = 0.1

In [12]:
# Random initial parameters; np.random.rand draws uniformly from [0, 1).
W2 = np.random.rand(input_nodes, hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)

b2 = np.random.rand(hidden_nodes)
b3 = np.random.rand(output_nodes)


def f(_unused):
    """NAND loss; the argument is ignored because feed_forward reads the
    module-level W2/b2/W3/b3 that numerical_derivative perturbs in place."""
    return feed_forward(xdata, nand_tdata)


print('\n=================================================')
for _label, _param in (('initial W2 = ', W2), ('initial b2 = ', b2),
                       ('initial W3 = ', W3), ('initial b3 = ', b3)):
    print(_label, _param)
print('=================================================\n')

print("Initial loss value = ", loss_val(xdata, nand_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient-descent step on each parameter in turn. The update must be
    # in place (-=) so that f keeps seeing the same array objects.
    for _param in (W2, b2, W3, b3):
        _param -= learning_rate * numerical_derivative(f, _param)

    if step % 1000 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, nand_tdata))

end_time = datetime.now()

print('\n=================================================')
for _label, _param in (('updated W2 = ', W2), ('updated b2 = ', b2),
                       ('updated W3 = ', W3), ('updated b3 = ', b3)):
    print(_label, _param)
print("Elapsed Time =>", end_time - start_time)
print('=================================================\n')


initial W2 =  [[0.76802097]
 [0.48707409]]
initial b2 =  [0.95854334]
initial W3 =  [[0.61355864]]
initial b3 =  [0.61279517]

Initial loss value =  2.2996413678320007
step =  0   , loss value =  2.2986004685005286
step =  1000   , loss value =  0.1591288506036078
step =  2000   , loss value =  0.04315612215311235
step =  3000   , loss value =  0.024271344979611968
step =  4000   , loss value =  0.016770723683756435
step =  5000   , loss value =  0.012775995191696607
step =  6000   , loss value =  0.010303209151067683
step =  7000   , loss value =  0.00862488018150235
step =  8000   , loss value =  0.007412507197022055
step =  9000   , loss value =  0.0064963868469077365
step =  10000   , loss value =  0.005780133658227427

updated W2 =  [[5.46269573]
 [5.46269568]]
updated b2 =  [-8.0938086]
updated W3 =  [[-14.35346033]]
updated b3 =  [7.51608078]
Elapsed Time => 0:00:02.416570



In [13]:
# Show the raw network output next to the 0/1 decision for every test row.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val =", real_val, ", logical_val =", logical_val)

real_val = [0.99945365] , logical_val = 1
real_val = [0.99857487] , logical_val = 1
real_val = [0.99857487] , logical_val = 1
real_val = [0.0023789] , logical_val = 0


## XOR

In [20]:
# Layer sizes used to shape W2, b2, W3 and b3 for the XOR gate.
input_nodes, output_nodes = 2, 1
# XOR does not work with a single hidden node!
hidden_nodes = 2

learning_rate = 0.1

In [21]:
# Random initial parameters; np.random.rand draws uniformly from [0, 1).
W2 = np.random.rand(input_nodes, hidden_nodes)
W3 = np.random.rand(hidden_nodes, output_nodes)

b2 = np.random.rand(hidden_nodes)
b3 = np.random.rand(output_nodes)


def f(_unused):
    """XOR loss; the argument is ignored because feed_forward reads the
    module-level W2/b2/W3/b3 that numerical_derivative perturbs in place."""
    return feed_forward(xdata, xor_tdata)


print('\n=================================================')
for _label, _param in (('initial W2 = ', W2), ('initial b2 = ', b2),
                       ('initial W3 = ', W3), ('initial b3 = ', b3)):
    print(_label, _param)
print('=================================================\n')

print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):
    # Gradient-descent step on each parameter in turn. The update must be
    # in place (-=) so that f keeps seeing the same array objects.
    for _param in (W2, b2, W3, b3):
        _param -= learning_rate * numerical_derivative(f, _param)

    if step % 1000 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print('\n=================================================')
for _label, _param in (('updated W2 = ', W2), ('updated b2 = ', b2),
                       ('updated W3 = ', W3), ('updated b3 = ', b3)):
    print(_label, _param)
print("Elapsed Time => ", end_time - start_time)
print('=================================================\n')


initial W2 =  [[0.91897146 0.45605571]
 [0.47252958 0.5544793 ]]
initial b2 =  [0.26203599 0.15811028]
initial W3 =  [[0.07727417]
 [0.50154148]]
initial b3 =  [0.01297816]

Initial loss value =  2.8460485881594924
step =  0   , loss value =  2.8210940679995287
step =  1000   , loss value =  1.9211284153596133
step =  2000   , loss value =  0.13147563597444828
step =  3000   , loss value =  0.057040945288361636
step =  4000   , loss value =  0.03599097299900228
step =  5000   , loss value =  0.026186122370007676
step =  6000   , loss value =  0.02054170602308346
step =  7000   , loss value =  0.016881634694312066
step =  8000   , loss value =  0.014319353973096003
step =  9000   , loss value =  0.012426985668125725
step =  10000   , loss value =  0.010973025968701346

updated W2 =  [[5.66686066 7.46496636]
 [5.66546679 7.45754923]]
updated b2 =  [-8.66449307 -3.41660736]
updated W3 =  [[-13.73188689]
 [ 12.95009274]]
updated b3 =  [-6.0719766]
Elapsed Time =>  0:00:04.430188



In [22]:
# Show the raw network output next to the 0/1 decision for every test row.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val =", real_val, ", logical_val =", logical_val)

real_val = [0.00346081] , logical_val = 0
real_val = [0.9975336] , logical_val = 1
real_val = [0.99753546] , logical_val = 1
real_val = [0.00256629] , logical_val = 0


은닉층 노드(hidden_nodes) 개수를 늘리면 손실 값(loss value)이 더 작아져 정확도는 높아지지만, 최종 예측 결과가 동일하다면 학습 시간이 더 짧은, 노드 개수가 적은 쪽을 선택하는 것이 좋다.

예) 은닉 노드 2개로 3만 번 학습하면 약 14초, 8개로 3만 번 학습하면 약 47초가 걸린다.