### [예제 2] 은닉층 1개를 가지는 신경망에서 AND / OR / NAND / XOR 검증
#### 은닉층 노드가 1개여도 AND, OR, NAND는 검증 OK, 그러나 XOR은 은닉층 노드 1개로는 검증 Not OK

In [1]:
import numpy as np
from datetime import datetime

# 수치미분 함수

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is perturbed *in place* by ``+delta_x`` and then
    ``-delta_x``; ``f(x)`` is evaluated after each perturbation and the element
    is put back to its original value before moving on. Because the
    perturbation is written into ``x`` itself, ``f`` observes it even when it
    reads the array through another reference instead of its argument.

    Args:
        f: Callable invoked as ``f(x)``. The difference of its two return
            values is stored in a single gradient element.
        x: NumPy array to differentiate with respect to. It is modified
            temporarily and restored, even if ``f`` raises. It should have a
            floating dtype: assigning the perturbed value into an integer
            array would truncate it.
        delta_x: Half-width of the central difference (default ``1e-4``).

    Returns:
        An array created with ``np.zeros_like(x)`` whose element ``idx`` is
        ``(f(x + delta_x at idx) - f(x - delta_x at idx)) / (2 * delta_x)``.
    """
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Never leave the caller's array perturbed, even when f() fails.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)
        it.iternext()

    return grad

# sigmoid 함수

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [2]:
# feed forward
def feed_forward(xdata, tdata):
    """Run a forward pass and return the summed binary cross-entropy loss.

    Reads the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``.

    Args:
        xdata: Input rows fed to the hidden layer.
        tdata: Target values compared against the network output.

    Returns:
        ``-sum(t * log(y + eps) + (1 - t) * log(1 - y + eps))``.
    """
    eps = 1e-7  # keeps the arguments of log() strictly positive

    # Hidden layer: affine transform followed by the sigmoid activation.
    hidden_out = sigmoid(np.dot(xdata, W2) + b2)

    # Output layer: same structure applied to the hidden activations.
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    log_likelihood = tdata * np.log(y + eps) + (1 - tdata) * np.log((1 - y) + eps)
    return -np.sum(log_likelihood)



# loss val
def loss_val(xdata, tdata):
    """Return the summed binary cross-entropy of the network on ``xdata``.

    Uses the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``.

    Args:
        xdata: Input rows fed to the hidden layer.
        tdata: Target values compared against the network output.

    Returns:
        The cross-entropy between ``tdata`` and the network output, summed
        over all elements.
    """
    delta = 1e-7  # small offset so log() never receives exactly zero

    a2 = sigmoid(np.dot(xdata, W2) + b2)   # hidden-layer activations
    y = sigmoid(np.dot(a2, W3) + b3)       # output-layer activations

    positive_term = tdata * np.log(y + delta)
    negative_term = (1 - tdata) * np.log((1 - y) + delta)
    return -np.sum(positive_term + negative_term)

In [3]:
# query, 즉 미래 값 예측 함수
def predict(xdata):
    """Run the network on ``xdata`` and threshold its output at 0.5.

    Reads the module-level parameters ``W2``, ``b2``, ``W3`` and ``b3``.

    Args:
        xdata: A single input sample, or a batch of samples (one per row).

    Returns:
        A tuple ``(y, result)``. ``y`` is the raw sigmoid output of the
        output layer. For a single-valued output ``result`` is the int ``1``
        when ``y >= 0.5`` and ``0`` otherwise; for a batch it is an integer
        array of the same shape as ``y`` holding those 0/1 decisions.
    """
    z2 = np.dot(xdata, W2) + b2         # hidden-layer pre-activation
    a2 = sigmoid(z2)                    # hidden-layer output

    z3 = np.dot(a2, W3) + b3            # output-layer pre-activation
    y = sigmoid(z3)                     # output-layer output

    is_true = np.asarray(y >= 0.5)
    if is_true.size == 1:
        # Single sample: keep returning a plain Python int.
        result = 1 if is_true.item() else 0
    else:
        # Batch: `if y >= 0.5` would be ambiguous, so threshold element-wise.
        result = is_true.astype(int)

    return y, result

#### 입력데이터, 정답데이터 정의 

In [4]:
# and, or, nand, xor data
# Inputs of the two-variable truth table, one row per (x1, x2) pair.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Target columns of shape (4, 1), listed in the same row order as xdata.
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Samples used when querying a trained network (same four input pairs).
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

#### AND 학습 (은닉노드 10개)

In [5]:
# define neural network architecture
# Network shape: 2 inputs -> 10 hidden units -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 10, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# AND loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, and_tdata)

print("Initial loss value = ", loss_val(xdata, and_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, and_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  14.764733659738724
step =  0   , loss value =  9.534989800095264
step =  500   , loss value =  0.177360347591258
step =  1000   , loss value =  0.04275661817988801
step =  1500   , loss value =  0.02184869437797192
step =  2000   , loss value =  0.014189439289916003
step =  2500   , loss value =  0.010341699719376908
step =  3000   , loss value =  0.008062582645565258
step =  3500   , loss value =  0.00656866068965488
step =  4000   , loss value =  0.005519861146205362
step =  4500   , loss value =  0.004746153213875925
step =  5000   , loss value =  0.004153601120727699
step =  5500   , loss value =  0.0036862982965532713
step =  6000   , loss value =  0.003308994046768884
step =  6500   , loss value =  0.002998412678784674
step =  7000   , loss value =  0.002738594838904345
step =  7500   , loss value =  0.0025182484680734097
step =  8000   , loss value =  0.002329168594712055
step =  8500   , loss value =  0.0021652559310325406
step =  9000   , loss value =  0.

In [6]:
# and prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [1.12238531e-07] , logical_val =  0
real_val [0.0004548] , logical_val =  0
real_val [0.00044904] , logical_val =  0
real_val [0.99912086] , logical_val =  1


#### AND 학습 (은닉노드 1개)

In [7]:
# define neural network architecture
# Network shape: 2 inputs -> 1 hidden unit -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# AND loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, and_tdata)

print("Initial loss value = ", loss_val(xdata, and_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, and_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  3.568320510314834
step =  0   , loss value =  3.2138166584596806
step =  500   , loss value =  0.923386986727212
step =  1000   , loss value =  0.13214360151648696
step =  1500   , loss value =  0.06301785201244979
step =  2000   , loss value =  0.040619555318930634
step =  2500   , loss value =  0.029775643169068194
step =  3000   , loss value =  0.023429509592948684
step =  3500   , loss value =  0.019279864090369216
step =  4000   , loss value =  0.016361243055643272
step =  4500   , loss value =  0.014199755929085338
step =  5000   , loss value =  0.012536260061157403
step =  5500   , loss value =  0.011217369441572894
step =  6000   , loss value =  0.010146619849851317
step =  6500   , loss value =  0.009260363548616295
step =  7000   , loss value =  0.008514938048992006
step =  7500   , loss value =  0.00787940934855187
step =  8000   , loss value =  0.007331258265018718
step =  8500   , loss value =  0.006853707077611273
step =  9000   , loss value =  0.006

In [8]:
# and prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00054295] , logical_val =  0
real_val [0.0014125] , logical_val =  0
real_val [0.0014125] , logical_val =  0
real_val [0.99764168] , logical_val =  1


#### OR 검증 (은닉노드 1개)

In [9]:
# define neural network architecture
# Network shape: 2 inputs -> 1 hidden unit -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# OR loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, or_tdata)

print("Initial loss value = ", loss_val(xdata, or_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    # This cell reports every 400 steps.
    if step % 400 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, or_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  2.143547959042568
step =  0   , loss value =  2.1418990541223355
step =  400   , loss value =  0.2685529935714443
step =  800   , loss value =  0.09629757769212956
step =  1200   , loss value =  0.05682801267024123
step =  1600   , loss value =  0.03998467382701928
step =  2000   , loss value =  0.030742523070196533
step =  2400   , loss value =  0.024929219221316266
step =  2800   , loss value =  0.02094451032091159
step =  3200   , loss value =  0.018046914542342786
step =  3600   , loss value =  0.0158469482883049
step =  4000   , loss value =  0.014120817720224768
step =  4400   , loss value =  0.012730948318388908
step =  4800   , loss value =  0.011588187629754058
step =  5200   , loss value =  0.010632262165432366
step =  5600   , loss value =  0.009820980083150998
step =  6000   , loss value =  0.009123935002800105
step =  6400   , loss value =  0.008518666163600492
step =  6800   , loss value =  0.007988226228868056
step =  7200   , loss value =  0.007519

In [10]:
# or prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00315601] , logical_val =  0
real_val [0.99914315] , logical_val =  1
real_val [0.99914311] , logical_val =  1
real_val [0.99954876] , logical_val =  1


#### NAND 검증 (은닉층 노드 1개)

In [11]:
# define neural network architecture
# Network shape: 2 inputs -> 1 hidden unit -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# NAND loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, nand_tdata)

print("Initial loss value = ", loss_val(xdata, nand_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, nand_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  2.2827256576148076
step =  0   , loss value =  2.2776402026442994
step =  500   , loss value =  0.3304234680999681
step =  1000   , loss value =  0.09668409932017183
step =  1500   , loss value =  0.05352231534629471
step =  2000   , loss value =  0.03650376340490757
step =  2500   , loss value =  0.027545201675704036
step =  3000   , loss value =  0.022055195673184494
step =  3500   , loss value =  0.01835957723759187
step =  4000   , loss value =  0.015708019241136968
step =  4500   , loss value =  0.01371570853653454
step =  5000   , loss value =  0.012165518928073726
step =  5500   , loss value =  0.010925880162822222
step =  6000   , loss value =  0.009912523324257363
step =  6500   , loss value =  0.009069029739143999
step =  7000   , loss value =  0.008356231624120324
step =  7500   , loss value =  0.007746102555879613
step =  8000   , loss value =  0.007218068199464359
step =  8500   , loss value =  0.006756689470905651
step =  9000   , loss value =  0.006

In [12]:
# nand prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.99972562] , logical_val =  1
real_val [0.99897815] , logical_val =  1
real_val [0.99897815] , logical_val =  1
real_val [0.00334249] , logical_val =  0


#### XOR 검증 (은닉층 노드 1개)

In [13]:
# define neural network architecture
# Network shape: 2 inputs -> 1 hidden unit -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 1, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# XOR loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, xor_tdata)

print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  3.0495722319257235
step =  0   , loss value =  2.9737390624926046
step =  500   , loss value =  2.594516368856777
step =  1000   , loss value =  2.0811710112502357
step =  1500   , loss value =  1.9857602575547932
step =  2000   , loss value =  1.956757679444613
step =  2500   , loss value =  1.94332790082079
step =  3000   , loss value =  1.9356937614226208
step =  3500   , loss value =  1.930803275657285
step =  4000   , loss value =  1.9274160904686772
step =  4500   , loss value =  1.9249376655233856
step =  5000   , loss value =  1.9230488212481378
step =  5500   , loss value =  1.9215634123827376
step =  6000   , loss value =  1.9203657928429578
step =  6500   , loss value =  1.9193804390015963
step =  7000   , loss value =  1.9185559958616394
step =  7500   , loss value =  1.9178563569469842
step =  8000   , loss value =  1.9172554140901845
step =  8500   , loss value =  1.9167338317221159
step =  9000   , loss value =  1.9162769914671123
step =  9500   , l

In [14]:
# xor prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00309481] , logical_val =  0
real_val [0.66567739] , logical_val =  1
real_val [0.66567736] , logical_val =  1
real_val [0.66663449] , logical_val =  1


#### XOR 검증 (은닉층 노드 2개)

In [15]:
# define neural network architecture
# Network shape: 2 inputs -> 2 hidden units -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 2, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# XOR loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, xor_tdata)

print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  3.244230706153651
step =  0   , loss value =  3.091458495467791
step =  500   , loss value =  2.654172468901668
step =  1000   , loss value =  1.6536382586745892
step =  1500   , loss value =  1.48261034966289
step =  2000   , loss value =  1.4419348180466032
step =  2500   , loss value =  1.4247845507733687
step =  3000   , loss value =  1.4154996788894147
step =  3500   , loss value =  1.4097276434486996
step =  4000   , loss value =  1.4058106627674585
step =  4500   , loss value =  1.4029867482836806
step =  5000   , loss value =  1.400858735705024
step =  5500   , loss value =  1.3992000476887296
step =  6000   , loss value =  1.397872308332742
step =  6500   , loss value =  1.39678638090006
step =  7000   , loss value =  1.3958823317918507
step =  7500   , loss value =  1.395118417784484
step =  8000   , loss value =  1.3944646952336326
step =  8500   , loss value =  1.3938991401934508
step =  9000   , loss value =  1.3934052013109208
step =  9500   , loss v

In [16]:
# xor prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00160164] , logical_val =  0
real_val [0.49935279] , logical_val =  0
real_val [0.99820863] , logical_val =  1
real_val [0.50079905] , logical_val =  1


#### XOR 검증 (은닉층 노드 4개)

In [17]:
# define neural network architecture
# Network shape: 2 inputs -> 4 hidden units -> 1 output.
input_nodes, hidden_nodes, output_nodes = 2, 4, 1

# Parameters start as uniform random samples from np.random.rand.
W2 = np.random.rand(input_nodes, hidden_nodes)    # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                 # hidden-layer bias
W3 = np.random.rand(hidden_nodes, output_nodes)   # hidden -> output weights
b3 = np.random.rand(output_nodes)                 # output-layer bias

learning_rate = 1e-1

# XOR loss. The argument is ignored: numerical_derivative perturbs the
# parameter arrays in place and feed_forward reads them as globals.
f = lambda x: feed_forward(xdata, xor_tdata)

print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):
    # Updated one after another, so each gradient already sees the
    # parameters changed earlier in the same step.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 500 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  4.132379802367491
step =  0   , loss value =  3.606345078174747
step =  500   , loss value =  2.56461297625877
step =  1000   , loss value =  1.1561440684108528
step =  1500   , loss value =  0.23143652826951996
step =  2000   , loss value =  0.10685171360222308
step =  2500   , loss value =  0.06696657021258204
step =  3000   , loss value =  0.0480549509096279
step =  3500   , loss value =  0.037183795219550876
step =  4000   , loss value =  0.030180841048085837
step =  4500   , loss value =  0.025317569221718714
step =  5000   , loss value =  0.02175537224024827
step =  5500   , loss value =  0.019040405434202858
step =  6000   , loss value =  0.016906511757714
step =  6500   , loss value =  0.01518768675365499
step =  7000   , loss value =  0.01377521720103371
step =  7500   , loss value =  0.012595033770667702
step =  8000   , loss value =  0.011594988974803613
step =  8500   , loss value =  0.010737341559033633
step =  9000   , loss value =  0.009994125393545

In [18]:
# xor prediction

# Query the trained network on every truth-table row and print the raw
# output next to its thresholded 0/1 decision.
for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00279098] , logical_val =  0
real_val [0.99801647] , logical_val =  1
real_val [0.99790531] , logical_val =  1
real_val [0.00189261] , logical_val =  0
