In [35]:
import numpy as np

# Numerical differentiation (central difference)
def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Each element of ``x`` is perturbed *in place* by +/- ``delta_x`` and
    ``f(x)`` is re-evaluated, so ``f`` may either use its argument or read the
    very same array object through a closure.

    Args:
        f: Callable that accepts ``x`` and returns a value that can be stored
            in one element of the gradient (normally a scalar loss).
        x: Floating-point NumPy array. It is modified during the call and
            every element is restored before the function returns or raises.

    Returns:
        Array with the shape and dtype of ``x`` holding the central-difference
        estimate ``(f(x + h) - f(x - h)) / (2 * h)`` for each element.

    Raises:
        TypeError: If ``x`` does not have a floating-point dtype. An integer
            array would silently truncate the +/- ``delta_x`` perturbation
            and produce a meaningless gradient.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype "
            + str(x.dtype)
        )

    delta_x = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # np.ndindex visits every multi-index in C order and simply yields nothing
    # for a zero-size array, so an empty x returns an empty gradient.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)
        finally:
            # Always undo the perturbation, even when f raises, so the
            # caller's array is never left in a modified state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

# Sigmoid activation
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` is passed to ``np.exp``, so scalars and NumPy arrays are both
    accepted; arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [36]:
class LogicGate:
    """Two-input logic gate learned by a small sigmoid network.

    The network has one hidden layer (``hidden_units`` sigmoid units) and a
    single sigmoid output unit. Parameters are trained by gradient descent on
    the cross-entropy loss, with gradients estimated by the module-level
    ``numerical_derivative`` helper.
    """

    def __init__(self, gate_name, xdata, tdata, hidden_units=6, learning_rate=1e-2):
        """Store the training data and randomly initialise the parameters.

        Args:
            gate_name: Label used in the creation message.
            xdata: Array reshaped to ``(4, 2)`` - four two-input samples.
            tdata: Array reshaped to ``(4, 1)`` - one target per sample.
            hidden_units: Number of hidden-layer units (default 6).
            learning_rate: Gradient-descent step size (default 1e-2).
        """
        self.name = gate_name

        # Input data and target data
        self.__xdata = xdata.reshape(4, 2)
        self.__tdata = tdata.reshape(4, 1)

        # Layer 2 (hidden layer): weights W2 and bias b2
        self.__W2 = np.random.rand(2, hidden_units)
        self.__b2 = np.random.rand(hidden_units)

        # Layer 3 (output layer, 1 unit): weights W3 and bias b3
        self.__W3 = np.random.rand(hidden_units, 1)
        self.__b3 = np.random.rand(1)

        # Learning rate
        self.__learning_rate = learning_rate

        print(self.name + " object is created")

    def _forward(self, xdata):
        """Run the forward pass on ``xdata`` and return the output activation."""
        z2 = np.dot(xdata, self.__W2) + self.__b2  # hidden-layer linear value
        a2 = sigmoid(z2)                           # hidden-layer output

        z3 = np.dot(a2, self.__W3) + self.__b3     # output-layer linear value
        return sigmoid(z3)                         # output-layer output

    def feed_forward(self):
        """Return the summed cross-entropy loss over the stored training data."""
        delta = 1e-7  # keeps the argument of log() away from zero

        y = self._forward(self.__xdata)

        # cross-entropy
        return -np.sum(self.__tdata*np.log(y+delta) + (1-self.__tdata)*np.log((1-y)+delta))

    def loss_val(self):
        """Return the current loss; same computation as ``feed_forward``."""
        return self.feed_forward()

    def train(self, num_steps=20001, log_interval=1000):
        """Fit the parameters by gradient descent with numerical gradients.

        Args:
            num_steps: Number of update steps to run (default 20001).
            log_interval: Print the loss every this many steps (default 1000).
                A falsy value disables the per-step printing.
        """
        # numerical_derivative perturbs the parameter array in place, so the
        # loss can ignore its argument and read the parameters from self.
        f = lambda x : self.feed_forward()
        print("Initial loss value = ", self.loss_val())

        for step in range(num_steps):
            # Parameters are updated one after another: each gradient is
            # evaluated after the preceding parameter has been updated.
            self.__W2 -= self.__learning_rate * numerical_derivative(f, self.__W2)
            self.__b2 -= self.__learning_rate * numerical_derivative(f, self.__b2)
            self.__W3 -= self.__learning_rate * numerical_derivative(f, self.__W3)
            self.__b3 -= self.__learning_rate * numerical_derivative(f, self.__b3)

            if log_interval and step % log_interval == 0:
                print("step = ", step, "  , loss value = ", self.loss_val())

    def predict(self, xdata):
        """Predict the gate output for ``xdata``.

        Args:
            xdata: One sample of two inputs, or an array of such rows.

        Returns:
            Tuple ``(y, result)``. ``y`` is the output activation. When ``y``
            holds a single value, ``result`` is the int 1 if it exceeds 0.5
            and 0 otherwise; for several rows ``result`` is an integer array
            of the same shape as ``y`` with one 0/1 label per row.
        """
        y = self._forward(xdata)

        if y.size == 1:
            result = 1 if y.item() > 0.5 else 0  # 1: True, 0: False
        else:
            # Thresholding a multi-row output element-wise avoids the
            # ambiguous truth value of a multi-element array.
            result = (y > 0.5).astype(int)

        return y, result

In [37]:
# XOR truth table: every pair of binary inputs (a, b) and its target a XOR b.
xdata = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
tdata = xdata[:, 0] ^ xdata[:, 1]

# Build the gate model and fit it to the truth table.
xor_obj = LogicGate(gate_name="XOR", xdata=xdata, tdata=tdata)
xor_obj.train()

XOR object is created
Initial loss value =  7.164464393177543
step =  0   , loss value =  7.01167082206538
step =  1000   , loss value =  2.759963427518775
step =  2000   , loss value =  2.739752133386367
step =  3000   , loss value =  2.6915402522562193
step =  4000   , loss value =  2.570355196656134
step =  5000   , loss value =  2.323034443903422
step =  6000   , loss value =  1.988533076480374
step =  7000   , loss value =  1.6006176387488158
step =  8000   , loss value =  1.179505528753082
step =  9000   , loss value =  0.8249304293294477
step =  10000   , loss value =  0.5831557815856121
step =  11000   , loss value =  0.4291118649924459
step =  12000   , loss value =  0.32951976348481793
step =  13000   , loss value =  0.26245741767513614
step =  14000   , loss value =  0.21530458923423304
step =  15000   , loss value =  0.1808421611465229
step =  16000   , loss value =  0.15481145407262187
step =  17000   , loss value =  0.134598385796521
step =  18000   , loss value =  0.1185

In [38]:
# Evaluate the trained gate on all four binary input pairs.
test_data = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

for data in test_data:
    prediction = xor_obj.predict(data)  # (output activation, 0/1 label)
    print(prediction)

(array([0.00592093]), 0)
(array([0.97657793]), 1)
(array([0.97253585]), 1)
(array([0.03660876]), 0)
