In [1]:
# 지도학습 logistic regression: classification
# 출력값 y가 1또는 0만을 가져야하는 분류 시스템에서 함수 값으로 0~1사이의 값을 가지는 sigmoid 함수를 사용할 수 있음
# 선형회귀 때와는 다른 손실함수가 필요함: cross-entropy... 유도하는건 수업 듣기만 하는걸로..
# input -> linear regression -> classification sigmoid -> 손실함수의 최소값 -> 학습종료

In [3]:
# 예제
import numpy as np

# Training inputs 2, 4, ..., 20 as a (10, 1) column vector.
x_data = np.array(list(range(2, 21, 2))).reshape(-1, 1)
# Targets: the first six samples are class 0, the last four are class 1.
t_data = np.array([0] * 6 + [1] * 4).reshape(-1, 1)

# Random starting values in [0, 1) for the weight matrix and the bias.
W = np.random.rand(1, 1)
b = np.random.rand(1)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``x``; a NumPy scalar for scalar input,
        otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    e = np.exp(-abs(x))
    # Both branches are the same function algebraically; each one is used
    # on the side of zero where it is numerically safe.
    y = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return y[()] if y.ndim == 0 else y

def loss_func(x, t):
    """Cross-entropy loss of the current global model (``W``, ``b``).

    Parameters
    ----------
    x : array-like
        Inputs, multiplied with the global ``W`` via ``np.dot``.
    t : array-like
        Targets; entries weight the ``log(y)`` and ``log(1 - y)`` terms.

    Returns
    -------
    float
        Cross-entropy summed over all elements.
    """
    eps = 1e-7  # keeps the log arguments away from 0 so the loss stays finite
    y = sigmoid(np.dot(x, W) + b)

    # cross-entropy, split into the t == 1 and t == 0 contributions
    positive_term = t * np.log(y + eps)
    negative_term = (1 - t) * np.log((1 - y) + eps)
    return -np.sum(positive_term + negative_term)

def numerical_derivative(f, x, delta_x=1e-4):
    """Approximate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is nudged in place to ``value + delta_x`` and
    ``value - delta_x``, ``f(x)`` is evaluated at both points, and the element
    is then restored.  Because the perturbation is written into ``x`` itself,
    ``f`` may ignore its argument and read the same array through a global or
    a closure instead.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; expected to return a scalar.
    x : numpy.ndarray
        Floating-point array to differentiate with respect to.  It is
        modified temporarily and restored, even if ``f`` raises.
    delta_x : float, optional
        Finite-difference step (default ``1e-4``).

    Returns
    -------
    numpy.ndarray
        Array shaped like ``x`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``.  An integer array
        would truncate the perturbed values and give a meaningless gradient.
    """
    if not isinstance(x, np.ndarray) or x.dtype.kind != "f":
        raise TypeError("x must be a floating-point numpy.ndarray")

    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always put the original value back so a failing f cannot
            # leave the caller's array perturbed.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

def error_val(x, t):
    """Return the current cross-entropy loss, for progress reporting.

    This is the same quantity that ``loss_func`` computes from the global
    ``W`` and ``b``, so it delegates to it instead of repeating the formula.

    Parameters
    ----------
    x : array-like
        Inputs, forwarded to ``loss_func``.
    t : array-like
        Targets, forwarded to ``loss_func``.

    Returns
    -------
    float
        The value of ``loss_func(x, t)``.
    """
    return loss_func(x, t)

def predict(x):
    """Classify ``x`` with the current global parameters ``W`` and ``b``.

    Parameters
    ----------
    x : scalar or array-like
        One sample or a batch of samples.  It is passed to ``np.dot(x, W)``,
        so it has to be compatible with the shape of ``W``.

    Returns
    -------
    tuple
        ``(y, result)`` where ``y`` is the sigmoid output and ``result`` is
        the thresholded label: a plain ``int`` (1 if ``y > 0.5`` else 0) when
        ``y`` holds a single value, otherwise an integer array of 0/1 labels
        shaped like ``y``.
    """
    y = sigmoid(np.dot(x, W) + b)

    # Threshold element-wise: a bare ``if y > 0.5`` raises "truth value of an
    # array is ambiguous" as soon as more than one sample is passed.
    labels = np.asarray(y > 0.5).astype(int)
    result = int(labels.item()) if labels.size == 1 else labels

    return y, result

learning_rate = 1e-2  # try a different value if the loss diverges


def f(x):
    """Training loss; ignores ``x`` because loss_func reads the global W and b."""
    return loss_func(x_data, t_data)


print("init error val = ", error_val(x_data, t_data), "init W = ", W, "\n", ", b = ", b)

for step in range(10001):
    # Gradient-descent step; W is updated first, then b's gradient is taken.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    # Report progress only on every 400th step.
    if step % 400 != 0:
        continue
    print("step = ", step, "error_val = ", error_val(x_data, t_data), "W = ", W, ", b = ", b)

init error val =  21.473107477371187 init W =  [[0.4915231]] 
 , b =  [0.05387912]
step =  0 error_val =  7.220160074339277 W =  [[0.08697596]] , b =  [0.02164924]
step =  400 error_val =  2.8530418360207026 W =  [[0.43142598]] , b =  [-4.28454772]
step =  800 error_val =  1.756442730325061 W =  [[0.46035563]] , b =  [-5.73430465]
step =  1200 error_val =  1.5021293339172426 W =  [[0.53603841]] , b =  [-6.74182713]
step =  1600 error_val =  1.3417596455895704 W =  [[0.59635511]] , b =  [-7.54263362]
step =  2000 error_val =  1.2280931990582815 W =  [[0.64725733]] , b =  [-8.2169945]
step =  2400 error_val =  1.1416648169659838 W =  [[0.69172337]] , b =  [-8.80505806]
step =  2800 error_val =  1.0727942466196192 W =  [[0.731473]] , b =  [-9.32998632]
step =  3200 error_val =  1.0160477747806567 W =  [[0.76759646]] , b =  [-9.80644702]
step =  3600 error_val =  0.9681050072686357 W =  [[0.80083204]] , b =  [-10.24436157]
step =  4000 error_val =  0.9268049448350842 W =  [[0.83170487]] , 

In [4]:
# Query the trained 1-D model at x = 19 (the neighbouring training inputs 18 and 20 are both labelled 1).
predict(19)

(array([[0.99913026]]), 1)

In [5]:
# Query the trained 1-D model at x = 3 (the neighbouring training inputs 2 and 4 are both labelled 0).
predict(3)

(array([[1.10597119e-05]]), 0)

In [11]:
# 다변량 예제
import numpy as np

# Nine training samples with two features each: rows are (x1, x2).
x_data = np.array([
    [2, 4],
    [4, 11],
    [6, 6],
    [8, 5],
    [10, 7],
    [12, 16],
    [14, 8],
    [16, 3],
    [18, 7],
])
# Targets: the first four samples are class 0, the remaining five are class 1.
t_data = np.array([0] * 4 + [1] * 5).reshape(-1, 1)

# Random starting values in [0, 1): one weight per feature plus a bias.
W = np.random.rand(2, 1)
b = np.random.rand(1)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``x``; a NumPy scalar for scalar input,
        otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    e = np.exp(-abs(x))
    # Both branches are the same function algebraically; each one is used
    # on the side of zero where it is numerically safe.
    y = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return y[()] if y.ndim == 0 else y

def loss_func(x, t):
    """Cross-entropy loss of the current global model (``W``, ``b``).

    Parameters
    ----------
    x : array-like
        Inputs, multiplied with the global ``W`` via ``np.dot``.
    t : array-like
        Targets; entries weight the ``log(y)`` and ``log(1 - y)`` terms.

    Returns
    -------
    float
        Cross-entropy summed over all elements.
    """
    eps = 1e-7  # keeps the log arguments away from 0 so the loss stays finite
    y = sigmoid(np.dot(x, W) + b)

    # cross-entropy, split into the t == 1 and t == 0 contributions
    positive_term = t * np.log(y + eps)
    negative_term = (1 - t) * np.log((1 - y) + eps)
    return -np.sum(positive_term + negative_term)

def numerical_derivative(f, x, delta_x=1e-4):
    """Approximate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is nudged in place to ``value + delta_x`` and
    ``value - delta_x``, ``f(x)`` is evaluated at both points, and the element
    is then restored.  Because the perturbation is written into ``x`` itself,
    ``f`` may ignore its argument and read the same array through a global or
    a closure instead.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; expected to return a scalar.
    x : numpy.ndarray
        Floating-point array to differentiate with respect to.  It is
        modified temporarily and restored, even if ``f`` raises.
    delta_x : float, optional
        Finite-difference step (default ``1e-4``).

    Returns
    -------
    numpy.ndarray
        Array shaped like ``x`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``.  An integer array
        would truncate the perturbed values and give a meaningless gradient.
    """
    if not isinstance(x, np.ndarray) or x.dtype.kind != "f":
        raise TypeError("x must be a floating-point numpy.ndarray")

    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always put the original value back so a failing f cannot
            # leave the caller's array perturbed.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

def error_val(x, t):
    """Return the current cross-entropy loss, for progress reporting.

    This is the same quantity that ``loss_func`` computes from the global
    ``W`` and ``b``, so it delegates to it instead of repeating the formula.

    Parameters
    ----------
    x : array-like
        Inputs, forwarded to ``loss_func``.
    t : array-like
        Targets, forwarded to ``loss_func``.

    Returns
    -------
    float
        The value of ``loss_func(x, t)``.
    """
    return loss_func(x, t)

def predict(x):
    """Classify ``x`` with the current global parameters ``W`` and ``b``.

    Parameters
    ----------
    x : array-like
        One sample or a batch of samples.  It is passed to ``np.dot(x, W)``,
        so it has to be compatible with the shape of ``W``.

    Returns
    -------
    tuple
        ``(y, result)`` where ``y`` is the sigmoid output and ``result`` is
        the thresholded label: a plain ``int`` (1 if ``y > 0.5`` else 0) when
        ``y`` holds a single value, otherwise an integer array of 0/1 labels
        shaped like ``y``.
    """
    y = sigmoid(np.dot(x, W) + b)

    # Threshold element-wise: a bare ``if y > 0.5`` raises "truth value of an
    # array is ambiguous" as soon as more than one sample is passed.
    labels = np.asarray(y > 0.5).astype(int)
    result = int(labels.item()) if labels.size == 1 else labels

    return y, result

learning_rate = 1e-2  # try a different value if the loss diverges


def f(x):
    """Training loss; ignores ``x`` because loss_func reads the global W and b."""
    return loss_func(x_data, t_data)


print("init error val = ", error_val(x_data, t_data), "init W = ", W, "\n", ", b = ", b)

for step in range(100001):
    # Gradient-descent step; W is updated first, then b's gradient is taken.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    # Report progress only on every 1000th step.
    if step % 1000 != 0:
        continue
    print("step = ", step, "error_val = ", error_val(x_data, t_data), "W = ", W, ", b = ", b)

init error val =  5.5980792072178325 init W =  [[0.04088418]
 [0.03597004]] 
 , b =  [0.07927207]
step =  0 error_val =  5.011208046533293 W =  [[ 0.10876297]
 [-0.01841746]] , b =  [0.06419685]
step =  400 error_val =  2.1679435414442048 W =  [[ 0.43409044]
 [-0.07854901]] , b =  [-2.83074072]
step =  800 error_val =  1.5493334624782291 W =  [[ 0.54618189]
 [-0.02204632]] , b =  [-4.39310614]
step =  1200 error_val =  1.2572043701495546 W =  [[0.63120281]
 [0.01191983]] , b =  [-5.47048464]
step =  1600 error_val =  1.0836212543771864 W =  [[0.70020185]
 [0.03645293]] , b =  [-6.30176123]
step =  2000 error_val =  0.9663805184100692 W =  [[0.75855703]
 [0.05615796]] , b =  [-6.98512055]
step =  2400 error_val =  0.8804882270252694 W =  [[0.80927068]
 [0.07311732]] , b =  [-7.57006083]
step =  2800 error_val =  0.8139542862262361 W =  [[0.8541923 ]
 [0.08842038]] , b =  [-8.08487386]
step =  3200 error_val =  0.7602962493342065 W =  [[0.89454601]
 [0.1026922 ]] , b =  [-8.54717983]
ste

step =  32800 error_val =  0.16454225415784365 W =  [[1.80348109]
 [0.71884634]] , b =  [-20.46345264]
step =  33200 error_val =  0.1628329304124679 W =  [[1.80960804]
 [0.7234017 ]] , b =  [-20.54584694]
step =  33600 error_val =  0.16115829590027228 W =  [[1.81567468]
 [0.72790721]] , b =  [-20.62740038]
step =  34000 error_val =  0.15951731364590033 W =  [[1.82168214]
 [0.73236394]] , b =  [-20.70812974]
step =  34400 error_val =  0.15790898741315515 W =  [[1.82763156]
 [0.7367729 ]] , b =  [-20.7880513]
step =  34800 error_val =  0.15633235972669657 W =  [[1.83352404]
 [0.74113509]] , b =  [-20.86718088]
step =  35200 error_val =  0.1547865100081014 W =  [[1.83936063]
 [0.7454515 ]] , b =  [-20.94553384]
step =  35600 error_val =  0.15327055281864257 W =  [[1.84514237]
 [0.74972304]] , b =  [-21.02312507]
step =  36000 error_val =  0.15178363620158566 W =  [[1.85087027]
 [0.75395063]] , b =  [-21.09996909]
step =  36400 error_val =  0.15032494011759673 W =  [[1.85654531]
 [0.758135

step =  66000 error_val =  0.08762132788135828 W =  [[2.17511576]
 [0.98774627]] , b =  [-25.41746254]
step =  66400 error_val =  0.087128106954542 W =  [[2.17845877]
 [0.99011105]] , b =  [-25.46170855]
step =  66800 error_val =  0.08664036956525699 W =  [[2.18178348]
 [0.99246214]] , b =  [-25.50570782]
step =  67200 error_val =  0.0861580252270472 W =  [[2.18509006]
 [0.99479968]] , b =  [-25.54946307]
step =  67600 error_val =  0.085680985425904 W =  [[2.18837872]
 [0.99712384]] , b =  [-25.59297696]
step =  68000 error_val =  0.08520916356694597 W =  [[2.19164966]
 [0.99943476]] , b =  [-25.63625214]
step =  68400 error_val =  0.0847424749228651 W =  [[2.19490305]
 [1.0017326 ]] , b =  [-25.67929118]
step =  68800 error_val =  0.08428083658397363 W =  [[2.19813908]
 [1.00401749]] , b =  [-25.72209662]
step =  69200 error_val =  0.08382416740988442 W =  [[2.20135794]
 [1.00628959]] , b =  [-25.76467098]
step =  69600 error_val =  0.08337238798270823 W =  [[2.20455981]
 [1.00854903]

step =  98800 error_val =  0.059797068477360014 W =  [[2.40176867]
 [1.14658649]] , b =  [-28.40860064]
step =  99200 error_val =  0.05956596472711916 W =  [[2.40406937]
 [1.1481853 ]] , b =  [-28.43888406]
step =  99600 error_val =  0.0593366327269661 W =  [[2.40636134]
 [1.1497778 ]] , b =  [-28.46905114]
step =  100000 error_val =  0.05910905224711362 W =  [[2.40864464]
 [1.15136404]] , b =  [-28.49910278]


In [12]:
# Query the trained two-feature model with x1 = 12, x2 = 0.
predict([12,0])

(array([0.59980025]), 1)

In [13]:
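# W1 ≈ 2.4, W2 ≈ 1.15 -> the weight on x1 is larger, so x1 influences the prediction more than x2.
# (The weights can be compared directly here only because x1 and x2 span similar ranges in x_data.)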

In [4]:
# 논리게이트: and, or, nand, xor

import numpy as np

# sigmoid 함수
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``x``; a NumPy scalar for scalar input,
        otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    e = np.exp(-abs(x))
    # Both branches are the same function algebraically; each one is used
    # on the side of zero where it is numerically safe.
    y = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return y[()] if y.ndim == 0 else y

# 수치미분 함수
def numerical_derivative(f, x, delta_x=1e-4):
    """Approximate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is nudged in place to ``value + delta_x`` and
    ``value - delta_x``, ``f(x)`` is evaluated at both points, and the element
    is then restored.  Because the perturbation is written into ``x`` itself,
    ``f`` may ignore its argument and read the same array through a global or
    a closure instead.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; expected to return a scalar.
    x : numpy.ndarray
        Floating-point array to differentiate with respect to.  It is
        modified temporarily and restored, even if ``f`` raises.
    delta_x : float, optional
        Finite-difference step (default ``1e-4``).

    Returns
    -------
    numpy.ndarray
        Array shaped like ``x`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises
    ------
    TypeError
        If ``x`` is not a floating-point ``numpy.ndarray``.  An integer array
        would truncate the perturbed values and give a meaningless gradient.
    """
    if not isinstance(x, np.ndarray) or x.dtype.kind != "f":
        raise TypeError("x must be a floating-point numpy.ndarray")

    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always put the original value back so a failing f cannot
            # leave the caller's array perturbed.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

# LogicGate class (__init__, __loss_func, error_val)
class LogicGate:
    """Logistic-regression model of a two-input logic gate.

    The model is ``sigmoid(x . W + b)`` with a (2, 1) weight matrix and a
    one-element bias.  It is trained by gradient descent on a cross-entropy
    loss, using the module-level ``sigmoid`` and ``numerical_derivative``
    helpers.
    """

    def __init__(self, gate_name, xdata, tdata, learning_rate=1e-2):
        """Store the training data and draw random initial parameters.

        Parameters
        ----------
        gate_name : str
            Label stored in the public ``name`` attribute.
        xdata : array-like
            Gate inputs; reshaped to ``(N, 2)``.
        tdata : array-like
            Expected outputs; reshaped to ``(N, 1)``.
        learning_rate : float, optional
            Gradient-descent step size (default ``1e-2``); use a smaller
            value if training diverges.

        Raises
        ------
        ValueError
            If the data cannot be reshaped as described, or if ``xdata`` and
            ``tdata`` describe a different number of samples.
        """
        self.name = gate_name

        # Training inputs and targets.  np.asarray lets callers pass plain
        # lists as well as arrays; -1 lets the row count follow the data.
        self.__xdata = np.asarray(xdata, dtype=float).reshape(-1, 2)
        self.__tdata = np.asarray(tdata, dtype=float).reshape(-1, 1)
        if len(self.__xdata) != len(self.__tdata):
            raise ValueError("xdata and tdata must describe the same number of samples")

        # Weight W (2x1 matrix) and bias b, initialised randomly in [0, 1).
        self.__W = np.random.rand(2, 1)
        self.__b = np.random.rand(1)

        # Step size for gradient descent.
        self.__learning_rate = learning_rate

        # All of the attributes above use a double-underscore prefix: Python
        # attributes are public by default, and this keeps them from being
        # changed casually from outside the class.

    def __loss_func(self):
        """Return the cross-entropy loss on the stored training data."""
        delta = 1e-7  # keeps log() away from 0 so the loss stays finite

        z = np.dot(self.__xdata, self.__W) + self.__b
        y = sigmoid(z)

        # cross-entropy
        return -np.sum(self.__tdata * np.log(y + delta) + (1 - self.__tdata) * np.log((1 - y) + delta))

    def error_val(self):
        """Return the current loss; public wrapper around the private loss function."""
        return self.__loss_func()

    def train(self, steps=8001, log_every=400):
        """Fit W and b by gradient descent using numerical derivatives.

        Parameters
        ----------
        steps : int, optional
            Number of update steps (default ``8001``).
        log_every : int, optional
            Print the loss every this many steps (default ``400``); a falsy
            value disables the per-step output.
        """
        # numerical_derivative perturbs W / b in place, so the loss does not
        # need to use the argument it is called with.
        f = lambda x: self.__loss_func()

        print("init error val = ", self.error_val())

        for step in range(steps):
            self.__W -= self.__learning_rate * numerical_derivative(f, self.__W)
            self.__b -= self.__learning_rate * numerical_derivative(f, self.__b)

            if log_every and step % log_every == 0:
                print("step = ", step, "error value = ", self.error_val())

    def predict(self, input_data):
        """Evaluate the gate on one input pair or on a batch of pairs.

        Parameters
        ----------
        input_data : array-like
            A single ``(2,)`` input or an ``(N, 2)`` batch.

        Returns
        -------
        tuple
            ``(y, result)`` where ``y`` is the sigmoid output and ``result``
            is the thresholded label: a plain ``int`` (1 if ``y > 0.5`` else
            0) when ``y`` holds a single value, otherwise an integer array of
            0/1 labels shaped like ``y``.
        """
        z = np.dot(input_data, self.__W) + self.__b
        y = sigmoid(z)

        # Threshold element-wise so that batches work; a bare ``if y > 0.5``
        # raises for arrays with more than one element.
        labels = np.asarray(y > 0.5).astype(int)
        result = int(labels.item()) if labels.size == 1 else labels

        return y, result

In [5]:
# Truth table for AND: only the input (1, 1) maps to 1.
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([0, 0, 0, 1])

# Build the gate model and fit it; train() prints the loss as it goes.
AND_obj = LogicGate(gate_name="AND_GATE", xdata=xdata, tdata=tdata)
AND_obj.train()

init error val =  4.978762689538606
step =  0 error value =  4.9151081014042965
step =  400 error value =  1.5407822861210194
step =  800 error value =  1.1439455524679079
step =  1200 error value =  0.9194255809430008
step =  1600 error value =  0.7713751254703193
step =  2000 error value =  0.6650092371209055
step =  2400 error value =  0.5843817226427886
step =  2800 error value =  0.5209720557245154
step =  3200 error value =  0.46973333262513267
step =  3600 error value =  0.4274514311333886
step =  4000 error value =  0.3919673622374862
step =  4400 error value =  0.3617705216875883
step =  4800 error value =  0.3357693026775475
step =  5200 error value =  0.31315388759987994
step =  5600 error value =  0.2933102218658096
step =  6000 error value =  0.2757639658433629
step =  6400 error value =  0.26014279343699864
step =  6800 error value =  0.24615033792242536
step =  7200 error value =  0.2335477650508046
step =  7600 error value =  0.22214047572534318
step =  8000 error value

In [6]:
# Evaluate the trained AND gate on all four input combinations.
print(AND_obj.name, "\n")

test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for input_data in test_data:
    # predict() returns (sigmoid value, 0/1 label); only the label is printed.
    sigmoid_val, logical_val = AND_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

AND_GATE 

[0 0]  =  0 

[0 1]  =  0 

[1 0]  =  0 

[1 1]  =  1 



In [7]:
# Truth table for OR: only the input (0, 0) maps to 0.
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([0, 1, 1, 1])

# Build the gate model and fit it; train() prints the loss as it goes.
OR_obj = LogicGate(gate_name="OR_GATE", xdata=xdata, tdata=tdata)
OR_obj.train()

init error val =  1.85843693562649
step =  0 error value =  1.8530142415772646
step =  400 error value =  1.1102382686890384
step =  800 error value =  0.8038052961878829
step =  1200 error value =  0.6241612176273524
step =  1600 error value =  0.5071785068196584
step =  2000 error value =  0.42543124192220727
step =  2400 error value =  0.3653628195657569
step =  2800 error value =  0.31952347315554214
step =  3200 error value =  0.2834917269037545
step =  3600 error value =  0.2544855815223688
step =  4000 error value =  0.23067199236635116
step =  4400 error value =  0.21079698857327742
step =  4800 error value =  0.19397551146453776
step =  5200 error value =  0.17956612209919356
step =  5600 error value =  0.16709324591981894
step =  6000 error value =  0.15619724243834676
step =  6400 error value =  0.14660138374674905
step =  6800 error value =  0.13808944636011503
step =  7200 error value =  0.1304901561385251
step =  7600 error value =  0.12366617031240462
step =  8000 error 

In [8]:
# Evaluate the trained OR gate on all four input combinations.
print(OR_obj.name, "\n")

test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for input_data in test_data:
    # predict() returns (sigmoid value, 0/1 label); only the label is printed.
    sigmoid_val, logical_val = OR_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

OR_GATE 

[0 0]  =  0 

[0 1]  =  1 

[1 0]  =  1 

[1 1]  =  1 



In [9]:
# Truth table for NAND: only the input (1, 1) maps to 0.
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([1, 1, 1, 0])

# Build the gate model and fit it; train() prints the loss as it goes.
NAND_obj = LogicGate(gate_name="NAND_GATE", xdata=xdata, tdata=tdata)
NAND_obj.train()

init error val =  3.1887909868528874
step =  0 error value =  3.18236575383791
step =  400 error value =  1.7793576467603978
step =  800 error value =  1.2607003381011923
step =  1200 error value =  0.9904641745342762
step =  1600 error value =  0.8200122693133451
step =  2000 error value =  0.7007276097214161
step =  2400 error value =  0.6118478650044574
step =  2800 error value =  0.5427939274201365
step =  3200 error value =  0.4875027208742415
step =  3600 error value =  0.4422037625289072
step =  4000 error value =  0.4044091577228043
step =  4400 error value =  0.3724021489326962
step =  4800 error value =  0.3449558573666343
step =  5200 error value =  0.321168376712088
step =  5600 error value =  0.30036103801643554
step =  6000 error value =  0.2820130027646671
step =  6400 error value =  0.2657177535050853
step =  6800 error value =  0.25115330950413933
step =  7200 error value =  0.238061333005733
step =  7600 error value =  0.2262321588274404
step =  8000 error value =  0.

In [10]:
# Evaluate the trained NAND gate on all four input combinations.
print(NAND_obj.name, "\n")

test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for input_data in test_data:
    # predict() returns (sigmoid value, 0/1 label); only the label is printed.
    sigmoid_val, logical_val = NAND_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

NAND_GATE 

[0 0]  =  1 

[0 1]  =  1 

[1 0]  =  1 

[1 1]  =  0 



In [11]:
# Truth table for XOR: the output is 1 only when the two inputs differ.
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([0, 1, 1, 0])

XOR_obj = LogicGate(gate_name="XOR_GATE", xdata=xdata, tdata=tdata)

# For the XOR gate the loss stalls at about 2.77 and stops decreasing.
XOR_obj.train()

init error val =  3.3779006225322235
step =  0 error value =  3.3631967046166293
step =  400 error value =  2.7795961946555248
step =  800 error value =  2.7736498057684758
step =  1200 error value =  2.772763556519934
step =  1600 error value =  2.7726207703171952
step =  2000 error value =  2.7725949462965946
step =  2400 error value =  2.7725896044289575
step =  2800 error value =  2.772588357351388
step =  3200 error value =  2.7725880399363803
step =  3600 error value =  2.772587954838477
step =  4000 error value =  2.7725879313765445
step =  4400 error value =  2.7725879248155643
step =  4800 error value =  2.772587922968005
step =  5200 error value =  2.7725879224459815
step =  5600 error value =  2.7725879222982464
step =  6000 error value =  2.7725879222564047
step =  6400 error value =  2.7725879222445493
step =  6800 error value =  2.77258792224119
step =  7200 error value =  2.772587922240238
step =  7600 error value =  2.772587922239968
step =  8000 error value =  2.772587

In [12]:
# XOR gate prediction => this model does not reproduce the XOR truth table.
print(XOR_obj.name, "\n")

test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for input_data in test_data:
    # predict() returns (sigmoid value, 0/1 label); only the label is printed.
    sigmoid_val, logical_val = XOR_obj.predict(input_data)
    print(input_data, " = ", logical_val, "\n")

XOR_GATE 

[0 0]  =  0 

[0 1]  =  0 

[1 0]  =  0 

[1 1]  =  1 



In [13]:
# XOR 을 NAND + OR => AND 조합으로 계산
import numpy as np

# XOR computed from the already-trained gates: AND(NAND(a, b), OR(a, b)).
input_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

final_output = []  # AND outputs, i.e. the XOR result for each row of input_data

for sample in input_data:
    # predict() returns (sigmoid value, 0/1 label); only the label feeds the next gate.
    s1 = NAND_obj.predict(sample)[1]  # NAND output
    s2 = OR_obj.predict(sample)[1]    # OR output

    # The two first-layer labels form the input pair of the AND gate.
    new_input_data = np.array([s1, s2])
    sigmoid_val, logical_val = AND_obj.predict(new_input_data)

    final_output.append(logical_val)

for sample, xor_val in zip(input_data, final_output):
    # Each result is followed by one blank line.
    print(sample, " = ", xor_val, end="\n\n")

# The XOR problem can be solved by stacking gates into multiple layers.
# This is the core idea behind neural-network-based deep learning.

[0 0]  =  0

[0 1]  =  1

[1 0]  =  1

[1 1]  =  0

