#### Logistic regression
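- training data로부터 linear regression function $z = Wx + b$ 도출
- linear regression의 output $z$를 sigmoid의 input으로 주면 $y = \sigma(z) = 1 / (1 + e^{-z})$를 얻고, 이로부터 $P(C=t \mid x) = y^{t}(1-y)^{1-t}$를 구할 수 있음 => 이 확률의 negative log-likelihood를 취해 손실 함수(cross-entropy) $E(W, b) = -\sum_i \left[ t_i \log y_i + (1 - t_i) \log(1 - y_i) \right]$를 도출
- Cross-entropy를 minimize하는 W, b값을 gradient descent로 도출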

In [1]:
import numpy as np
from multi_var_numerical_derivative import multi_var_numerical_derivative

#### single variable logistic regression

In [2]:
# Training set: ten even inputs 2..20; the label switches from 0 to 1 at x = 14.
x_data = np.arange(2, 21, 2).reshape(-1, 1)
t_data = np.array([0] * 6 + [1] * 4).reshape(-1, 1)
print(np.concatenate([x_data, t_data], axis=1))

[[ 2  0]
 [ 4  0]
 [ 6  0]
 [ 8  0]
 [10  0]
 [12  0]
 [14  1]
 [16  1]
 [18  1]
 [20  1]]


In [3]:
# Random starting point: one weight (shape (1, 1)) and one bias (shape (1,)).
W, b = np.random.rand(1, 1), np.random.rand(1)
print("INITIAL W = ", W, " b = ", b)

INITIAL W =  [[0.50910612]]  b =  [0.91183876]


In [4]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [5]:
def loss_func(x_data, t_data):
    """Return the summed binary cross-entropy of the current model.

    The model parameters are read from the module-level ``W`` and ``b``;
    only the data is passed in.

    Args:
        x_data: Input samples, multiplied with ``W`` via ``np.dot``.
        t_data: Target labels (0 or 1), broadcast against the predictions.

    Returns:
        The scalar ``-sum(t * log(y) + (1 - t) * log(1 - y))``.
    """
    eps = 1e-7  # small offset so np.log never receives exactly 0
    prob = sigmoid(np.dot(x_data, W) + b)

    positive_term = t_data * np.log(prob + eps)
    negative_term = (1 - t_data) * np.log(1 - prob + eps)
    return -np.sum(positive_term + negative_term)

In [6]:
# Objective handed to the numerical-derivative helper. Its argument is
# ignored: the loss is always evaluated on the full training set.
def f(x):
    return loss_func(x_data, t_data)


learning_rate = 1e-2

In [7]:
for step in range(10001):
    # W is updated first, so the bias gradient is evaluated at the new W.
    W -= multi_var_numerical_derivative(f, W) * learning_rate
    b -= multi_var_numerical_derivative(f, b) * learning_rate

    # Report progress only on every 400th step.
    if step % 400:
        continue
    print("STEP %d: " % step, "W = ", W, " b = ", b, "error = ", loss_func(x_data, t_data))

STEP 0:  W =  [[0.09573972]]  b =  [0.86534392] error =  10.754347739792678
STEP 400:  W =  [[0.43549611]]  b =  [-4.08204201] error =  3.2484120905811564
STEP 800:  W =  [[0.45206595]]  b =  [-5.62376887] error =  1.7888346804155144
STEP 1200:  W =  [[0.52972106]]  b =  [-6.6578429] error =  1.5208528807512594
STEP 1600:  W =  [[0.59115561]]  b =  [-7.47367601] error =  1.3543932334883169
STEP 2000:  W =  [[0.64278699]]  b =  [-8.15782216] error =  1.237401448290138
STEP 2400:  W =  [[0.68777084]]  b =  [-8.75282306] error =  1.1489218656337417
STEP 2800:  W =  [[0.72790973]]  b =  [-9.2829583] error =  1.0786788359758073
STEP 3200:  W =  [[0.76433802]]  b =  [-9.7634905] error =  1.0209590938113053
STEP 3600:  W =  [[0.79781973]]  b =  [-10.20468831] error =  0.9722953733144631
STEP 4000:  W =  [[0.82889612]]  b =  [-10.61381788] error =  0.930442834174251
STEP 4400:  W =  [[0.85796584]]  b =  [-10.99622531] error =  0.8938738407100553
STEP 4800:  W =  [[0.88533178]]  b =  [-11.35596

In [8]:
def predict(x):
    """Classify one sample with the current module-level ``W`` and ``b``.

    Args:
        x: A single input sample accepted by ``np.dot(x, W)``.

    Returns:
        A tuple ``(y, label)``: ``y`` is the sigmoid output and ``label``
        is 1 when ``y > 0.5``, otherwise 0.
    """
    prob = sigmoid(np.dot(x, W) + b)
    label = 1 if prob > 0.5 else 0
    return prob, label

In [9]:
# Query x = 14.5, just above the first positively-labelled training input (x = 14).
predict(14.5)

(array([[0.86445033]]), 1)

In [10]:
# Query x = 12.5, just above the last negatively-labelled training input (x = 12).
predict(12.5)

(array([[0.38900935]]), 0)

#### two variable logistic regression

In [11]:
# Two-feature training set: nine samples, labelled 1 from x1 = 10 upward.
x1_data = np.arange(2, 19, 2).reshape(-1, 1)
x2_data = np.array([4, 11, 6, 5, 7, 16, 8, 3, 7]).reshape(-1, 1)
x_data = np.concatenate([x1_data, x2_data], axis=1)
t_data = np.array([0] * 4 + [1] * 5).reshape(-1, 1)

print(np.concatenate([x_data, t_data], axis=1))

[[ 2  4  0]
 [ 4 11  0]
 [ 6  6  0]
 [ 8  5  0]
 [10  7  1]
 [12 16  1]
 [14  8  1]
 [16  3  1]
 [18  7  1]]


In [12]:
# Random starting point: one weight per feature (shape (2, 1)) and one bias.
W, b = np.random.rand(2, 1), np.random.rand(1)

print("INITIAL W = ", W, " b = ", b)

INITIAL W =  [[0.42002643]
 [0.24167481]]  b =  [0.26950333]


In [13]:
def loss_func(x_data, t_data):
    """Return the summed binary cross-entropy of the current model.

    The model parameters are read from the module-level ``W`` and ``b``;
    only the data is passed in.

    Args:
        x_data: Input samples, multiplied with ``W`` via ``np.dot``.
        t_data: Target labels (0 or 1), broadcast against the predictions.

    Returns:
        The scalar ``-sum(t * log(y + delta) + (1 - t) * log(1 - y + delta))``.
    """
    # Without this offset a saturated sigmoid (y == 0.0 or y == 1.0) feeds
    # log(0) = -inf into the sum and the loss becomes inf or nan.
    delta = 1e-7
    z = np.dot(x_data, W) + b
    y = sigmoid(z)

    return -np.sum(t_data * np.log(y + delta) + (1 - t_data) * np.log(1 - y + delta))

In [14]:
# Objective handed to the numerical-derivative helper. Its argument is
# ignored: the loss is always evaluated on the full training set.
def f(x):
    return loss_func(x_data, t_data)


learning_rate = 1e-2

for step in range(80001):
    # W is updated first, so the bias gradient is evaluated at the new W.
    W -= multi_var_numerical_derivative(f, W) * learning_rate
    b -= multi_var_numerical_derivative(f, b) * learning_rate

    # Report progress only on every 5000th step.
    if step % 5000:
        continue
    print("STEP %d: W = " % step, W, " b = ", b, " error = ", loss_func(x_data, t_data))

STEP 0: W =  [[ 0.22448169]
 [-0.01132964]]  b =  [0.24051661]  error =  6.437644281185769
STEP 5000: W =  [[1.03686167]
 [0.16110184]]  b =  [-10.20795907]  error =  0.6037086217551586
STEP 10000: W =  [[1.28112949]
 [0.31313454]]  b =  [-13.33308819]  error =  0.40394944011309436
STEP 15000: W =  [[1.44080933]
 [0.43875227]]  b =  [-15.52368764]  error =  0.30679452337789587
STEP 20000: W =  [[1.56582054]
 [0.53767763]]  b =  [-17.24080536]  error =  0.24722609395858894
STEP 25000: W =  [[1.66963914]
 [0.61795755]]  b =  [-18.65538367]  error =  0.20683474214903344
STEP 30000: W =  [[1.7585193 ]
 [0.68525682]]  b =  [-19.85786173]  error =  0.17766104931404325
STEP 35000: W =  [[1.83620328]
 [0.74311705]]  b =  [-20.90315144]  error =  0.15562171964895286
STEP 40000: W =  [[1.90517113]
 [0.79383055]]  b =  [-21.82728648]  error =  0.1383979183004886
STEP 45000: W =  [[1.96716189]
 [0.838953  ]]  b =  [-22.65521575]  error =  0.124574878349312
STEP 50000: W =  [[2.02344366]
 [0.879585

In [15]:
# Classify the single sample (x1, x2) = (5, 5), passed as a (1, 2) row.
print(predict(np.array([5,5]).reshape(1,2)))

(array([[4.08222636e-05]]), 0)


In [16]:
# Classify the single sample (x1, x2) = (4, 12), passed as a (1, 2) row.
print(predict(np.array([4,12]).reshape(1,2)))

(array([[0.00706008]]), 0)


In [17]:
# Classify the single sample (x1, x2) = (7, 21), passed as a (1, 2) row.
print(predict(np.array([7,21]).reshape(1,2)))

(array([[0.99998955]]), 1)


학습 결과 W1 ≈ 2.28, W2 ≈ 1.06으로 W1이 더 크므로, X1이 X2보다 분류 결과에 더 큰 영향을 준다고 해석할 수 있음 (단, 두 입력의 스케일이 비슷하다는 전제에서).

#### Logic gate

In [18]:
class LogicGate:
    """Two-input logic gate fitted by single-neuron logistic regression.

    The gate computes ``sigmoid(x @ W + b)`` and is trained by gradient
    descent on the cross-entropy loss, with gradients obtained from
    ``multi_var_numerical_derivative``.

    Args:
        name: Label for the gate (e.g. ``"AND"``); stored as ``self.name``.
        x_data: Training inputs; any array-like that reshapes to ``(N, 2)``.
        t_data: Training targets (0 or 1); any array-like that reshapes to
            ``(N, 1)``.
        learning_rate: Gradient-descent step size.

    Raises:
        ValueError: If ``x_data`` and ``t_data`` describe a different
            number of samples.
    """

    def __init__(self, name, x_data, t_data, learning_rate=1e-2):
        self.name = name
        self.__x_data = np.asarray(x_data).reshape(-1, 2)
        self.__t_data = np.asarray(t_data).reshape(-1, 1)
        if len(self.__x_data) != len(self.__t_data):
            raise ValueError(
                "x_data has %d samples but t_data has %d"
                % (len(self.__x_data), len(self.__t_data))
            )

        # Single-layer perceptron: 2 inputs feeding 1 sigmoid output unit.
        self.__W = np.random.rand(2, 1)
        self.__b = np.random.rand(1,)

        self.__learning_rate = learning_rate

    def __loss_func(self):
        """Return the summed cross-entropy on the stored training data."""
        delta = 1e-7  # small offset so np.log never receives exactly 0
        z = np.dot(self.__x_data, self.__W) + self.__b
        y = sigmoid(z)

        return -np.sum(
            np.log(y + delta) * self.__t_data
            + np.log(1 - y + delta) * (1 - self.__t_data)
        )

    def error_val(self):
        """Return the current training loss."""
        return self.__loss_func()

    def train(self, steps=10001, log_every=400):
        """Fit ``W`` and ``b`` by gradient descent, printing progress.

        Args:
            steps: Number of gradient-descent iterations.
            log_every: Print the parameters and loss every this many steps;
                a falsy value (0 or None) disables printing.
        """
        # The objective ignores its argument and re-reads the parameters from
        # self. NOTE(review): this presumably relies on the derivative helper
        # perturbing the passed array in place -- confirm against its source.
        def objective(_unused):
            return self.__loss_func()

        for step in range(steps):
            # W is updated first, so the bias gradient is taken at the new W.
            self.__W -= self.__learning_rate * multi_var_numerical_derivative(objective, self.__W)
            self.__b -= self.__learning_rate * multi_var_numerical_derivative(objective, self.__b)

            if log_every and step % log_every == 0:
                print("STEP %d: W = " % step, self.__W, " b = ", self.__b, " error = ", self.error_val())

    def predict(self, x):
        """Classify a single two-element input.

        Args:
            x: One input sample accepted by ``np.dot(x, W)`` with ``W`` of
                shape ``(2, 1)``.

        Returns:
            A tuple ``(y, label)``: ``y`` is the sigmoid output and
            ``label`` is 1 when ``y > 0.5``, otherwise 0.
        """
        z = np.dot(x, self.__W) + self.__b
        y = sigmoid(z)

        label = 1 if y > 0.5 else 0
        return y, label

In [19]:
# All four combinations of two binary inputs, in the order 00, 01, 10, 11.
test_input = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

In [20]:
# AND gate: the target is 1 only when both inputs are 1.
x_data = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])
t_data = np.array([0, 0, 0, 1]).reshape(-1, 1)

AND_obj = LogicGate("AND", x_data, t_data)

In [21]:
# Fit the AND gate by gradient descent; progress is printed while training.
AND_obj.train()

STEP 0: W =  [[0.87208683]
 [0.59753159]]  b =  [0.59312099]  error =  4.281868735604514
STEP 400: W =  [[1.01990169]
 [0.90774262]]  b =  [-1.83924367]  error =  1.4947205610236511
STEP 800: W =  [[1.61018638]
 [1.55927475]]  b =  [-2.68541182]  error =  1.120548788743588
STEP 1200: W =  [[2.06203636]
 [2.03707822]]  b =  [-3.34060432]  error =  0.9048425434351018
STEP 1600: W =  [[2.43192223]
 [2.41882594]]  b =  [-3.88098514]  error =  0.7612291617146405
STEP 2000: W =  [[2.74675503]
 [2.73946704]]  b =  [-4.34294322]  error =  0.6574747879362854
STEP 2400: W =  [[3.02144707]
 [3.0171812 ]]  b =  [-4.74726492]  error =  0.5785410914484456
STEP 2800: W =  [[3.26531478]
 [3.26270679]]  b =  [-5.10709513]  error =  0.5163033059508144
STEP 3200: W =  [[3.48465441]
 [3.48299876]]  b =  [-5.43137328]  error =  0.465913398191822
STEP 3600: W =  [[3.6839594 ]
 [3.68287327]]  b =  [-5.72650837]  error =  0.42426781355445686
STEP 4000: W =  [[3.86656719]
 [3.86583388]]  b =  [-5.99727995]  er

In [22]:
# Evaluate the AND gate on every input pair; each result is (probability, label).
for e in test_input:
    print(AND_obj.predict(e))

(array([0.0001925]), 0)
(array([0.04872804]), 0)
(array([0.04872872]), 0)
(array([0.93163809]), 1)


In [23]:
# OR gate: the target is 1 when at least one input is 1.
x_data = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])
t_data = np.array([0, 1, 1, 1]).reshape(-1, 1)

OR_obj = LogicGate("OR", x_data, t_data)

In [24]:
# Fit the OR gate by gradient descent; progress is printed while training.
OR_obj.train()

STEP 0: W =  [[0.98222914]
 [0.91685623]]  b =  [0.6998722]  error =  1.5264987216378842
STEP 400: W =  [[1.80387999]
 [1.76584864]]  b =  [-0.08297545]  error =  1.017696673120925
STEP 800: W =  [[2.44694267]
 [2.42362125]]  b =  [-0.54114921]  error =  0.7521481258613677
STEP 1200: W =  [[2.96044195]
 [2.94516269]]  b =  [-0.86396857]  error =  0.591638074362282
STEP 1600: W =  [[3.38440061]
 [3.37380186]]  b =  [-1.11350346]  error =  0.48497146569313065
STEP 2000: W =  [[3.74382996]
 [3.736129  ]]  b =  [-1.31701803]  error =  0.40938530088011316
STEP 2400: W =  [[4.05487992]
 [4.04907146]]  b =  [-1.48882116]  error =  0.3532736526513505
STEP 2800: W =  [[4.32847383]
 [4.32395787]]  b =  [-1.63738757]  error =  0.31011648076458725
STEP 3200: W =  [[4.5723025 ]
 [4.56870282]]  b =  [-1.76817549]  error =  0.2759808396790418
STEP 3600: W =  [[4.79196425]
 [4.78903481]]  b =  [-1.88491816]  error =  0.24836104827117822
STEP 4000: W =  [[4.99164904]
 [4.98922303]]  b =  [-1.99028606] 

In [25]:
# Evaluate the OR gate on every input pair; each result is (probability, label).
for e in test_input:
    print(OR_obj.predict(e))

(array([0.0508131]), 0)
(array([0.97975613]), 1)
(array([0.97976446]), 1)
(array([0.99997716]), 1)


In [26]:
# NAND gate: the target is 0 only when both inputs are 1.
x_data = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])
t_data = np.array([1, 1, 1, 0]).reshape(-1, 1)

NAND_obj = LogicGate("NAND", x_data, t_data)

In [27]:
# Fit the NAND gate by gradient descent; progress is printed while training.
NAND_obj.train()

STEP 0: W =  [[0.70767673]
 [0.79239451]]  b =  [0.09663901]  error =  3.1412586848571085
STEP 400: W =  [[-0.61714412]
 [-0.58202097]]  b =  [1.37947152]  error =  1.7666943185966524
STEP 800: W =  [[-1.34652824]
 [-1.3311252 ]]  b =  [2.34687569]  error =  1.254954753355861
STEP 1200: W =  [[-1.86383538]
 [-1.85652683]]  b =  [3.07154332]  error =  0.9870976611348585
STEP 1600: W =  [[-2.27154743]
 [-2.26782039]]  b =  [3.65618738]  error =  0.817751070276657
STEP 2000: W =  [[-2.61119029]
 [-2.6091666 ]]  b =  [4.14919948]  error =  0.6990852424060003
STEP 2400: W =  [[-2.9035898 ]
 [-2.90243005]]  b =  [4.576745]  error =  0.6105939778349616
STEP 2800: W =  [[-3.1608482 ]
 [-3.16015198]]  b =  [4.95471723]  error =  0.5418027986187952
STEP 3200: W =  [[-3.39073686]
 [-3.3903018 ]]  b =  [5.29361702]  error =  0.4866987855603152
STEP 3600: W =  [[-3.59860491]
 [-3.59832342]]  b =  [5.60081561]  error =  0.4415383876734241
STEP 4000: W =  [[-3.78832329]
 [-3.78813553]]  b =  [5.88172

In [28]:
# Evaluate the NAND gate on every input pair; each result is (probability, label).
for e in test_input:
    print(NAND_obj.predict(e))

(array([0.99979729]), 1)
(array([0.95047363]), 1)
(array([0.95047347]), 1)
(array([0.06948561]), 0)


In [29]:
# XOR gate: the target is 1 when exactly one input is 1.
x_data = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])
t_data = np.array([0, 1, 1, 0]).reshape(-1, 1)

XOR_obj = LogicGate("XOR", x_data, t_data)

In [30]:
# Fit the XOR gate the same way; the printed loss stalls near 2.7726 instead of shrinking.
XOR_obj.train()

STEP 0: W =  [[0.42099033]
 [0.1181202 ]]  b =  [0.44308452]  error =  3.042386282249921
STEP 400: W =  [[ 0.0586443]
 [-0.0538375]]  b =  [-0.00153638]  error =  2.773380397173542
STEP 800: W =  [[ 0.02170245]
 [-0.01963026]]  b =  [-0.00122566]  error =  2.772694981182894
STEP 1200: W =  [[ 0.00814452]
 [-0.00704214]]  b =  [-0.00065318]  error =  2.7726024180326947
STEP 1600: W =  [[ 0.00308338]
 [-0.0024965 ]]  b =  [-0.00034774]  error =  2.772589891184546
STEP 2000: W =  [[ 0.0011813 ]
 [-0.00086886]]  b =  [-0.00018513]  error =  2.7725881914554233
STEP 2400: W =  [[ 0.0004598 ]
 [-0.00029347]]  b =  [-9.85564877e-05]  error =  2.7725879595507066
STEP 2800: W =  [[ 1.82658451e-04]
 [-9.41065760e-05]]  b =  [-5.24689204e-05]  error =  2.7725879275509415
STEP 3200: W =  [[ 7.44157629e-05]
 [-2.72730381e-05]]  b =  [-2.79330947e-05]  error =  2.772587923034564
STEP 3600: W =  [[ 3.12299950e-05]
 [-6.13242923e-06]]  b =  [-1.4870856e-05]  error =  2.7725879223691727
STEP 4000: W =  

In [31]:
# Evaluate the XOR gate on every input pair; each result is (probability, label).
for e in test_input:
    print(XOR_obj.predict(e)) # XOR is not learned: every probability prints as 0.5

(array([0.5]), 0)
(array([0.5]), 0)
(array([0.5]), 0)
(array([0.5]), 1)


#### XOR
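- logistic regression 하나(단일 선형 결정 경계)로 만든 XOR classification은 제대로 동작하지 않음: XOR은 선형 분리가 불가능하여 모든 입력에 대해 출력 확률이 0.5 근처에 머무름
- 대신 XOR(x1, x2) = AND(NAND(x1, x2), OR(x1, x2))로, 이미 학습된 gate들을 조합하여 구현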

In [32]:
# Compose XOR from the other gates: XOR(x) = AND(NAND(x), OR(x)).
for e in test_input:
    # Index 1 of each prediction tuple is the hard 0/1 label.
    NAND_out = NAND_obj.predict(e)[1]
    OR_out = OR_obj.predict(e)[1]

    # The two intermediate labels become the input pair of the AND gate.
    new_input = np.array((NAND_out, OR_out))
    res = AND_obj.predict(new_input)

    print("Input = ", e, "NAND out = ", NAND_out, " OR out = ", OR_out, "XOR out = ", res[1])

Input =  [0 0] NAND out =  1  OR out =  0 XOR out =  0
Input =  [0 1] NAND out =  1  OR out =  1 XOR out =  1
Input =  [1 0] NAND out =  1  OR out =  1 XOR out =  1
Input =  [1 1] NAND out =  0  OR out =  1 XOR out =  0
