In [22]:
# Multi-variable logistic regression
# 1. Prepare the training data
import numpy as np

# Nine training samples with two input features each -> shape (9, 2).
x_data = np.array([
    [2, 4], [4, 11], [6, 6], [8, 5], [10, 7],
    [12, 16], [14, 8], [16, 3], [18, 7],
])
# One binary target (0 or 1) per sample, stored as a (9, 1) column vector.
t_data = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1]).reshape(9, 1)


In [23]:
# 2. Define the regression model parameters

# Weights: one per input feature, drawn uniformly from [0, 1) -> 2x1 matrix.
W = np.random.rand(2, 1)
# Bias: a single value drawn uniformly from [0, 1), kept as a 1-element array
# so it can be updated in place during training.
b = np.random.rand(1)

# Label fixed from the misspelled "b.sahpe" to "b.shape".
print("W =", W, ", W.shape =", W.shape, ", b = ", b, ", b.shape = ", b.shape)

W = [[0.97297071]
 [0.25525686]] , W.shape = (2, 1) , b =  [0.22148941] , b.sahpe =  (1,)


In [24]:
# 3. Define the loss function E(W, b)

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. This version only ever
    evaluates ``exp(-|x|)``, whose value lies in (0, 1], and picks the
    algebraically equivalent form for each sign of ``x``.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A float64 NumPy scalar for scalar input, otherwise a float64 array
        with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    e = np.exp(-np.abs(x))  # never overflows; may underflow harmlessly to 0.0
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x))
    y = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves n-d arrays untouched.
    return y[()]

def loss_func(x, t):
    """Return the summed binary cross-entropy of the current model on (x, t).

    The model output is ``sigmoid(x . W + b)``, where ``W`` and ``b`` are the
    module-level parameters; they are read here, not passed in.

    Args:
        x: Input samples; must be compatible with ``np.dot(x, W)``.
        t: Targets, broadcastable against the model output.

    Returns:
        The scalar cross-entropy summed over all elements.
    """
    eps = 1e-7  # keeps log() away from log(0) = -inf

    y = sigmoid(np.dot(x, W) + b)

    # Cross-entropy, split into the t == 1 and t == 0 contributions.
    positive_term = t * np.log(y + eps)
    negative_term = (1 - t) * np.log((1 - y) + eps)
    return -np.sum(positive_term + negative_term)


In [25]:
# 4. Define numerical_derivative and the utility functions

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses the central difference ``(f(x + h) - f(x - h)) / (2h)`` with
    ``h = 1e-4``, perturbing one element of ``x`` at a time.

    ``x`` is modified IN PLACE while probing and restored afterwards. This is
    deliberate: ``f`` may ignore its argument and read the very same array
    object through another name, and it must still observe each perturbation.
    The original value is restored even if ``f`` raises.

    Args:
        f: Callable that accepts ``x`` and returns a scalar.
        x: Floating-point ``numpy.ndarray`` of any shape holding all
            variables to differentiate with respect to.

    Returns:
        An array with the same shape and dtype as ``x`` containing the
        estimated partial derivatives.

    Raises:
        TypeError: If ``x`` is not a floating-point ndarray. Writing
            ``value +/- 1e-4`` into an integer array would be truncated and
            silently yield a meaningless gradient.
    """
    if not isinstance(x, np.ndarray) or not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point numpy.ndarray, "
            "got %r" % (getattr(x, "dtype", type(x)),)
        )

    delta_x = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # Visit every index of x so each variable gets its own partial derivative.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]  # scalar copy of the original value
        try:
            x[idx] = tmp_val + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation, even when f raised.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

def error_val(x, t):
    """Return the current cross-entropy error of the model on (x, t).

    This is the same quantity as ``loss_func`` (same formula, same
    module-level ``W`` and ``b``), so it delegates to it instead of keeping a
    second copy of the computation that could drift out of sync.

    Args:
        x: Input samples, as accepted by ``loss_func``.
        t: Targets, as accepted by ``loss_func``.

    Returns:
        The scalar cross-entropy summed over all elements.
    """
    return loss_func(x, t)

def predict(x):
    """Classify input(s) with the current module-level parameters W and b.

    Args:
        x: A single sample or a batch of samples; must be compatible with
            ``np.dot(x, W)``.

    Returns:
        A tuple ``(y, result)``. ``y`` is the sigmoid output. ``result`` is
        the class label using a 0.5 threshold:

        * for a single output value, a plain ``int`` (1 = True, 0 = False);
        * for a batch, an integer array with the same shape as ``y``.
          Comparing a multi-element array directly in an ``if`` raises
          ``ValueError``, so batches are thresholded element-wise instead.
    """
    z = np.dot(x, W) + b
    y = sigmoid(z)

    labels = np.asarray(y >= 0.5).astype(int)
    if labels.size == 1:
        # Single prediction: keep returning a plain Python int.
        return y, int(labels.item())

    return y, labels

In [26]:
# 5. Set the learning rate and update W, b to drive the loss down

learning_rate = 1e-2  # if training diverges, try 1e-3 ~ 1e-6 instead


def f(x):
    """Loss on the training set, in the f(x) form numerical_derivative expects.

    The argument is intentionally unused: numerical_derivative perturbs W / b
    in place, and loss_func reads those same module-level arrays.
    """
    return loss_func(x_data, t_data)


print(
    "initial error value =", error_val(x_data, t_data),
    "initial W =", W, "\n", ", b =", b,
)

for step in range(80001):
    # In-place gradient-descent updates; b's gradient uses the updated W.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    # Report progress every 400 steps.
    if not step % 400:
        print(
            "step =", step,
            "error value =", error_val(x_data, t_data),
            "W =", W, ", b = ", b,
        )

initial error value = 27.022674741712716 initial W = [[0.97297071]
 [0.25525686]] 
 , b = [0.22148941]
step = 0 error value = 16.349331749824913 W = [[ 0.77394255]
 [-0.00293833]] , b =  [0.183421]
step = 400 error value = 2.1919130677303977 W = [[ 0.43085577]
 [-0.08047676]] , b =  [-2.7823161]
step = 800 error value = 1.559038848708326 W = [[ 0.54383879]
 [-0.02306623]] , b =  [-4.3623369]
step = 1200 error value = 1.2625164565865052 W = [[0.62935271]
 [0.01123262]] , b =  [-5.44774219]
step = 1600 error value = 1.0870323698305908 W = [[0.69866418]
 [0.03592469]] , b =  [-6.28354822]
step = 2000 error value = 0.9687962903949066 W = [[0.75723649]
 [0.0557167 ]] , b =  [-6.96980004]
step = 2400 error value = 0.8823150088088615 W = [[0.8081109 ]
 [0.07272738]] , b =  [-7.55674232]
step = 2800 error value = 0.815401484544695 W = [[0.85315709]
 [0.08806203]] , b =  [-8.07302217]
step = 3200 error value = 0.761482939623305 W = [[0.89361068]
 [0.10235371]] , b =  [-8.53644948]
step = 3600 e

step = 32800 error value = 0.16458430611476504 W = [[1.80333118]
 [0.71873482]] , b =  [-20.46143627]
step = 33200 error value = 0.16287412463656542 W = [[1.80945962]
 [0.72329141]] , b =  [-20.54385125]
step = 33600 error value = 0.16119865816020473 W = [[1.8155277 ]
 [0.72779812]] , b =  [-20.62542496]
step = 34000 error value = 0.1595568686929423 W = [[1.82153659]
 [0.73225602]] , b =  [-20.70617418]
step = 34400 error value = 0.15794775903083763 W = [[1.82748741]
 [0.73666613]] , b =  [-20.78611522]
step = 34800 error value = 0.15637037077754784 W = [[1.83338126]
 [0.74102945]] , b =  [-20.8652639]
step = 35200 error value = 0.15482378247776554 W = [[1.8392192 ]
 [0.74534696]] , b =  [-20.94363558]
step = 35600 error value = 0.15330710785741494 W = [[1.84500227]
 [0.74961958]] , b =  [-21.02124518]
step = 36000 error value = 0.15181949416364443 W = [[1.85073147]
 [0.75384823]] , b =  [-21.09810722]
step = 36400 error value = 0.15036012059805298 W = [[1.85640778]
 [0.75803379]] , b 

step = 65200 error value = 0.08863694436980116 W = [[2.16829145]
 [0.98291655]] , b =  [-25.32712675]
step = 65600 error value = 0.08813233750232341 W = [[2.17167216]
 [0.98530955]] , b =  [-25.37188074]
step = 66000 error value = 0.08763340361403753 W = [[2.17503415]
 [0.98768853]] , b =  [-25.41638235]
step = 66400 error value = 0.08714004805508278 W = [[2.17837761]
 [0.99005365]] , b =  [-25.4606344]
step = 66800 error value = 0.08665217826144546 W = [[2.18170276]
 [0.99240507]] , b =  [-25.50463964]
step = 67200 error value = 0.0861697036979819 W = [[2.18500978]
 [0.99474294]] , b =  [-25.5484008]
step = 67600 error value = 0.08569253580330037 W = [[2.18829888]
 [0.99706742]] , b =  [-25.59192054]
step = 68000 error value = 0.08522058793641468 W = [[2.19157024]
 [0.99937866]] , b =  [-25.63520149]
step = 68400 error value = 0.08475377532514937 W = [[2.19482406]
 [1.00167681]] , b =  [-25.67824625]
step = 68800 error value = 0.0842920150161607 W = [[2.19806051]
 [1.00396202]] , b = 

## step = 80000 error value = 0.0731258793437287 W = [[2.28230404] [1.06321827]] , b =  [-26.83408806]
## W1 = 2.28, W2 = 1.06인 것으로 보아 예습(W1)이 복습(W2)보다 학습 결과에 더 큰 영향을 준다

In [27]:
test_data = np.array([3, 17]) # (preview, review) = (3, 17) => Fail (0)
predict(test_data)

(array([0.12870414]), 0)

In [28]:
test_data = np.array([5, 8]) # (preview, review) = (5, 8) => Fail (0)

predict(test_data)

(array([0.00098999]), 0)

In [30]:
test_data = np.array([7, 21]) # (preview, review) = (7, 21); NOTE(review): the original note said Fail (0), but the recorded output for this cell shows label 1 - confirm the expected label

predict(test_data)

(array([0.99998956]), 1)

In [29]:
test_data = np.array([12, 0]) # (preview, review) = (12, 0); NOTE(review): the original note said Fail (0), but the recorded output for this cell shows label 1 - confirm the expected label

predict(test_data)

(array([0.63496124]), 1)