In [1]:
import numpy as np

# x_data는 9x2, t_data는 9x1
x_data = np.array([ [2,4], [4,11], [6,6], [8,5], [10,7], [12,16], [14, 8], [16,3], [18,7] ])
t_data = np.array([0,0,0,0,1,1,1,1,1]).reshape(9,1)

In [2]:
W = np.random.rand(2,1)     # 2x1 행렬 (x_data == 9x2)
b = np.random.rand(1)
print("W =", W, ", W,shape =", W.shape, ", b =", b, ", b.shape =", b.shape)

W = [[0.43545369]
 [0.00585246]] , W,shape = (2, 1) , b = [0.55610528] , b.shape = (1,)


In [3]:
def sigmoid(x):
    return 1 / (1+np.exp(-x))

def loss_func(x, t):
    
    delta = 1e-7     # log 무한대 발산 방지
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    # cross-entropy
    return -np.sum( t*np.log(y + delta) + (1-t) * np.log((1 - y) + delta) )

In [4]:
# 수치미분 함수
def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.000
    grad = np.zeros_like(x) # x 크기의 어레이 선언 및 초기화
    
    it = np.nditer(x, flags = ['multi_index'], op_flags = ['readwrite'])
    
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx] # x 바뀌기 때문에 저장해둠
        x[idx]= float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = tmp_val - delta_x
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val
        it.iternext()
        
    return grad

In [5]:
def error_val(x, t):
    delta = 1e-7      # log 무한대 발산 방지
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
     # cross-entropy
    return -np.sum( t*np.log(y + delta) + (1-t) * np.log((1 - y) + delta) )

def predict(x):
    
    z = np.dot(x, W) + b
    y = sigmoid(z)
    
    if y  > 0.5:
        result = 1     # True
    else :
        result = 0     # False
        
    return y, result

In [6]:
learning_rate = 1e-2 # 발산하는 경우 1e-3 ~ 1e-6 등으로 바꿔 실행

f = lambda x : loss_func(x_data, t_data) # f(x) = loss_func(x_data, t_data)

print("Initial error value =", error_val(x_data, t_data), "Initial W =", W, "\n", ", b=", b)

for step in range(80001):
    
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)
    
    if (step % 400 == 0):
        print("step =", step, "error value", error_val(x_data, t_data), "W =", W, ", b=", b)
    

Initial error value = 11.454897432168181 Initial W = [[0.43545369]
 [0.00585246]] 
 , b= [0.55610528]
step = 0 error value 4.892910188210715 W = [[ 0.24771695]
 [-0.23282866]] , b= [0.54264082]
step = 400 error value 2.2791386889621768 W = [[ 0.41957311]
 [-0.08737012]] , b= [-2.61158076]
step = 800 error value 1.5935194993750204 W = [[ 0.53571558]
 [-0.0266446 ]] , b= [-4.25514229]
step = 1200 error value 1.2811720940449196 W = [[0.62296147]
 [0.00884401]] , b= [-5.36896813]
step = 1600 error value 1.0989323079221944 W = [[0.69336389]
 [0.03409887]] , b= [-6.22067343]
step = 2000 error value 0.9771877506072637 W = [[0.75269123]
 [0.0541971 ]] , b= [-6.91702446]
step = 2400 error value 0.8886417192795525 W = [[0.80412304]
 [0.07138791]] , b= [-7.51093146]
step = 2800 error value 0.8204027802078561 W = [[0.84960026]
 [0.08683327]] , b= [-8.0323007]
step = 3200 error value 0.7655773016006884 W = [[0.89039887]
 [0.10119448]] , b= [-8.49961081]
step = 3600 error value 0.7201339353479059 W 

 [0.72742523]] , b= [-20.61867311]
step = 34000 error value 0.15969213748227803 W = [[1.82103913]
 [0.73188714]] , b= [-20.69949026]
step = 34400 error value 0.15808034728738493 W = [[1.82699474]
 [0.73630119]] , b= [-20.7794979]
step = 34800 error value 0.15650035676649948 W = [[1.83289329]
 [0.74066837]] , b= [-20.85871187]
step = 35200 error value 0.15495124146085756 W = [[1.83873584]
 [0.74498964]] , b= [-20.93714759]
step = 35600 error value 0.15343211223540115 W = [[1.84452343]
 [0.74926596]] , b= [-21.01482001]
step = 36000 error value 0.15194211361070709 W = [[1.85025707]
 [0.75349824]] , b= [-21.09174367]
step = 36400 error value 0.15048042218864818 W = [[1.85593774]
 [0.75768735]] , b= [-21.1679327]
step = 36800 error value 0.1490462451657095 W = [[1.86156641]
 [0.76183416]] , b= [-21.24340082]
step = 37200 error value 0.14763881892819739 W = [[1.86714399]
 [0.7659395 ]] , b= [-21.31816139]
step = 37600 error value 0.14625740772423268 W = [[1.87267139]
 [0.77000419]] , b= [-2

step = 68000 error value 0.08525963729524892 W = [[2.19129887]
 [0.99918696]] , b= [-25.63161137]
step = 68400 error value 0.08479240083301716 W = [[2.19455414]
 [1.0014862 ]] , b= [-25.67467567]
step = 68800 error value 0.08433022350315364 W = [[2.19779203]
 [1.00377248]] , b= [-25.71750611]
step = 69200 error value 0.08387302397735039 W = [[2.20101273]
 [1.00604594]] , b= [-25.76010519]
step = 69600 error value 0.08342072265473956 W = [[2.20421642]
 [1.00830674]] , b= [-25.80247538]
step = 70000 error value 0.08297324161640955 W = [[2.20740327]
 [1.010555  ]] , b= [-25.84461911]
step = 70400 error value 0.08253050458135171 W = [[2.21057346]
 [1.01279087]] , b= [-25.88653877]
step = 70800 error value 0.08209243686377328 W = [[2.21372715]
 [1.01501448]] , b= [-25.92823671]
step = 71200 error value 0.08165896533171746 W = [[2.21686453]
 [1.01722597]] , b= [-25.96971526]
step = 71600 error value 0.08123001836699846 W = [[2.21998576]
 [1.01942546]] , b= [-26.01097668]
step = 72000 error v

In [7]:
test_data = [3, 17]     # (예습, 복습)
predict(test_data)

(array([0.12865913]), 0)

In [8]:
test_data = [5, 8]
predict(test_data)

(array([0.00099059]), 0)

In [9]:
test_data = [7, 21]
predict(test_data)

(array([0.99998954]), 1)

In [10]:
test_data = [12, 0]      # 예습이 합격 확률 높음 (W1 = 2.28 > W2 = 1.06)
predict(test_data)

(array([0.63502609]), 1)