In [2]:
# x_data = (preview-study hours, review-study hours)
# t_data = 1 (Pass), 0 (Fail)

import numpy as np

x_data = np.array([ [2, 4], [4, 11], [6, 6], [8, 5], [10, 7], [12, 16], [14, 8], [16, 3], [18, 7] ])
# -1 lets NumPy infer the row count, so adding or removing samples does not
# require editing a hard-coded size here.
t_data = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1]).reshape(-1, 1)

# Every input row needs exactly one label row.
assert x_data.shape[0] == t_data.shape[0], "x_data and t_data must have the same number of rows"

# Check the number of dimensions and the shape of the data
print("x_data.ndim = ", x_data.ndim, ", x_data.shape = ", x_data.shape)
print("t_data.ndim = ", t_data.ndim, ", t_data.shape = ", t_data.shape)

x_data.ndim =  2 , x_data.shape =  (9, 2)
t_data.ndim =  2 , t_data.shape =  (9, 1)


In [4]:
# One weight per input feature (column of x_data); for the data above this is
# a 2x1 matrix. Deriving the size keeps W compatible with np.dot(x, W) if
# features are added.
W = np.random.rand(x_data.shape[1], 1)
b = np.random.rand(1)
print("W = ", W, ", W.shape = ", W.shape, ", b = ", b, ", b.shape = ", b.shape)

W =  [[0.98011625]
 [0.49080407]] , W.shape =  (2, 1) , b =  [0.89909573] , b.shape =  (1,)


In [5]:
# This is a classification task, so sigmoid is defined as the output function

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The direct formula calls exp(-x), which overflows (RuntimeWarning, or
    FloatingPointError under np.errstate(over='raise')) once -x exceeds
    roughly 709. Going through log(1 + exp(-x)) via np.logaddexp avoids that
    while returning the same values.

    Parameters
    ----------
    x : scalar or numpy array
        Input value(s); applied element-wise.

    Returns
    -------
    numpy scalar or array with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # so exp(-that) == 1 / (1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -x))

In [6]:
# The final output is y = sigmoid(Wx + b); the loss is expressed as cross-entropy

def loss_func(x, t):
    """Summed binary cross-entropy of sigmoid(x.W + b) against targets ``t``.

    Reads the module-level ``W`` and ``b`` at call time, so the value changes
    whenever those arrays are modified.

    Parameters
    ----------
    x : input array passed to np.dot(x, W)
    t : target array combined element-wise with the predictions

    Returns
    -------
    The negated sum of t*log(y) + (1-t)*log(1-y) over all elements.
    """
    eps = 1e-7    # keeps log() away from log(0) = -inf

    y_hat = sigmoid(np.dot(x, W) + b)

    positive_term = t * np.log(y_hat + eps)
    negative_term = (1 - t) * np.log((1 - y_hat) + eps)

    return -np.sum(positive_term + negative_term)

In [7]:
def numerical_derivative(f, x):
    """Approximate the gradient of ``f`` at ``x`` with central differences.

    Each element of ``x`` is perturbed IN PLACE by +delta_x and -delta_x,
    ``f(x)`` is evaluated for both, and the element is then restored. The
    in-place perturbation matters: callers may pass an ``f`` that ignores its
    argument and instead reads the same array through another reference.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; its result is used as a scalar.
    x : numpy.ndarray
        Point at which to differentiate. Temporarily modified during the
        call; its original contents are restored even if ``f`` raises.

    Returns
    -------
    numpy.ndarray
        Array created with ``np.zeros_like(x)`` holding
        (f(x + delta_x) - f(x - delta_x)) / (2 * delta_x) per element.
    """
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x) # f(x+delta_x)

            x[idx] = tmp_val - delta_x
            fx2 = f(x) # f(x-delta_x)
        finally:
            # Always undo the perturbation so a failing f cannot leave the
            # caller's array silently shifted by delta_x.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2*delta_x)

    return grad

In [8]:
def error_val(x, t):
    """Return the current cross-entropy error for inputs ``x`` and targets ``t``.

    This is the same quantity as ``loss_func``; delegating to it keeps the
    reported error and the optimised loss from drifting apart if the loss
    definition is ever edited.
    """
    return loss_func(x, t)

def predict(x):
    """Compute sigmoid(x.W + b) and threshold it at 0.5.

    Uses the module-level ``W`` and ``b``.

    Parameters
    ----------
    x : array
        Either a single sample or a batch of samples, as accepted by
        ``np.dot(x, W)``.

    Returns
    -------
    (y, result)
        ``y`` is the sigmoid output. For a single prediction ``result`` is the
        int 1 (True) when y > 0.5, else 0 (False). For a batch, ``result`` is
        an int array of 0/1 labels with the same shape as ``y``; a bare
        ``if y > 0.5`` would raise ValueError there.
    """
    z = np.dot(x,W) + b
    y = sigmoid(z)

    labels = (np.asarray(y) > 0.5).astype(int)

    if labels.size == 1:
        # Single prediction: keep returning a plain Python int.
        return y, int(labels.item())

    return y, labels

In [9]:
# NOTE(review): the original note here said that 1e-2 and 1e-3 make the loss
# diverge, yet the recorded run with 1e-2 shows the error value decreasing
# steadily - confirm which learning rates actually diverge.
learning_rate = 1e-2

def f(x):
    # The argument is deliberately unused: numerical_derivative perturbs W / b
    # in place, and loss_func reads those same module-level arrays.
    return loss_func(x_data, t_data)

print("Initial error value = ", error_val(x_data, t_data), "Initial W = ", W, "\n", ", b = ", b )

for step in range(80001):

    # W is updated first, so the gradient for b is taken at the new W.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    # Report progress every 400 steps (including step 0).
    if not step % 400:
        print("step = ", step, "error value = ", error_val(x_data, t_data), "W = ", W, ", b = ",b )

Initial error value =  35.956074469410964 Initial W =  [[0.98011625]
 [0.49080407]] 
 , b =  [0.89909573]
step =  0 error value =  25.125102369225438 W =  [[0.78106923]
 [0.23189384]] , b =  [0.85945415]
step =  400 error value =  2.3461476850186593 W =  [[ 0.4113891]
 [-0.0925435]] , b =  [-2.48587745]
step =  800 error value =  1.6191553132691228 W =  [[ 0.5298714]
 [-0.0292612]] , b =  [-4.17750755]
step =  1200 error value =  1.2948269033077444 W =  [[0.61838549]
 [0.00711943]] , b =  [-5.31236354]
step =  1600 error value =  1.1075644527684716 W =  [[0.68958021]
 [0.03279051]] , b =  [-6.17569701]
step =  2000 error value =  0.9832398471256977 W =  [[0.74945287]
 [0.05311355]] , b =  [-6.87938159]
step =  2400 error value =  0.893186555097363 W =  [[0.8012857 ]
 [0.07043605]] , b =  [-7.47832149]
step =  2800 error value =  0.8239851486316667 W =  [[0.84707215]
 [0.08596215]] , b =  [-8.0033555]
step =  3200 error value =  0.7685036885628457 W =  [[0.88811776]
 [0.10037403]] , b =

step =  32000 error value =  0.16836907879090057 W =  [[1.78999653]
 [0.70880249]] , b =  [-20.28200707]
step =  32400 error value =  0.16658084961198194 W =  [[1.79625831]
 [0.71346965]] , b =  [-20.36628283]
step =  32800 error value =  0.16482970580846973 W =  [[1.80245712]
 [0.71808453]] , b =  [-20.44967959]
step =  33200 error value =  0.16311451477697975 W =  [[1.80859422]
 [0.72264828]] , b =  [-20.53221527]
step =  33600 error value =  0.16143418937130893 W =  [[1.81467079]
 [0.727162  ]] , b =  [-20.61390725]
step =  34000 error value =  0.15978768564689913 W =  [[1.82068801]
 [0.73162676]] , b =  [-20.69477239]
step =  34400 error value =  0.15817400073870264 W =  [[1.826647  ]
 [0.73604358]] , b =  [-20.77482706]
step =  34800 error value =  0.1565921708630985 W =  [[1.83254886]
 [0.74041348]] , b =  [-20.85408716]
step =  35200 error value =  0.15504126943550917 W =  [[1.83839466]
 [0.74473743]] , b =  [-20.93256811]
step =  35600 error value =  0.15352040529593589 W =  [[

step =  64000 error value =  0.09026032849647865 W =  [[2.15754572]
 [0.97530508]] , b =  [-25.18484384]
step =  64400 error value =  0.0897372580713887 W =  [[2.16098663]
 [0.97774323]] , b =  [-25.23040956]
step =  64800 error value =  0.08922017223791426 W =  [[2.16440815]
 [0.98016683]] , b =  [-25.27571377]
step =  65200 error value =  0.08870896940025375 W =  [[2.16781049]
 [0.98257604]] , b =  [-25.32075942]
step =  65600 error value =  0.08820355024032812 W =  [[2.17119387]
 [0.98497105]] , b =  [-25.36554943]
step =  66000 error value =  0.08770381765449456 W =  [[2.1745585 ]
 [0.98735201]] , b =  [-25.41008666]
step =  66400 error value =  0.08720967669237403 W =  [[2.17790458]
 [0.98971908]] , b =  [-25.45437394]
step =  66800 error value =  0.0867210344976543 W =  [[2.18123231]
 [0.99207243]] , b =  [-25.49841401]
step =  67200 error value =  0.08623780025086884 W =  [[2.1845419 ]
 [0.99441221]] , b =  [-25.54220962]
step =  67600 error value =  0.08575988511399421 W =  [[2