# Logistic Regression을 클래스로 구현해봅시다!

## 1. 함수로 구현한 LogisticRegression

### - 아래 코드를 참고하여 LogisticRegression_class.py에 클래스로 구현된 Logistic Regression을 완성시켜주세요!  

<br/>

코드 출처: 박성호님의 머신러닝 강의 https://youtu.be/nhzljkpjjFk, https://github.com/neowizard2018/neowizard/blob/master/MachineLearning/ML_LEC_17_Example1.ipynb  

In [1]:
import numpy as np

# Toy 1-D dataset stored as column vectors: ten training inputs with binary
# labels (0 for inputs 2..12, 1 for inputs 14..20) and nine test inputs.
X_train = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20])[:, np.newaxis]
y_train = np.array([0] * 6 + [1] * 4)[:, np.newaxis]
X_test = np.array([1, 3, 5, 7, 9, 11, 15, 17, 19])[:, np.newaxis]

print("X_train.shape = ", X_train.shape, ", y_train.shape = ", y_train.shape)

X_train.shape =  (10, 1) , y_train.shape =  (10, 1)


In [19]:
# Sigmoid (logistic) function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` is negated and passed to ``np.exp``, so the computation is applied
    element-wise when ``x`` is a NumPy array.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Numerical partial derivatives (central difference)
def numerical_derivative(f, x):
    """Estimate the partial derivative of ``f`` for every element of ``x``.

    Each element of ``x`` is overwritten in place with ``value + 1e-4`` and
    then ``value - 1e-4``; ``f(x)`` is evaluated at both points and the
    central difference ``(f_plus - f_minus) / (2 * 1e-4)`` is stored for that
    element. The element is restored before the next one is visited, so ``x``
    holds its original values again when the function returns.

    Args:
        f: Callable invoked as ``f(x)``; its result is written into a single
            slot of the gradient array.
        x: NumPy array of parameters. It is mutated in place while the
            estimates are computed.

    Returns:
        An array created with ``np.zeros_like(x)`` holding one estimate per
        element of ``x``.
    """
    step = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in walker:
        position = walker.multi_index
        original = x[position]

        # Evaluate f at x + step for this element only.
        x[position] = float(original) + step
        forward = f(x)

        # Evaluate f at x - step for this element only.
        x[position] = original - step
        backward = f(x)

        gradient[position] = (forward - backward) / (2 * step)

        # Put the untouched value back before visiting the next element.
        x[position] = original

    return gradient



In [20]:
# Parameter initialization
W = np.random.random_sample((1, 1))  # weight, shape (1, 1), drawn uniformly from [0, 1)
b = np.random.random_sample(1)  # bias, shape (1,), drawn uniformly from [0, 1)
learning_rate = 0.01

In [21]:
# Loss function
def loss_func(X_train, y_train):
    """Return the summed binary cross-entropy of the current model.

    The prediction is ``sigmoid(np.dot(X_train, W) + b)``, where ``W``, ``b``
    and ``sigmoid`` are module-level names rather than arguments.

    Args:
        X_train: Input samples, multiplied with ``W`` via ``np.dot``.
        y_train: Target labels, combined element-wise with the predictions.

    Returns:
        The negated sum of the per-sample log-likelihood terms.
    """
    eps = 1e-7  # keeps log() from diverging to -infinity at log(0)

    probability = sigmoid(np.dot(X_train, W) + b)

    # Cross-entropy, split into the label-1 and label-0 contributions.
    positive_term = y_train * np.log(probability + eps)
    negative_term = (1 - y_train) * np.log((1 - probability) + eps)
    return -np.sum(positive_term + negative_term)

# Loss value used for progress reporting
def error_val(X_train, y_train):
    """Return the current cross-entropy loss for ``X_train`` / ``y_train``.

    This previously duplicated the body of ``loss_func`` line for line. It now
    delegates to ``loss_func`` so the reported error can never drift away from
    the objective that is actually being minimised.

    Args:
        X_train: Input samples, forwarded unchanged to ``loss_func``.
        y_train: Target labels, forwarded unchanged to ``loss_func``.

    Returns:
        Whatever ``loss_func(X_train, y_train)`` returns.
    """
    return loss_func(X_train, y_train)


def predict(X):
    """Classify each sample in ``X`` as 1 or 0 with the current parameters.

    For every element ``x`` obtained by iterating over ``X`` the model output
    ``sigmoid(np.dot(x, W) + b)`` is computed from the module-level ``W``,
    ``b`` and ``sigmoid``; the sample is labelled 1 when that output is
    strictly greater than 0.5 and 0 otherwise.

    Returns:
        A Python list with one integer label per sample.
    """
    def label_for(sample):
        probability = sigmoid(np.dot(sample, W) + b)
        return 1 if probability > 0.5 else 0

    return [label_for(sample) for sample in X]

In [22]:
# Objective handed to numerical_derivative. The lambda ignores its argument:
# loss_func reads the module-level W and b, and numerical_derivative perturbs
# those same arrays in place, so every call sees the perturbed parameter.
f = lambda x : loss_func(X_train, y_train)

print("Initial error value = ", error_val(X_train, y_train), "Initial W = ", W, "\n", ", b = ", b )

# Gradient descent for 10001 steps.
for step in  range(10001):  
    
    # In-place update, so W stays the same array object that loss_func reads.
    W -= learning_rate * numerical_derivative(f, W)
    
    # The bias gradient is evaluated after W has already been updated.
    b -= learning_rate * numerical_derivative(f, b)
    
    # Report the loss and the current parameters every 400 steps.
    if (step % 400 == 0):
        print("step = ", step, "error value = ", error_val(X_train, y_train), "W = ", W, ", b = ",b )

Initial error value =  27.57887674785567 Initial W =  [[0.63383748]] 
 , b =  [0.10596355]
step =  0 error value =  11.11645986462114 W =  [[0.22244129]] , b =  [0.05792264]
step =  400 error value =  2.8754905059773317 W =  [[0.43174241]] , b =  [-4.2726443]
step =  800 error value =  1.7583892053115018 W =  [[0.45985005]] , b =  [-5.72756416]
step =  1200 error value =  1.5032650366288312 W =  [[0.53565133]] , b =  [-6.73668177]
step =  1600 error value =  1.3425297462549586 W =  [[0.59603574]] , b =  [-7.53839843]
step =  2000 error value =  1.2286623197397937 W =  [[0.64698234]] , b =  [-8.2133548]
step =  2400 error value =  1.1421094302842594 W =  [[0.69147999]] , b =  [-8.80184179]
step =  2800 error value =  1.0731553024641696 W =  [[0.73125343]] , b =  [-9.32708854]
step =  3200 error value =  1.016349446029051 W =  [[0.76739556]] , b =  [-9.80379866]
step =  3600 error value =  0.9683626151064734 W =  [[0.80064624]] , b =  [-10.24191458]
step =  4000 error value =  0.92702874

In [23]:
# Classify the test inputs with the function-based model trained above.
y_pred = predict(X_test)
# Bare expression: shows the predictions as the notebook cell output.
y_pred

[0, 0, 0, 0, 0, 0, 1, 1, 1]

## 2. class로 구현한 LogisticRegression_class

### 1을 참고하여 만든 모듈을 import하고 학습시켜주세요!

In [24]:
from LogisticRegression_class import *

In [25]:
# Build the class-based model from the same training data and run its
# training routine (LogisticRegression_cls comes from LogisticRegression_class).
model_class = LogisticRegression_cls(X_train, y_train)
model_class.train()

Initial error value =  16.075104375092234
step =  0 error value =  7.543478269675374
step =  400 error value =  2.6279170702445773
step =  800 error value =  1.7565454011813404
step =  1200 error value =  1.5021892728437451
step =  1600 error value =  1.3418003014041993
step =  2000 error value =  1.2281232501488022
step =  2400 error value =  1.141688296683317
step =  2800 error value =  1.0728133154471906
step =  3200 error value =  1.016063708348713
step =  3600 error value =  0.9681186142304529
step =  4000 error value =  0.9268167663660799
step =  4400 error value =  0.8906813844573943
step =  4800 error value =  0.8586634868344296
step =  5200 error value =  0.8299940165716682
step =  5600 error value =  0.8040941162764901
step =  6000 error value =  0.7805183009040323
step =  6400 error value =  0.758917145674615
step =  6800 error value =  0.7390120227219329
step =  7200 error value =  0.7205775358273067
step =  7600 error value =  0.7034290216267899
step =  8000 error value = 

In [26]:
# Classify the test inputs with the class-based model.
y_pred = model_class.predict(X_test)

print(y_pred)

[0, 0, 0, 0, 0, 0, 1, 1, 1]


In [7]:
#정답
#model_class = LogisticRegression_cls(X_train, y_train)
#model_class.train()

initial error value= 9.764485842269702
step= 0  error value= 12.863813142431257
step= 400  error value= 2.846142249503101
step= 800  error value= 1.788197607309589
step= 1200  error value= 1.5204882308809238
step= 1600  error value= 1.3541484590332817
step= 2000  error value= 1.2372216736883672
step= 2400  error value= 1.1487820061829852
step= 2800  error value= 1.0785656006766844
step= 3200  error value= 1.020864695642025
step= 3600  error value= 0.9722149043393165
step= 4000  error value= 0.93037302454704
step= 4400  error value= 0.893812414806018
step= 4800  error value= 0.861452170808396
step= 5200  error value= 0.8325019353976557
step= 5600  error value= 0.8063681425340465
step= 6000  error value= 0.7825948518906034
step= 6400  error value= 0.7608250204761637
step= 6800  error value= 0.7407743492387093
step= 7200  error value= 0.7222131404261993
step= 7600  error value= 0.7049534137459633
step= 8000  error value= 0.6888395666098783
step= 8400  error value= 0.6737414788181381
step=

In [8]:
#정답
#y_pred = model_class.predict(X_test)

#print(y_pred)

[0, 0, 0, 0, 0, 0, 1, 1, 1]


## 3. sklearn.linear_model의 LogisticRegression과 비교

In [17]:
from sklearn.linear_model import LogisticRegression

# y_train is a (10, 1) column vector, but scikit-learn expects 1-D labels of
# shape (n_samples,). Flatten it so fit() does not emit a
# DataConversionWarning about a column-vector y.
model_sk = LogisticRegression()
model_sk.fit(X_train, y_train.ravel())

  return f(**kwargs)


LogisticRegression()

In [18]:
# Classify the same test inputs with the scikit-learn model for comparison.
y_pred_sk = model_sk.predict(X_test) 

print(y_pred_sk)

[0 0 0 0 0 0 1 1 1]


* LogisticRegression_cls와 sklearn의 LogisticRegression 둘 다 똑같이 예측하고 있는 것을 볼 수 있다!