## Logistic Regression 에서 MLE에 대한 GD 구현

### Chain Rule 정리  

$ h_i = W_{1}X_{i1} + W_{2}X_{i2} + b  $  
$ p_i = \frac{1}{1 + \exp(-h_i)}  $  
$ L^* =  \sum_{i=1}^{N}{\left[ t_{i} \log(p_i) + (1-t_{i})\log(1-p_i) \right]} $  
$ J^* = - \sum_{i=1}^{N}{\left[ t_{i} \log(p_i) + (1-t_{i})\log(1-p_i) \right]} $  

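$ \frac{\partial{J^*}}{\partial{p_i}} = - \left( \frac{t_i}{p_i} - \frac{1-t_i}{1-p_i} \right) $  
$ \frac{\partial{p_i}}{\partial{h_i}} = p_i(1-p_i)$  
$ \frac{\partial{h_i}}{\partial{W_j}} = X_{ij} $  
$ \frac{\partial{h_i}}{\partial{b}} = 1 $  

$ \frac{\partial{J^*}}{\partial{W_j}} = \sum_{i=1}^{N}{ \frac{\partial{J^*}}{\partial{p_i}} \frac{\partial{p_i}}{\partial{h_i}} \frac{\partial{h_i}}{\partial{W_j}} } = - \sum_{i=1}^{N}{(t_i - p_i) X_{ij}} $  
$ \frac{\partial{J^*}}{\partial{b}} = \sum_{i=1}^{N}{ \frac{\partial{J^*}}{\partial{p_i}} \frac{\partial{p_i}}{\partial{h_i}} \frac{\partial{h_i}}{\partial{b}} } = - \sum_{i=1}^{N}{(t_i - p_i)} $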

In [9]:
import pandas as pd
import numpy as np

from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every expression statement in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [18]:
class Logistic:
    """Two-feature logistic regression fitted by batch gradient descent.

    The model is ``p = sigmoid(X @ W + b)`` with ``W`` of shape ``(2, 1)``
    and ``b`` of shape ``(1,)``, both stored in ``self.params``. The
    objective minimised is the negative Bernoulli log-likelihood.
    """

    def __init__(self):
        # Small random weights; the bias starts at 1.
        self.params = {}
        self.params['W'] = 0.01 * np.random.randn(2, 1)
        self.params['b'] = np.ones(1)

    def forward(self, X):
        """Return the predicted probabilities for ``X``.

        Args:
            X: Array of shape ``(N, 2)``, one row per sample.

        Returns:
            Array of shape ``(N, 1)`` holding ``sigmoid(X @ W + b)``.
        """
        W = self.params['W']
        b = self.params['b']
        h = np.dot(X, W) + b
        p = 1 / (1 + np.exp(-h))

        return p

    def loss(self, X, T):
        """Return the negative log-likelihood of labels ``T`` given ``X``.

        Args:
            X: Array of shape ``(N, 2)``.
            T: Sequence of ``N`` binary labels (0 or 1).

        Returns:
            Scalar ``-sum(t * log(p) + (1 - t) * log(1 - p))``.
        """
        p = self.forward(X)

        # Row vector of labels; -1 lets NumPy infer N so that the method
        # works for any number of samples, not just a fixed dataset size.
        T = np.asarray(T).reshape(1, -1)

        # (1, N) @ (N, 1) sums the per-sample log-likelihood terms.
        L = np.dot(T, np.log(p)) + np.dot(1 - T, np.log(1 - p))
        L = np.squeeze(L)

        # The objective is the negative log-likelihood.
        return -L

    def gradient(self, X, T, learning_rate = 0.0001):
        """Apply one batch gradient-descent step to ``self.params`` in place.

        Args:
            X: Array of shape ``(N, 2)``.
            T: Sequence of ``N`` binary labels (0 or 1).
            learning_rate: Step size multiplied with the summed gradient.

        Returns:
            None. ``self.params['W']`` and ``self.params['b']`` are updated.
        """
        p = self.forward(X)
        # Column vector so that it lines up element-wise with p (N, 1).
        T = np.asarray(T).reshape(-1, 1)

        # Chain rule collapses to dJ/dh_i = -(t_i - p_i); the gradients are
        # summed (not averaged) over the batch.
        residual = T - p
        grads = {}
        grads['W'] = -np.dot(X.T, residual)
        grads['b'] = -np.sum(residual)

        self.params['W'] -= learning_rate * grads['W']
        self.params['b'] -= learning_rate * grads['b']

In [19]:
# Load the assignment dataset from the working directory and preview the
# first rows.
data = pd.read_csv("assignment_2.csv")
data.head()

Unnamed: 0,Label,bias,experience,salary
0,1,1,0.7,48000
1,0,1,1.9,48000
2,1,1,2.5,60000
3,0,1,4.2,63000
4,0,1,6.0,76000


In [20]:
# (rows, columns) of the loaded frame.
data.shape

(200, 4)

In [21]:
# Drop the constant "bias" column; the Logistic model keeps its own bias
# parameter in params['b'].
del data["bias"]
# Rescale salary by 1e-4 (e.g. 48000 -> 4.8) so it is on a magnitude
# comparable to "experience" -- presumably to keep gradient descent stable.
data["salary"] = 0.0001 * data["salary"]



# Feature matrix (N, 2) and label vector (N,) as NumPy arrays.
X = data[["experience", "salary"]].values
T = data["Label"].values

In [22]:
# Fresh model: small random weights, bias initialised to 1.
l = Logistic()

In [23]:
# Predicted probabilities before any training step.
l.forward(X)

array([[0.73673392],
       [0.73659074],
       [0.73794744],
       [0.73810166],
       [0.73942951],
       [0.73854061],
       [0.73925172],
       [0.74059953],
       [0.73993791],
       [0.73978402],
       [0.73612568],
       [0.73803068],
       [0.73931063],
       [0.73941766],
       [0.73688872],
       [0.74095337],
       [0.73933482],
       [0.73869497],
       [0.73765047],
       [0.73836266],
       [0.73932308],
       [0.73784002],
       [0.74011552],
       [0.73761452],
       [0.74069381],
       [0.73723355],
       [0.74213094],
       [0.73724583],
       [0.73786357],
       [0.74022197],
       [0.74263614],
       [0.73850558],
       [0.73650744],
       [0.73672199],
       [0.74006795],
       [0.73638819],
       [0.73460722],
       [0.74081204],
       [0.73651914],
       [0.73947691],
       [0.73938199],
       [0.73744808],
       [0.73673392],
       [0.73595851],
       [0.73488267],
       [0.73720984],
       [0.73600608],
       [0.739

In [24]:
# Negative log-likelihood of the untrained model.
l.loss(X, T)

214.45453337245735

In [25]:
# Run 100000 full-batch gradient-descent steps with the default learning
# rate, reporting the loss (computed after the update) every 10000 steps.
for i in range(100000):
    l.gradient(X, T)
    if i % 10000 == 0:
        print(i, "번째 Loss : ", l.loss(X, T))

0 번째 Loss :  162.07983407268182
10000 번째 Loss :  62.36711344701551
20000 번째 Loss :  59.184836759112386
30000 번째 Loss :  58.16938245213855
40000 번째 Loss :  57.78084133330186
50000 번째 Loss :  57.6169178953367
60000 번째 Loss :  57.54364690465924
70000 번째 Loss :  57.50968659626132
80000 번째 Loss :  57.49356960148934
90000 번째 Loss :  57.48579874722408


In [26]:
# Learned weights W (2, 1) and bias b after training.
l.params

{'W': array([[ 1.57954273],
        [-2.80429987]]), 'b': array([8.71042931])}