## optimization on logistic regression

In [2]:
import numpy as np
import pandas as pd
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt

data = pd.read_csv("HR_comma_sep.csv")

In [4]:
y, X = dmatrices('left~satisfaction_level+last_evaluation+number_project+average_montly_hours+time_spend_company+Work_accident+promotion_last_5years+C(sales)+C(salary)', data, return_type='dataframe')
X_names = X.columns
y, X = np.ravel(y), X.values

## normalization

In [9]:
for i in range(1, X.shape[1]):
    xmin = X[:, i].min()
    xmax = X[:, i].max()
    X[:, i] = (X[:, i] - xmin) / (xmax - xmin)

## gradient descent

In [10]:
np.random.seed(1)
alpha = 1 #learning rate

#initialization
beta = np.random.randn(X.shape[1])

for T in range(200):
    proba = 1. / (1 + np.exp(-np.matmul(X, beta))) #(14999, )
    proba_y = list(zip(proba, y))
    loss = - np.sum(np.log(p) if y==1 else np.log(1-p) for p,y in proba_y) / len(y)
    error_rate = 0
    for i in range(len(y)):
        if (proba[i] >=0.5 and y[i] == 0) or (proba[i] < 0.5 and y[i] == 1):
            error_rate += 1
    error_rate /= len(y)
    
    if T % 5 == 0:
        print('T = {}, loss = {}, error rate = {}'.format(T, loss, error_rate))
    
    deriv = np.zeros(X.shape[1])
    for i in range(len(y)):
        deriv += X[i] * (proba[i] - y[i])
    deriv /= len(y)
    beta -= alpha * deriv

  loss = - np.sum(np.log(p) if y==1 else np.log(1-p) for p,y in proba_y) / len(y)


T = 0, loss = 1.1203823278066718, error rate = 0.5037002466831122
T = 5, loss = 0.6492666637968592, error rate = 0.2910194012934196
T = 10, loss = 0.6095807663133694, error rate = 0.26668444562970867
T = 15, loss = 0.5816449211566243, error rate = 0.25888392559503964
T = 20, loss = 0.5607552377630787, error rate = 0.2526835122341489
T = 25, loss = 0.5450244708251245, error rate = 0.2481498766584439
T = 30, loss = 0.5328795073088821, error rate = 0.24854990332688845
T = 35, loss = 0.5231836380769039, error rate = 0.2474164944329622
T = 40, loss = 0.5151826947262421, error rate = 0.24234948996599773
T = 45, loss = 0.5083944951258156, error rate = 0.23968264550970064
T = 50, loss = 0.5025111703564583, error rate = 0.23728248549903327
T = 55, loss = 0.49733135447626586, error rate = 0.23581572104806986
T = 60, loss = 0.4927180171532152, error rate = 0.23448229881992133
T = 65, loss = 0.48857356128232937, error rate = 0.23381558770584707
T = 70, loss = 0.48482536350449806, error rate = 0.23

In [12]:
pd.DataFrame(zip(X_names, beta))

Unnamed: 0,0,1
0,Intercept,0.36151
1,C(sales)[T.RandD],-0.733954
2,C(sales)[T.accounting],-0.253959
3,C(sales)[T.hr],-0.191602
4,C(sales)[T.management],-0.337145
5,C(sales)[T.marketing],-0.772456
6,C(sales)[T.product_mng],-0.086809
7,C(sales)[T.sales],-0.214873
8,C(sales)[T.support],-0.148131
9,C(sales)[T.technical],-0.151191
