In [1]:
import numpy as np
import pandas as pd

## 데이터 읽기

In [2]:
def read_data():
    """Interactively load a data set and split off the response column.

    Prompts on standard input, in order, for: the data file name, the
    separator code (1 = space-separated, any other integer = comma-separated),
    the 1-based position of the response column, whether the file has a
    header row, and the name of the output file.

    Returns
    -------
    tuple
        ``(tr_feature, tr_response, out_name)``: a DataFrame holding every
        column except the response, the response column as a Series, and the
        output file name exactly as typed.

    Raises
    ------
    ValueError
        If a numeric prompt is answered with a non-integer, or if the response
        position is outside ``1..p`` where ``p`` is the number of columns read.
    """
    data_name = input('Enter the name of data file [(ex) sample1.csv]: ')
    coding_fm = int(input("Select the data coding format(1 = 'a b c' or 2 = 'a,b,c'): "))
    separator_fm = ' ' if coding_fm == 1 else ','
    res_pos = int(input('Enter the column position of the response variable : [from 1 to p]:')) - 1
    header = input('Does the data have column header? (y/n):')

    # Accept 'Y', ' y ', 'yes', ...; an exact-match test would silently parse
    # the header row as data for any of those answers.
    has_header = header.strip().lower() in ('y', 'yes')
    trdata = pd.read_csv(data_name, sep=separator_fm, header=0 if has_header else None)

    # Reject out-of-range positions up front: an answer of 0 would otherwise
    # select the last column (with a header) or raise KeyError (without one).
    n_cols = trdata.shape[1]
    if not 0 <= res_pos < n_cols:
        raise ValueError(
            f'Response column position must be between 1 and {n_cols}, got {res_pos + 1}.'
        )

    # Without a header the columns are labelled 0..p-1, so looking the label
    # up by position works for both layouts.
    res_col = trdata.columns[res_pos]
    tr_response = trdata[res_col]
    tr_feature = trdata.drop(res_col, axis=1)

    out_name = input('Enter the output file name to export [(ex) result.txt]:')
    return tr_feature, tr_response, out_name

In [3]:
# Prompt for the data file, its format, the response column and the output
# file name, then unpack the features, the response and the output name.
tr_x, tr_y, out_name = read_data()

Enter the name of data file [(ex) sample1.csv]: sample1.csv
Select the data coding format(1 = 'a b c' or 2 = 'a,b,c'): 2
Enter the column position of the response variable : [from 1 to p]:3
Does the data have column header? (y/n):y
Enter the output file name to export [(ex) result.txt]:result.txt


In [4]:
# Design matrix: a leading column of ones followed by the feature columns.
ones = np.ones((len(tr_x), 1))
tr_xc = np.hstack([ones, tr_x])

## Gradient Descent

In [5]:
class GradientDescent:
    """Logistic-regression coefficients fitted by batch gradient descent.

    Parameters
    ----------
    n_features : int, optional
        Number of columns of the design matrix (including any constant
        column). When given, ``beta`` is drawn immediately; when omitted,
        ``beta`` stays ``None`` until the first ``forward`` or ``train`` call,
        where it is sized from the data passed in.
    seed : int, optional
        Seed for the random initial coefficients, for reproducible runs.

    Attributes
    ----------
    beta : numpy.ndarray of shape (n_features, 1) or None
        Current coefficient vector.
    y_hat : numpy.ndarray
        Probabilities computed by the most recent ``forward`` call.
    grad : numpy.ndarray
        Gradient computed by the most recent ``gradient`` call.
    """

    def __init__(self, n_features=None, seed=None):
        self._rng = np.random.default_rng(seed)
        self.beta = None
        if n_features is not None:
            self._init_beta(n_features)

    def _init_beta(self, n_features):
        """Draw the starting coefficients uniformly from [0, 1)."""
        self.beta = self._rng.random((n_features, 1))

    def forward(self, x):
        """Compute, store and return the sigmoid of ``x @ beta`` as ``y_hat``."""
        x = np.asarray(x, dtype=float)
        if self.beta is None:
            self._init_beta(x.shape[1])
        reg = np.dot(x, self.beta)
        # sigmoid(z) == 0.5 * (1 + tanh(z / 2)); unlike exp(z) / (1 + exp(z))
        # this form cannot overflow to inf / inf = nan for large |z|.
        self.y_hat = 0.5 * (1.0 + np.tanh(0.5 * reg))
        return self.y_hat

    def gradient(self, x, y):
        """Compute, store and return the mean over rows of ``(y_hat - y) * x``.

        Uses the ``y_hat`` stored by the latest ``forward`` call. ``y`` may be
        any array-like (pandas Series, NumPy array, list) with one value per
        row of ``x``. The result has shape ``(n_features, 1)``.
        """
        x = np.asarray(x, dtype=float)
        target = np.asarray(y, dtype=float).reshape(-1, 1)
        self.grad = np.mean((self.y_hat - target) * x, axis=0).reshape(-1, 1)
        return self.grad

    def train(self, x, y, epoch, lr=0.01):
        """Run ``epoch`` full-batch updates ``beta -= lr * grad``.

        Returns ``self.beta``, which is updated in place.
        """
        # Convert once so the loop does not redo the conversion every epoch.
        x = np.asarray(x, dtype=float)
        target = np.asarray(y, dtype=float).reshape(-1, 1)
        if self.beta is None:
            self._init_beta(x.shape[1])
        for _ in range(epoch):
            self.forward(x)
            self.gradient(x, target)
            self.beta -= lr * self.grad
        return self.beta

In [6]:
# Create the gradient-descent logistic-regression model.
gd = GradientDescent()

In [7]:
# Run 100000 full-batch updates using train's default learning rate (lr=0.01).
beta = gd.train(tr_xc, tr_y, 100000)

In [8]:
# Display the coefficient array returned by train.
beta

array([[0.16829923],
       [2.82177564],
       [2.81929547]])

## Statsmodels

In [9]:
import statsmodels.api as sm

In [10]:
# Reference model: statsmodels Logit on the same response and design matrix.
lr = sm.Logit(endog=tr_y, exog=tr_xc)

In [11]:
# Fit the Logit model with fit()'s default arguments.
lr_fit = lr.fit()

Optimization terminated successfully.
         Current function value: 0.283800
         Iterations 8


In [12]:
# Keep the fitted coefficients for the comparison report written below.
lr_rst = lr_fit.params

In [13]:
# Display the statsmodels coefficient estimates.
lr_rst

const    0.168299
x1       2.821777
x2       2.819296
dtype: float64

## Output

In [14]:
# Start the report with the gradient-descent section and its intercept.
text = (
    'Coefficients by Gradient Descent Method\n'
    '---------------------------------------\n'
    f'Constant: {beta[0, 0]:.3f}\n'
)

In [15]:
# Append one "Beta<k>" line per non-intercept coefficient, numbered from 1.
text += ''.join(
    f'Beta{k}: {coef:.3f}\n' for k, coef in enumerate(beta[1:, 0], start=1)
)

In [16]:
# Append the statsmodels section header and its intercept.
# np.asarray(...)[0] reads the first coefficient by position: lr_rst is
# labelled ('const', 'x1', ...), and an integer key on a label-indexed
# Series is deprecated positional access in recent pandas.
text += f'''
Coefficients by Statmodels
--------------------------
Constant: {np.asarray(lr_rst)[0]:.3f}
'''

In [17]:
# Append one "Beta<k>" line per non-intercept statsmodels coefficient.
text += ''.join(
    f'Beta{k}: {coef:.3f}\n' for k, coef in enumerate(lr_rst[1:], start=1)
)

In [18]:
# Write the report. The context manager closes the file even if write()
# raises, which a manual open()/close() pair does not guarantee.
with open(out_name, "w", encoding="utf-8") as file:
    file.write(text)