# Note

주어진 data에 대해서 coef를 최적화해서 구했지만,
통계적으로 데이터를 더 주었을 때 바뀔 확률이 P이다.

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import time

def stopwatch_begin():
    return time.time()

def stopwatch_end(start_time):
    end_time = time.time()
    response_time = end_time - start_time
    print('response time=%f seconds' % (response_time))
    
def load_data():
    EXCEL_FILE = 'score.xlsx'
    data = pd.read_excel(EXCEL_FILE)

    X = data[['homework', 'attendance', 'final']]
    X = np.array(X)
    y = data['score']
    y = np.array(y)

    return X, y

def least_square(X, y):
    X_const = sm.add_constant(X)
    
    model = sm.OLS(y, X_const)
    ls = model.fit()
    
    print(ls.summary())
    
    print(ls.params)
    
    ls_c = ls.params[0]
    ls_homework = ls.params[1]
    ls_attendance = ls.params[2]
    ls_final = ls.params[3]
    
    return ls_c, ls_homework, ls_attendance, ls_final

def gradient_descent(X, y):
    start_time = stopwatch_begin()
    
    epochs = 1000000
    min_grad = 0.000001
    learning_rate_m = 0.001
    learning_rate_c = 0.1
    
    num_params = 3
    
    c = 0.0
    m = [0.0]*num_params
    
    n = len(y)
    
    for epoch in range(epochs):
        c_partial = 0.0
        m_partial = [0.0]*num_params
        
        for i in range(n):
            y_pred = c
            for j in range(num_params):
                y_pred += m[j] * X[i][j]
                
            c_partial += (y_pred-y[i])
            for j in range(num_params):
                m_partial[j] += (y_pred - y[i])*X[i][j]
                
        c_partial *= 2/n
        for j in range(num_params):
            m_partial[j] *= 2/n
        
        delta_c = -learning_rate_c * c_partial
        delta_m = [0.0] * num_params
        for j in range(num_params):
            delta_m[j] = -learning_rate_m * m_partial[j]
            
        c += delta_c
        for j in range(num_params):
            m[j] += delta_m[j]
            
        break_condition = True
        
        if abs(delta_c) > min_grad:
            break_condition = False
        for j in range(num_params):
            if abs(delta_m[j]) > min_grad:
                break_condition = False
                
        if break_condition:
            break
            
        if (epoch % 1000 == 0):
            print('epoch:', epoch, 'delta_m=', delta_m, 'delta_c=', delta_c, 'm=', m, 'c=', c)
            
    stopwatch_end(start_time)
    
    return c, m[0], m[1], m[2]

def gradient_descent_vectorized(X, y):
    start_time = stopwatch_begin()
    
    epochs = 1000000
    min_grad = 0.000001
    learning_rate_m = 0.001
    learning_rate_c = 0.1
    
    num_params = 3
    
    m = np.zeros(num_params)
    c = 0.0
    
    n = len(y)
    
    for epoch in range(epochs):

        y_pred = np.sum( m * X, axis=1 ) + c
        # axis=1이면, row(tuple) 갯수 정보를 유지한다.

        c_partial = np.sum (2*(y_pred-y)) / n
        m_partial = np.sum(2*((y_pred-y) * np.transpose(X)), axis=1 ) /n

        delta_c = -learning_rate_c * c_partial
        delta_m = -learning_rate_m * m_partial

        if abs(delta_c) < min_grad and np.all(np.abs(delta_m)) < min_grad:
            break

        c += delta_c
        m += delta_m

        if ( epoch % 1000 == 0 ):
            print("""epoch %d: delta_c=%f, delta_homework=%f, delta_attendance=%f, delta_final=%f,
                      c=%f, homework=%f, attendance=%f, final=%f\n""" %(epoch, delta_c, delta_m[0], delta_m[1], delta_m[2], c, m[0], m[1], m[2]) )
        
    stopwatch_end(start_time)
            
    return c, m[0], m[1], m[2]

if __name__=='__main__':
    X, y = load_data()
    
    ls_c, ls_homework, ls_attendance, ls_final = least_square(X, y)
    gd_c, gd_homework, gd_attendance, gd_final = gradient_descent(X, y)
    input('Enter:')
    gdv_c, gdv_homework, gdv_attendance, gdv_final = gradient_descent_vectorized(X, y)
    
    print('ls_c=%f, ls_homework=%f, ls_attendance=%f, ls_final=%f' % (ls_c, ls_homework, ls_attendance, ls_final))
    print('gd_c=%f, gd_homework=%f, gd_attendance=%f, gd_final=%f' % (gd_c, gd_homework, gd_attendance, gd_final))
    print('gdv_c=%f, gdv_homework=%f, gdv_attendance=%f, gdv_final=%f' %(gdv_c, gdv_homework, gdv_attendance, gdv_final))

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.825
Model:                            OLS   Adj. R-squared:                  0.819
Method:                 Least Squares   F-statistic:                     137.9
Date:                Fri, 11 Nov 2022   Prob (F-statistic):           3.76e-33
Time:                        09:04:42   Log-Likelihood:                -300.38
No. Observations:                  92   AIC:                             608.8
Df Residuals:                      88   BIC:                             618.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -3.6583     15.619     -0.234      0.8

epoch: 30000 delta_m= [-1.9268600911490435e-07, 4.197094053259506e-06, -2.8559847846225152e-08] delta_c= -2.884407493768393e-05 m= [1.7193005639190801, 1.901132635001688, 1.4974339464408253] c= -3.4272849349907535
epoch: 31000 delta_m= [-1.700746639823336e-07, 3.704572868713208e-06, -2.5208402672093124e-08] delta_c= -2.5459276364853845e-05 m= [1.7191194300309385, 1.905078100314265, 1.4974070988440522] c= -3.4543997186227506
epoch: 32000 delta_m= [-1.5011671846364572e-07, 3.269848129433182e-06, -2.2250243572338553e-08] delta_c= -2.2471677613702585e-05 m= [1.7189595518678362, 1.908560572607153, 1.4973834017628496] c= -3.4783326328027515
epoch: 33000 delta_m= [-1.3250080049548068e-07, 2.886137530242356e-06, -1.9639218799492755e-08] delta_c= -1.983466802087024e-05 m= [1.7188184351099558, 1.9116343832504958, 1.4973624854900731] c= -3.4994570639957723
epoch: 34000 delta_m= [-1.1695207824696071e-07, 2.5474546563445823e-06, -1.7334593252072355e-08] delta_c= -1.7507106603647347e-05 m= [1.718693

epoch 18000: delta_c=-0.000129, delta_homework=-0.000001, delta_attendance=0.000019, delta_final=-0.000000,
                      c=-2.624976, homework=1.724660, attendance=1.784389, final=1.498228

epoch 19000: delta_c=-0.000114, delta_homework=-0.000001, delta_attendance=0.000017, delta_final=-0.000000,
                      c=-2.746240, homework=1.723850, attendance=1.802034, final=1.498108

epoch 20000: delta_c=-0.000100, delta_homework=-0.000001, delta_attendance=0.000015, delta_final=-0.000000,
                      c=-2.853274, homework=1.723135, attendance=1.817608, final=1.498002

epoch 21000: delta_c=-0.000089, delta_homework=-0.000001, delta_attendance=0.000013, delta_final=-0.000000,
                      c=-2.947748, homework=1.722504, attendance=1.831355, final=1.497909

epoch 22000: delta_c=-0.000078, delta_homework=-0.000001, delta_attendance=0.000011, delta_final=-0.000000,
                      c=-3.031136, homework=1.721947, attendance=1.843489, final=1.497826

epoch

epoch 66000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.655765, homework=1.717774, attendance=1.934379, final=1.497208

epoch 67000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.656068, homework=1.717772, attendance=1.934423, final=1.497207

epoch 68000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.656335, homework=1.717770, attendance=1.934462, final=1.497207

epoch 69000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.656571, homework=1.717769, attendance=1.934496, final=1.497207

epoch 70000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.656780, homework=1.717767, attendance=1.934526, final=1.497207

epoch

epoch 113000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658340, homework=1.717757, attendance=1.934753, final=1.497205

epoch 114000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658341, homework=1.717757, attendance=1.934754, final=1.497205

epoch 115000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658342, homework=1.717757, attendance=1.934754, final=1.497205

epoch 116000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658343, homework=1.717757, attendance=1.934754, final=1.497205

epoch 117000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658343, homework=1.717757, attendance=1.934754, final=1.497205



epoch 160000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658348, homework=1.717757, attendance=1.934755, final=1.497205

epoch 161000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658348, homework=1.717757, attendance=1.934755, final=1.497205

epoch 162000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658348, homework=1.717757, attendance=1.934755, final=1.497205

epoch 163000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658348, homework=1.717757, attendance=1.934755, final=1.497205

epoch 164000: delta_c=-0.000000, delta_homework=-0.000000, delta_attendance=0.000000, delta_final=-0.000000,
                      c=-3.658348, homework=1.717757, attendance=1.934755, final=1.497205

