# Scikit-learn을 활용하여 GD 실행하기

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('reg_data/mlb_sample.csv')
df['smsa'] = df['smsa']/1000000
columns = ['wp', 'ps', 'ws', 'AL', 'smsa']
X = df[columns].to_numpy()
y = np.ravel(df['rev'])

최신 버전의 scikit-learn에서는 gradient descent 는 Stochastic Gradient Descent 모형에 포함되었습니다.\
tol은 Tolerance level로 이터레이션이 다 안되어도 중간에 차이가 tolerance level이하로 내려가면(수렴하면) 트레이닝을 종료합니다.

In [3]:
from sklearn.linear_model import SGDRegressor

# create an instance of the SGDRegressor class
model = SGDRegressor(loss='squared_error', alpha = 0.01, max_iter=1000, tol=1e-3)

# fit the model to the data
model.fit(X, y)

# extract the intercept and coefficients
intercept = model.intercept_
coefficients = model.coef_

print("Intercept: ", intercept)
print("Coefficients: ", coefficients)


Intercept:  [109.28824712]
Coefficients:  [45.85889199 19.27881121  6.22048094  8.45479541  4.16668441]


In [4]:
alphas = np.array([1,0.1,0.01,0.001,0.0001,0.00001])
for alpha in alphas:
    model = SGDRegressor(loss='squared_error', alpha = alpha, max_iter=1000)
    model.fit(X,y)
    coefficients = model.coef_
    print(f"Coefficients when alpha is {alpha}: ", coefficients)

Coefficients when alpha is 1.0:  [1.0071981  4.46802647 1.51632173 1.74099871 5.3997742 ]
Coefficients when alpha is 0.1:  [22.77146755 15.58004197  4.28142524  7.4809916   6.30937567]
Coefficients when alpha is 0.01:  [45.61812106 19.75076803  5.95041273 10.73184124  5.27623791]
Coefficients when alpha is 0.001:  [49.21165422 19.64158183  6.3222367   7.85860863  4.36638879]
Coefficients when alpha is 0.0001:  [49.12820556 21.94300682  4.82566144 12.4654312   7.57876826]
Coefficients when alpha is 1e-05:  [49.46284762 21.0400917   5.50654192 11.63370312  5.72025395]
