### 규제
* 학습이 과대적합되는 것을 방지하고자 하는 알고리즘
* 라쏘(Lasso)
    - L1규제를 추가한 모형
    - 영향력이 크지 않은 회귀계수 값을 0으로 만드는 특성이 있음
        * 회귀계수 : 독립변수의 값이 변화함에 따라 종속변수에 미치는 영향력 크기
    - alpha를 이용하여 규제 강도(가중치)를 제어. alpha값이 너무 작으면 과적합, 너무 크면 과소적합될 우려가 있음
    - 영향력이 작은 회귀계수를 0으로 만듦으로써 모델에서 가장 중요한 특성이 무엇인지 알 수 있음
* 릿지(Ridge)
    - L2규제를 추가한 모형
    - 계수값을 0으로 만들지는 않고, 0에 가깝게 작게 만드는 특성이 있음
    - alpha를 이용하여 규제 강도(가중치)를 제어. alpha값이 너무 작으면 과적합, 너무 크면 과소적합될 우려가 있음
* 엘라스틱넷(ElasticNet)
    - L1, L2를 함께 결합한 모형
    - 피처가 많은 데이터세트에 적용
    - L1 규제로 feature의 수를 줄이고 L2규제로 계수값의 크기를 조정
    - 파라미터
        * alpha : L1규제의 alpha(a) + L2규제의 alpha(b). L1과 L2의 alpha를 합쳐 놓은 것
        * l1_ratio = 0 : 0에 가까워 질수록 L2규제와 동일
        * l1_ratio = 1 : 1에 가까워 질수록 L1규제와 동일
        * 0 < l1_ratio < 1 : L1과 L2규제를 적절히 적용
* 계수 : 계산해서 얻은 값

In [1]:
import pandas as pd
import numpy as np

import warnings
# Suppress all warnings for the rest of the session so cell outputs stay
# uncluttered. NOTE(review): this is a blanket filter, so any warning raised
# by later cells is hidden as well.
warnings.filterwarnings("ignore")

In [5]:
# Read the Boston CSV from the local data directory into a DataFrame, then
# preview its first five rows (the default for head()).
df = pd.read_csv("data/boston.csv")
df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [6]:
# Display the column labels of the loaded frame.
df.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'PRICE'],
      dtype='object')

In [7]:
# Predictor columns used as model inputs.
f = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
# Column holding the regression target.
label = 'PRICE'

# Feature matrix (DataFrame) and target vector (Series).
X = df[f]
y = df[label]

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. random_state is fixed so the split,
# and every metric computed from it, is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [9]:
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso

# alpha: regularization strength.
# The larger the value, the stronger the penalty; under a strong penalty even
# influential coefficients are driven to 0.
lasso = Lasso(alpha=0.07)
lasso.fit(X_train, y_train)

train_pred = lasso.predict(X_train)
test_pred = lasso.predict(X_test)

# The first line reports the training split, the second the test split.
# mean_squared_error is documented as (y_true, y_pred), so targets go first.
print("score : ", lasso.score(X_train, y_train),
      "mse : ", mean_squared_error(y_train, train_pred))
print("score : ", lasso.score(X_test, y_test),
      "mse : ", mean_squared_error(y_test, test_pred))

score :  0.7336186464695609 mse :  23.70890917778461
score :  0.7045234961897899 mse :  21.788116131885268


In [10]:
# Regularization strengths to compare.
alphas = [0.07, 0.1, 0.5, 1.3, 2]

# Refit Lasso for each alpha and print the score and MSE for the training
# split (first line) and the test split (second line).
for a in alphas:
    lasso = Lasso(alpha=a)
    lasso.fit(X_train, y_train)

    train_pred = lasso.predict(X_train)
    test_pred = lasso.predict(X_test)

    print("alpha : ", a)
    # mean_squared_error is documented as (y_true, y_pred): targets go first.
    print("score : ", lasso.score(X_train, y_train),
          "mse : ", mean_squared_error(y_train, train_pred))
    print("score : ", lasso.score(X_test, y_test),
          "mse : ", mean_squared_error(y_test, test_pred))
    print("-" * 50)

alpha :  0.07
score :  0.7336186464695609 mse :  23.70890917778461
score :  0.7045234961897899 mse :  21.788116131885268
--------------------------------------------------
alpha :  0.1
score :  0.7325349938898179 mse :  23.805358198156686
score :  0.7037932949950232 mse :  21.841960374070556
--------------------------------------------------
alpha :  0.5
score :  0.7196182068566637 mse :  24.95499921686577
score :  0.6901498661589456 mse :  22.848011982520976
--------------------------------------------------
alpha :  1.3
score :  0.6628142193807436 mse :  30.01076067371531
score :  0.6175787624340858 mse :  28.19932626770679
--------------------------------------------------
alpha :  2
score :  0.6354173075418571 mse :  32.44918545807511
score :  0.5821755592810526 mse :  30.809919975809137
--------------------------------------------------


In [11]:
from sklearn.model_selection import GridSearchCV

# Candidate regularization strengths for the search.
params = dict(alpha=[0.07, 0.1, 0.5, 1.3, 2])

lasso = Lasso()

# 5-fold cross-validated grid search, fitted on the training split only.
grid_cv = GridSearchCV(estimator=lasso, param_grid=params, cv=5)
grid_cv.fit(X_train, y_train)

# Report the selected alpha, then the score on each split.
print("최적의 하이퍼 파라미터 : ", grid_cv.best_params_)
print("train : ", grid_cv.score(X_train, y_train))
print("test : ", grid_cv.score(X_test, y_test))

최적의 하이퍼 파라미터 :  {'alpha': 0.5}
train :  0.7196182068566637
test :  0.6901498661589456


In [12]:
# Fit Lasso with alpha=3, then show the feature names followed by the fitted
# coefficients so the two can be compared position by position.
lasso = Lasso(alpha=3).fit(X_train, y_train)
print(X_train.columns)
lasso.coef_

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')


array([-0.        ,  0.0437953 , -0.        ,  0.        , -0.        ,
        0.        ,  0.04711327, -0.        ,  0.07997108, -0.0082593 ,
       -0.18599016,  0.00737554, -0.84348348])

In [13]:
# Display the alpha list currently bound in the session.
alphas

[0.07, 0.1, 0.5, 1.3, 2]

In [14]:
# Coefficient table: one row per feature, one column per alpha.
coeff_df = pd.DataFrame(index=X_train.columns)

# Iterate the alphas directly; no positional index is needed.
for alpha in alphas:
    lasso = Lasso(alpha=alpha)
    lasso.fit(X_train, y_train)
    col_name = "alpha : " + str(alpha)
    # Store this model's coefficients under a column named after its alpha.
    coeff_df[col_name] = lasso.coef_
coeff_df

Unnamed: 0,alpha : 0.07,alpha : 0.1,alpha : 0.5,alpha : 1.3,alpha : 2
CRIM,-0.107781,-0.106979,-0.088324,-0.046905,-0.016572
ZN,0.049919,0.049936,0.049913,0.050435,0.039768
INDUS,-0.003465,-0.0,0.0,0.0,0.0
CHAS,1.589894,1.11237,0.0,0.0,0.0
NOX,-0.0,-0.0,-0.0,-0.0,-0.0
RM,3.669726,3.588068,2.387918,0.0,0.0
AGE,-0.010875,-0.009105,0.006547,0.036855,0.049866
DIS,-1.23998,-1.222296,-0.993397,-0.495173,-0.031506
RAD,0.278164,0.281092,0.275082,0.249702,0.178452
TAX,-0.013689,-0.013934,-0.013989,-0.013391,-0.010669


In [15]:
from sklearn.linear_model import Ridge

# Regularization strengths to try with Ridge; this rebinds the `alphas` name.
alphas = [0.01, 0.1, 1, 10, 100]

In [16]:
# Fit one Ridge model per alpha and report the score and MSE on both splits.
for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train, y_train)

    train_pred = ridge.predict(X_train)
    test_pred = ridge.predict(X_test)

    train_score = ridge.score(X_train, y_train)
    test_score = ridge.score(X_test, y_test)

    # mean_squared_error is documented as (y_true, y_pred): targets go first.
    train_mse = mean_squared_error(y_train, train_pred)
    test_mse = mean_squared_error(y_test, test_pred)

    print("alpha : ", alpha)
    print("train : ", train_score, "mse : ", train_mse)
    # Label the held-out metrics as "test" so the two lines can be told apart.
    print("test : ", test_score, "mse : ", test_mse)
    print("-" * 50)

alpha :  0.01
train :  0.7492048923564809 mse :  22.321676613425044
train :  0.7072754687758787 mse :  21.585188665490232
--------------------------------------------------
alpha :  0.1
train :  0.7490962057488142 mse :  22.331350116753484
train :  0.7084675185411868 mse :  21.497288211861605
--------------------------------------------------
alpha :  1
train :  0.7457309393960452 mse :  22.63087106016567
train :  0.7108745419243823 mse :  21.31979692463928
--------------------------------------------------
alpha :  10
train :  0.7364128200603898 mse :  23.460217567002044
train :  0.7057894238337914 mse :  21.69476800380607
--------------------------------------------------
alpha :  100
train :  0.7194602097838179 mse :  24.969061530915955
train :  0.6866244921087381 mse :  23.107969231993486
--------------------------------------------------


In [17]:
# Display the alpha list currently bound in the session.
alphas

[0.01, 0.1, 1, 10, 100]

In [18]:
# Coefficient table: one row per feature, one column per alpha.
coeff_df = pd.DataFrame(index=X_train.columns)

for alpha in alphas:
    # fit() returns the estimator itself, so construction and fitting chain.
    ridge = Ridge(alpha=alpha).fit(X_train, y_train)
    col_name = "alpha : {}".format(alpha)
    coeff_df[col_name] = ridge.coef_
coeff_df

Unnamed: 0,alpha : 0.01,alpha : 0.1,alpha : 1,alpha : 10,alpha : 100
CRIM,-0.119371,-0.118627,-0.114791,-0.110659,-0.103788
ZN,0.047389,0.047583,0.048644,0.050689,0.0554
INDUS,0.062431,0.056318,0.024907,-0.005219,-0.004503
CHAS,2.976485,2.943507,2.724742,1.889245,0.529891
NOX,-20.66315,-19.046768,-10.682159,-1.969617,-0.203893
RM,3.653984,3.666812,3.71769,3.503934,1.972937
AGE,0.001758,0.000494,-0.005861,-0.009432,0.007144
DIS,-1.61794,-1.591876,-1.456501,-1.304163,-1.115921
RAD,0.322895,0.318895,0.298895,0.28826,0.322832
TAX,-0.011698,-0.011805,-0.012396,-0.013533,-0.015522


In [19]:
from sklearn.linear_model import ElasticNet

# l1_ratio: the closer to 0, the closer to the L2 (Ridge) penalty
# l1_ratio: the closer to 1, the closer to the L1 (Lasso) penalty

ratios = [0.2, 0.5, 0.8] 
alphas =  [0.1, 0.7, 1.5] # regularization strength

In [20]:
# Fit a single ElasticNet (alpha=0.7, l1_ratio=0.2) on the training split and
# print its score on each split.
el = ElasticNet(alpha=0.7, l1_ratio=0.2).fit(X_train, y_train)

print("train : ", el.score(X_train, y_train))
print("test : ", el.score(X_test, y_test))

train :  0.7036000586928309
test :  0.6686165217868851


In [21]:
# Search grid: every combination of regularization strength and L1/L2 mix.
params = dict(alpha=alphas, l1_ratio=ratios)

el = ElasticNet()

# 5-fold cross-validated grid search, fitted on the training split only.
grid_cv = GridSearchCV(estimator=el, param_grid=params, cv=5)
grid_cv.fit(X_train, y_train)

# Report the selected combination, then the score on each split.
print("최적의 하이퍼 파라미터 : ", grid_cv.best_params_)
print("train : ", grid_cv.score(X_train, y_train))
print("test : ", grid_cv.score(X_test, y_test))

최적의 하이퍼 파라미터 :  {'alpha': 0.1, 'l1_ratio': 0.2}
train :  0.7308936299152904
test :  0.7002511262035458
