In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.linear_model import ElasticNet, Lasso, Ridge
from sklearn.model_selection import cross_val_score
%matplotlib inline

# Print the fold-averaged RMSE and convert the regression coefficients into a DataFrame.

def get_linear_reg_eval(model_name, params=None, X_data_n=None, y_target_n=None, verbose=True):
    """Cross-validate a regularized linear model over several alpha values.

    For every alpha in ``params`` the model selected by ``model_name`` is
    scored with 5-fold cross-validation (negative MSE), the mean of the
    per-fold RMSE values is printed, and the model is then refit on the full
    ``X_data_n`` / ``y_target_n`` to record its coefficients.

    Args:
        model_name: One of 'Ridge', 'Lasso' or 'ElasticNet'. ElasticNet is
            always built with ``l1_ratio=0.7``.
        params: Iterable of alpha values to evaluate.
        X_data_n: Feature DataFrame; its column labels become the index of
            the returned frame.
        y_target_n: Target values aligned with ``X_data_n``.
        verbose: When true, print a header line with the model name. The
            per-alpha RMSE lines are printed regardless of this flag.

    Returns:
        A DataFrame with one column per alpha, named 'alpha:<value>', holding
        the fitted coefficient of each feature.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Lambdas defer the estimator lookup until a model is actually built.
    model_builders = {
        'Ridge': lambda alpha: Ridge(alpha=alpha),
        'Lasso': lambda alpha: Lasso(alpha=alpha),
        'ElasticNet': lambda alpha: ElasticNet(alpha=alpha, l1_ratio=0.7),
    }
    if model_name not in model_builders:
        raise ValueError(
            "Unsupported model_name {!r}; expected one of {}".format(
                model_name, sorted(model_builders)
            )
        )
    build_model = model_builders[model_name]

    coeff_df = pd.DataFrame()
    if verbose:
        print('#####', model_name, '#####')

    for param in params:
        model = build_model(param)
        neg_mse_scores = cross_val_score(
            model, X_data_n, y_target_n, scoring='neg_mean_squared_error', cv=5
        )
        # Scores are negated MSE values, so flip the sign before the root.
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))
        print('alpha {}일 때 5 폴드 세트의 평균 RMSE : {:.3f}'.format(param, avg_rmse))

        # cross_val_score fits clones only, so fit this instance on the data
        # passed in by the caller (not on module-level globals) to read coef_.
        model.fit(X_data_n, y_target_n)
        coeff_df['alpha:' + str(param)] = pd.Series(data=model.coef_, index=X_data_n.columns)
    return coeff_df

# Load the Boston housing dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# pinned scikit-learn version still provides it.
boston = load_boston()

# Feature table with the target appended as the last column, 'PRICE'.
bostonDF = pd.DataFrame(data=boston.data, columns=boston.feature_names)
bostonDF['PRICE'] = boston.target

# The target is the 'PRICE' column; the features are every other column.
y_target = bostonDF['PRICE']
X_data = bostonDF.drop(columns=['PRICE'])

# Evaluate Lasso for each alpha and collect the fitted coefficients.
lasso_alphas = [0.07, 0.1, 0.5, 1, 3]
coeff_lasso_df = get_linear_reg_eval(
    'Lasso',
    params=lasso_alphas,
    X_data_n=X_data,
    y_target_n=y_target,
)


# Per-feature regression coefficients for each alpha, ordered by the column
# of the first alpha in the list, largest coefficient first.
sort_column = 'alpha:{}'.format(lasso_alphas[0])
coeff_lasso_df.sort_values(by=sort_column, ascending=False)


# Lasso regression rapidly shrinks the coefficients of unnecessary features, driving them to exactly 0.


##### Lasso #####
alpha 0.07일 때 5 폴드 세트의 평균 RMSE : 5.612
alpha 0.1일 때 5 폴드 세트의 평균 RMSE : 5.615
alpha 0.5일 때 5 폴드 세트의 평균 RMSE : 5.669
alpha 1일 때 5 폴드 세트의 평균 RMSE : 5.776
alpha 3일 때 5 폴드 세트의 평균 RMSE : 6.189


Unnamed: 0,alpha:0.07,alpha:0.1,alpha:0.5,alpha:1,alpha:3
RM,3.789725,3.703202,2.498212,0.949811,0.0
CHAS,1.434343,0.95519,0.0,0.0,0.0
RAD,0.270936,0.274707,0.277451,0.264206,0.061864
ZN,0.049059,0.049211,0.049544,0.049165,0.037231
B,0.010248,0.010249,0.009469,0.008247,0.00651
NOX,-0.0,-0.0,-0.0,-0.0,0.0
AGE,-0.011706,-0.010037,0.003604,0.02091,0.042495
TAX,-0.01429,-0.01457,-0.015442,-0.015212,-0.008602
INDUS,-0.04212,-0.036619,-0.005253,-0.0,-0.0
CRIM,-0.098193,-0.097894,-0.083289,-0.063437,-0.0
