## p329 선형회귀 모델을 위한 회귀 변환

In [1]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,MinMaxScaler,PolynomialFeatures
from sklearn.linear_model import Lasso,ElasticNet

In [2]:
boston = load_boston()
bostonDF = pd.DataFrame(boston.data,columns=boston.feature_names)
bostonDF['PRICE'] = boston.target
y = bostonDF.PRICE
X = bostonDF.drop(columns=['PRICE'])

In [5]:
def get_scaled_data(method='None',p_degree=None,input_data=None):
    if method == 'Standard':
        scaled_data = StandardScaler().fit_transform(input_data)
    elif method == 'MinMax':
        scaled_data = MinMaxScaler().fit_transform(input_data)
    elif method == 'Log':
        scaled_data = np.log1p(input_data)
    else:
        scaled_data = input_data
    if p_degree != None:
        scaled_data = PolynomialFeatures(degree=p_degree,
                                         include_bias=False).fit_transform(scaled_data)
    return scaled_data

def get_linear_reg_eval(model_name,params=None,X=None,y=None,verbose=True):
    coeff_df=pd.DataFrame()
    if verbose:
        print('#####',model_name,'#####')
    for param in params:
        if model_name == 'Ridge':
            model = Ridge(alpha=param)
        elif model_name == 'Lasso':
            model = Lasso(alpha=param)
        elif model_name == 'ElasticNet':
            model = ElasticNet(alpha=param,l1_ratio=0.7)
        neg_mse_scores = cross_val_score(model,
                                         X,
                                         y,
                                         scoring='neg_mean_squared_error',
                                         cv=5)
        avg_rmse = np.mean(np.sqrt(-1*neg_mse_scores))
        print(f'alpha : {param} 일때 5폴드 세트의 평균 rmse: {avg_rmse:.3f}')
        model.fit(X,y)
        coeff = pd.Series(data=model.coef_,index=X.columns)
        colname = 'alpha:'+str(param)
        coeff_df[colname]=coeff
    return coeff_df

In [8]:
alphas = [0.1,1,10,100]
scale_methods = [('None',None),
                 ('Standard',None),
                 ('Standard',2),
                 ('MinMax',None),
                 ('MinMax',2),
                 ('Log',None)]
for scale_method in scale_methods:
    X_data_scaled = get_scaled_data(scale_method[0],scale_method[1],X)
    print(f'\n##변환유형:{scale_method[0]}, Polynomial Degree:{scale_method[1]}')
    get_linear_reg_eval('Ridge',alphas,X_data_scaled,y)


##변환유형:None, Polynomial Degree:None
##### Ridge #####
alpha : 0.1 일때 5폴드 세트의 평균 rmse: 5.788
alpha : 1 일때 5폴드 세트의 평균 rmse: 5.653
alpha : 10 일때 5폴드 세트의 평균 rmse: 5.518
alpha : 100 일때 5폴드 세트의 평균 rmse: 5.330

##변환유형:Standard, Polynomial Degree:None
##### Ridge #####
alpha : 0.1 일때 5폴드 세트의 평균 rmse: 5.826


AttributeError: 'numpy.ndarray' object has no attribute 'columns'

## 교재 5.3 Gradient Descent 참고