In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

In [2]:
# Read the Seoul daily mosquito / weather CSV into a DataFrame and preview it.
# NOTE(review): the path below is an absolute, machine-specific location; the
# notebook will only run where this exact file exists.
df = pd.read_csv(
    filepath_or_buffer="E:/21KDT-Project-master/4. dms_seoul_avg.csv",
)
df.head()

Unnamed: 0,date,mosquito,temp,rain_per_day,accum_rain,wind,humidity,sunshine
0,2015-04-06,199,12.214286,5.285714,0,3.371429,68.757143,8.88
1,2015-04-07,146,11.571429,4.928571,0,3.428571,65.742857,9.891429
2,2015-04-08,90,10.914286,4.571429,0,3.257143,63.542857,9.927143
3,2015-04-09,172,10.171429,0.571429,0,2.871429,59.614286,10.884286
4,2015-04-10,249,10.314286,0.071429,0,2.871429,53.485714,12.777143


In [3]:
# Feature matrix: every column of df except the target and the date column,
# exposed as a plain NumPy array.
X_data = df.drop(columns=['mosquito', 'date']).values
# Regression target: the 'mosquito' column as a NumPy array.
y_target = df['mosquito'].values

In [4]:
from sklearn.linear_model import Lasso, ElasticNet

def get_linear_reg_eval(model_name, params = None, X_data_n = None, y_target_n = None, verbose = True, feature_names = None):
    """Evaluate a regularized linear model over a list of alpha values.

    For each alpha the selected model is scored with 5-fold cross-validation
    (``neg_mean_squared_error`` converted to a mean RMSE, which is printed),
    then refit on the full ``X_data_n`` / ``y_target_n`` so that its
    coefficients can be collected.

    Parameters
    ----------
    model_name : str
        ``'Ridge'``, ``'Lasso'`` or ``'ElasticNet'``. ElasticNet is always
        built with ``l1_ratio = 0.7``.
    params : iterable, optional
        Alpha values to try. ``None`` is treated as "no alphas" and yields an
        empty result.
    X_data_n : array-like or DataFrame
        Feature matrix passed to both cross-validation and the final fit.
    y_target_n : array-like
        Target values passed to both cross-validation and the final fit.
    verbose : bool, default True
        When true, print a banner with the model name before the sweep. The
        per-alpha RMSE line is printed regardless of this flag.
    feature_names : sequence of str, optional
        Row labels for the coefficient table. Defaults to ``X_data_n.columns``
        when ``X_data_n`` has that attribute, otherwise to the six weather
        feature names this notebook uses.

    Returns
    -------
    pandas.DataFrame
        One column ``'alpha:<value>'`` per alpha, one row per feature, holding
        the coefficients of the model fit on the full data.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    # Builders are lambdas so an estimator class is only looked up when used.
    model_builders = {
        'Ridge': lambda alpha: Ridge(alpha = alpha),
        'Lasso': lambda alpha: Lasso(alpha = alpha),
        'ElasticNet': lambda alpha: ElasticNet(alpha = alpha, l1_ratio = 0.7),
    }
    if model_name not in model_builders:
        raise ValueError(
            "model_name must be one of {0}, got {1!r}".format(sorted(model_builders), model_name)
        )
    build_model = model_builders[model_name]

    if feature_names is None:
        data_columns = getattr(X_data_n, 'columns', None)
        if data_columns is not None:
            feature_names = list(data_columns)
        else:
            feature_names = ['temp', 'rain_per_day', 'accum_rain', 'wind', 'humidity', 'sunshine']

    coeff_df = pd.DataFrame()
    if verbose :
        print('####### ', model_name , '#######')

    for param in (() if params is None else params):
        model = build_model(param)

        neg_mse_scores = cross_val_score(model, X_data_n, y_target_n, scoring = 'neg_mean_squared_error',cv=5)
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))

        print('alpha {0} 일 때 5 folds 의 평균 RMSE : {1:.3f}'.format(param,avg_rmse))

        # Refit on the data that was passed in (not on notebook-level globals)
        # so the coefficients describe the same data that was cross-validated.
        model.fit(X_data_n, y_target_n)
        coeff = pd.Series(data=model.coef_, index = feature_names)
        colname = 'alpha:' +str(param)
        coeff_df[colname] = coeff
    return coeff_df

In [10]:
from sklearn.linear_model import Lasso, ElasticNet

# Alpha grid for the ElasticNet sweep, from weak to strong regularization.
elastic_alphas = [
    0.07, 0.1, 0.5, 1,
    50, 70, 100,
]
# Run the sweep: prints the mean 5-fold RMSE per alpha and returns the
# per-alpha coefficient table.
coeff_elastic_df = get_linear_reg_eval(
    'ElasticNet',
    elastic_alphas,
    X_data,
    y_target,
)

#######  ElasticNet #######
alpha 0.07 일 때 5 folds 의 평균 RMSE : 2163.231
alpha 0.1 일 때 5 folds 의 평균 RMSE : 2161.779
alpha 0.5 일 때 5 folds 의 평균 RMSE : 2151.263
alpha 1 일 때 5 folds 의 평균 RMSE : 2146.270
alpha 50 일 때 5 folds 의 평균 RMSE : 2128.736
alpha 70 일 때 5 folds 의 평균 RMSE : 2140.511
alpha 100 일 때 5 folds 의 평균 RMSE : 2158.940


In [11]:
# Order the features by their coefficient at the first alpha in the grid,
# largest coefficient first.
sort_column = f'alpha:{elastic_alphas[0]}'
coeff_elastic_df.sort_values(sort_column, ascending=False)

Unnamed: 0,alpha:0.07,alpha:0.1,alpha:0.5,alpha:1,alpha:50,alpha:70,alpha:100
wind,834.247351,801.718361,526.356063,366.823373,6.702597,3.7108,1.585722
temp,288.235906,287.332581,278.855102,272.459005,141.451036,120.417697,98.731523
accum_rain,3.641306,3.620461,3.435276,3.311721,0.658278,0.140322,-0.0
humidity,3.259324,3.327508,4.170278,5.124924,26.4677,26.661325,25.72522
sunshine,-36.562995,-36.106619,-31.716582,-28.250347,13.790961,13.928748,12.689894
rain_per_day,-71.166574,-70.822122,-67.806733,-65.876305,-36.575183,-30.362567,-24.027431
