In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import numpy as np

# Notebook-wide settings: suppress every warning and set the default
# matplotlib font (Batang) and figure size.
# NOTE(review): ignoring all warnings also hides library deprecation notices.
warnings.filterwarnings(action='ignore')
plt.rcParams.update({
    'font.family': 'Batang',
    'figure.figsize': (10, 10),
})

In [None]:
# Load the combined purchase/weather table; the CSV is CP949-encoded and the
# '일시' column is parsed as datetime.
buy_weather_total = pd.read_csv(
    "../활용 데이터셋/buy_weather_total.csv",
    encoding='cp949',
    parse_dates=["일시"],
)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the frame.
buy_weather_total

## 온도 - 냉방

In [None]:
# Load the temperature / cooling table; the CSV is CP949-encoded and the
# '일시' column is parsed as datetime.
buy_weather_tpr = pd.read_csv(
    "../활용 데이터셋/buy_weather_tpr.csv",
    encoding='cp949',
    parse_dates=["일시"],
)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the frame.
buy_weather_tpr

## 온도 - 난방

In [None]:
# Load the temperature / heating table; the CSV is CP949-encoded and the
# '일시' column is parsed as datetime.
buy_weather_tph = pd.read_csv(
    "../활용 데이터셋/buy_weather_tph.csv",
    encoding='cp949',
    parse_dates=["일시"],
)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the frame.
buy_weather_tph

## 습도

In [None]:
# Load the humidity table; the CSV is CP949-encoded and the '일시' column is
# parsed as datetime.
buy_weather_hm = pd.read_csv(
    "../활용 데이터셋/buy_weather_hm.csv",
    encoding='cp949',
    parse_dates=["일시"],
)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the frame.
buy_weather_hm

## 공기

In [None]:
# Load the air-quality table; the CSV is CP949-encoded and the '일시' column
# is parsed as datetime.
buy_weather_ar = pd.read_csv(
    "../활용 데이터셋/buy_weather_ar.csv",
    encoding='cp949',
    parse_dates=["일시"],
)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the frame.
buy_weather_ar

## 냉난방

In [None]:
# Load the combined heating/cooling table; the CSV is CP949-encoded and the
# '일시' column is parsed as datetime.
buy_weather_tp = pd.read_csv(
    "../활용 데이터셋/buy_weather_tp.csv",
    encoding='cp949',
    parse_dates=["일시"],
)

In [None]:
# Bare expression as the last line of the cell: the notebook renders the frame.
buy_weather_tp

### 평가기준

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso, ElasticNet
from sklearn.metrics import mean_absolute_error


In [None]:
def MAPE(y_test, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are converted to float NumPy arrays first, so plain lists are
    accepted and pandas objects are compared by position rather than being
    aligned on their index labels.

    Parameters
    ----------
    y_test : array-like
        Observed values (used as the denominator).
    y_pred : array-like
        Predicted values, in the same order as ``y_test``.

    Returns
    -------
    float
        ``mean(|y_test - y_pred| / |y_test|) * 100``. Entries where ``y_test``
        is 0 make the result inf or nan, because no zero guard is applied.
    """
    actual = np.asarray(y_test, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((actual - predicted) / actual)) * 100

def MPE(y_test, y_pred):
    """Return the mean (signed) percentage error, in percent.

    Both inputs are converted to float NumPy arrays first, so plain lists are
    accepted and pandas objects are compared by position rather than being
    aligned on their index labels.

    Parameters
    ----------
    y_test : array-like
        Observed values (used as the denominator).
    y_pred : array-like
        Predicted values, in the same order as ``y_test``.

    Returns
    -------
    float
        ``mean((y_test - y_pred) / y_test) * 100``; negative when the
        predictions are too high on average. Entries where ``y_test`` is 0
        make the result inf or nan, because no zero guard is applied.
    """
    actual = np.asarray(y_test, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.mean((actual - predicted) / actual) * 100

def get_rmse(model):
    """Score a fitted model on the current hold-out split and print the metrics.

    ``X_test`` and ``y_test`` are not parameters: they are read from the
    enclosing (notebook-global) scope, so they must be bound before the call.

    The printed labels describe the values as log-transformed.
    NOTE(review): that presumably relies on the target having been
    log1p-transformed upstream - confirm against the data preparation.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.

    Returns
    -------
    tuple
        ``(rmse, mse, mae, mape, mpe, r2)``.
    """
    model_name = model.__class__.__name__
    y_hat = model.predict(X_test)

    # All metrics are computed from the same predictions; RMSE derives from MSE.
    mse = mean_squared_error(y_test, y_hat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_hat)
    mape = MAPE(y_test, y_hat)
    mpe = MPE(y_test, y_hat)
    r2 = r2_score(y_test, y_hat)

    print('{0} 로그 변환된 RMSE: {1}'.format(model_name, np.round(rmse, 3)))
    print('{0} 로그 변환된 MSE: {1}'.format(model_name, np.round(mse, 3)))
    print('{0} 로그 변환된 MAE: {1}'.format(model_name, np.round(mae, 3)))
    print('{0} 로그 변환된 MAPE: {1}'.format(model_name, np.round(mape, 3)))
    print('{0} 로그 변환된 MPE: {1}'.format(model_name, np.round(mpe, 3)))
    print('r2-score : {0:.3f}'.format(r2))

    return rmse, mse, mae, mape, mpe, r2


def get_rmses(model):
    """Evaluate ``model`` via ``get_rmse`` and pass its six metrics through.

    Returns
    -------
    tuple
        ``(rmse, mse, mae, mape, mpe, r2)`` exactly as produced by ``get_rmse``.
    """
    metrics = get_rmse(model)
    # Unpack explicitly so an unexpected number of metrics still fails here.
    rmse, mse, mae, mape, mpe, r2 = metrics
    return rmse, mse, mae, mape, mpe, r2



### 함수

In [None]:
from sklearn.model_selection import GridSearchCV

def print_best_params(model, params):
    """Grid-search ``params`` for ``model`` with 5-fold CV and report the best RMSE.

    The search is fitted on ``X_features`` / ``y_target``, which are read from
    the enclosing (notebook-global) scope rather than passed in.

    Parameters
    ----------
    model : estimator to tune.
    params : dict
        Parameter grid handed to ``GridSearchCV`` as ``param_grid``.

    Returns
    -------
    tuple
        ``(best_estimator_, best_params_)`` of the fitted search.
    """
    search = GridSearchCV(
        model,
        param_grid=params,
        scoring='neg_mean_squared_error',
        cv=5,
    )
    search.fit(X_features, y_target)

    # The scoring is negated MSE, so flip the sign before taking the root.
    best_rmse = np.sqrt(-1 * search.best_score_)
    message = '{0} 5 CV 시 최적 평균 RMSE 값: {1}, 최적 parameter:{2}'.format(
        model.__class__.__name__, np.round(best_rmse, 4), search.best_params_)
    print(message)

    return search.best_estimator_, search.best_params_

In [None]:
from sklearn.model_selection import train_test_split

def get_preprocessed_df(df=None):
    """Return a new DataFrame equal to ``df`` without 'sm_cat' and '일시'.

    The input frame is left untouched. A ``KeyError`` is raised by pandas if
    either column is missing.
    """
    return df.drop(columns=['sm_cat', '일시'])

In [None]:
def get_train_test_dataset(df=None, target_col='qty', skew_threshold=1,
                           test_size=0.2, random_state=156):
    """Preprocess ``df`` and return the full and split feature/target sets.

    Steps:
      1. ``get_preprocessed_df`` removes the 'sm_cat' and '일시' columns.
      2. Every non-object column whose skewness exceeds ``skew_threshold`` is
         replaced by ``log1p`` of itself. The target column is part of the
         frame at this point, so it is transformed too when it is skewed.
      3. The frame is split into features / target and then into train / test.

    NOTE(review): ``log1p`` yields nan/-inf for values <= -1; confirm no skewed
    column can contain such values.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw per-category frame containing 'sm_cat', '일시' and ``target_col``.
    target_col : str, default 'qty'
        Name of the target column.
    skew_threshold : float, default 1
        Columns with skewness strictly above this value are log1p-transformed.
    test_size : float, default 0.2
        Passed to ``train_test_split``.
    random_state : int, default 156
        Passed to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_features, y_target, X_train, X_test, y_train, y_test)``.
    """
    df = get_preprocessed_df(df)

    numeric_cols = df.dtypes[df.dtypes != 'object'].index
    skewness = df[numeric_cols].apply(lambda col: skew(col))
    skewed_cols = skewness[skewness > skew_threshold].index
    # Skip the assignment entirely when nothing qualifies.
    if len(skewed_cols) > 0:
        df[skewed_cols] = np.log1p(df[skewed_cols])

    X_features = df.drop(target_col, axis=1)
    y_target = df[target_col]
    X_train, X_test, y_train, y_test = train_test_split(
        X_features, y_target, test_size=test_size, random_state=random_state)

    return X_features, y_target, X_train, X_test, y_train, y_test

## 모델링

In [None]:
# One empty result table per product group; all five share the same schema
# (category, model name, chosen alpha and the evaluation metrics). Each name
# gets its own independent DataFrame object.
tpr_eval, tph_eval, hm_eval, air_eval, tp_eval = (
    pd.DataFrame(columns=['sm_cat', 'model', 'alpha', 'MSE', 'RMSE', 'MAE',
                          'MAPE', 'MPE', 'R2-score'])
    for _ in range(5)
)


In [None]:
# Candidate regularisation strengths for the grid searches.
# Ridge: 1.0 up to (but excluding) 100 in steps of 0.1, rounded to 2 decimals.
alp = np.arange(1,100,0.1)
Ridge_alp_list = [round(step, 2) for step in alp]

# Lasso: 0.0 up to (but excluding) 1 in steps of 0.001, rounded to 3 decimals.
# Note that the grid starts at alpha = 0.
alp = np.arange(0,1,0.001)
Lasso_alp_list = [round(step, 3) for step in alp]

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from scipy.stats import skew

## 냉난방

In [None]:
# For every heating/cooling category: fit LinearRegression plus grid-searched
# Ridge and Lasso, print their hold-out metrics and record them in tp_eval.
# Rows are gathered in a list and the frame is built once after the loop,
# because DataFrame.append no longer exists in pandas 2.x.
tp_rows = []
for cg_ in ['벽걸이형 냉온풍기','스탠드형 냉온풍기']:
    buy_weather_tp_cg = buy_weather_tp[buy_weather_tp['sm_cat'] == cg_]

    # These names are bound at notebook scope on purpose: get_rmses() evaluates
    # against the global X_test / y_test instead of taking them as arguments.
    X_features,y_target,X_train, X_test, y_train, y_test = get_train_test_dataset(buy_weather_tp_cg)

    lr_reg = LinearRegression()
    lr_reg.fit(X_train, y_train)

    # Ridge: pick alpha with GridSearchCV (default CV and scoring), then refit.
    ridge_reg = Ridge()
    param_grid_ridge = {'alpha': Ridge_alp_list}
    grid_search_ridge = GridSearchCV(ridge_reg, param_grid=param_grid_ridge)
    grid_search_ridge.fit(X_train, y_train)
    best_alpha_ridge = grid_search_ridge.best_params_['alpha']
    ridge_reg_GS = Ridge(alpha= best_alpha_ridge)
    ridge_reg_GS.fit(X_train, y_train)

    # Lasso: same procedure with the Lasso alpha grid.
    lasso_reg = Lasso()
    param_grid_lasso = {'alpha': Lasso_alp_list}
    grid_search_lasso = GridSearchCV(lasso_reg, param_grid=param_grid_lasso)
    grid_search_lasso.fit(X_train, y_train)
    best_alpha_lasso = grid_search_lasso.best_params_['alpha']
    lasso_reg_GS=Lasso(alpha=best_alpha_lasso)
    lasso_reg_GS.fit(X_train, y_train)

    # NOTE(review): the default-alpha Lasso fit and `models` are not used by
    # this cell; kept in case other cells read them.
    lasso_reg.fit(X_train, y_train)
    models = [lr_reg, ridge_reg_GS, lasso_reg_GS]

    print(cg_)
    print('===========================================================================================================')
    for model_label, fitted_model, chosen_alpha in (
        ('LR', lr_reg, None),
        ('Ridge', ridge_reg_GS, best_alpha_ridge),
        ('Lasso', lasso_reg_GS, best_alpha_lasso),
    ):
        rmse,mse,mae,mape,mpe,r2 = get_rmses(fitted_model)
        row = {'sm_cat': cg_, 'model': model_label, 'MSE': mse, 'RMSE': rmse,
               'MAE': mae, 'MAPE': mape, 'MPE': mpe, 'R2-score': r2}
        # LinearRegression has no alpha; leaving the key out yields NaN.
        if chosen_alpha is not None:
            row['alpha'] = chosen_alpha
        tp_rows.append(row)
    print('Ridge Alpha = ',best_alpha_ridge,'\n')
    print('Lasso Alpha = ',best_alpha_lasso,'\n')

    print('===========================================================================================================')

# Rebuild the result table with its existing column layout; re-running this
# cell therefore replaces the rows instead of duplicating them.
tp_eval = pd.DataFrame(tp_rows, columns=tp_eval.columns)

In [None]:
# Summarise tp_eval per (sm_cat, model); the two keys become the row index.
# sum() adds up rows sharing a key; each key is expected to appear once, in
# which case this only re-indexes the table.
# NOTE(review): LR rows have no alpha; the summed table presumably shows 0
# there rather than a missing value - verify before interpreting alpha.
tp_eval_1 = tp_eval.groupby(['sm_cat','model']).sum()
tp_eval_1

## 냉방

In [None]:
# For every cooling category: fit LinearRegression plus grid-searched Ridge
# and Lasso, print their hold-out metrics and record them in tpr_eval.
# Rows are gathered in a list and the frame is built once after the loop,
# because DataFrame.append no longer exists in pandas 2.x.
tpr_rows = []
for cg_ in ['멀티형 에어컨', '벽걸이 에어컨', '벽걸이형 선풍기', '스탠드형 에어컨', '에어컨 리모컨', '탁상/USB 선풍기',
       '휴대용 선풍기', '냉풍기', '이동형 에어컨', '중대형 에어컨', '천장형 에어컨', '에어커튼', '업소용 선풍기']:
    buy_weather_tpr_cg = buy_weather_tpr[buy_weather_tpr['sm_cat'] == cg_]

    # These names are bound at notebook scope on purpose: get_rmses() evaluates
    # against the global X_test / y_test instead of taking them as arguments.
    X_features,y_target,X_train, X_test, y_train, y_test = get_train_test_dataset(buy_weather_tpr_cg)

    lr_reg = LinearRegression()
    lr_reg.fit(X_train, y_train)

    # Ridge: pick alpha with GridSearchCV (default CV and scoring), then refit.
    ridge_reg = Ridge()
    param_grid_ridge = {'alpha': Ridge_alp_list}
    grid_search_ridge = GridSearchCV(ridge_reg, param_grid=param_grid_ridge)
    grid_search_ridge.fit(X_train, y_train)
    best_alpha_ridge = grid_search_ridge.best_params_['alpha']
    ridge_reg_GS = Ridge(alpha= best_alpha_ridge)
    ridge_reg_GS.fit(X_train, y_train)

    # Lasso: same procedure with the Lasso alpha grid.
    lasso_reg = Lasso()
    param_grid_lasso = {'alpha': Lasso_alp_list}
    grid_search_lasso = GridSearchCV(lasso_reg, param_grid=param_grid_lasso)
    grid_search_lasso.fit(X_train, y_train)
    best_alpha_lasso = grid_search_lasso.best_params_['alpha']
    lasso_reg_GS=Lasso(alpha=best_alpha_lasso)
    lasso_reg_GS.fit(X_train, y_train)

    # NOTE(review): the default-alpha Lasso fit and `models` are not used by
    # this cell; kept in case other cells read them.
    lasso_reg.fit(X_train, y_train)
    models = [lr_reg, ridge_reg_GS, lasso_reg_GS]

    print(cg_)
    print('===========================================================================================================')
    for model_label, fitted_model, chosen_alpha in (
        ('LR', lr_reg, None),
        ('Ridge', ridge_reg_GS, best_alpha_ridge),
        ('Lasso', lasso_reg_GS, best_alpha_lasso),
    ):
        rmse,mse,mae,mape,mpe,r2 = get_rmses(fitted_model)
        row = {'sm_cat': cg_, 'model': model_label, 'MSE': mse, 'RMSE': rmse,
               'MAE': mae, 'MAPE': mape, 'MPE': mpe, 'R2-score': r2}
        # LinearRegression has no alpha; leaving the key out yields NaN.
        if chosen_alpha is not None:
            row['alpha'] = chosen_alpha
        tpr_rows.append(row)
    print('Ridge Alpha = ',best_alpha_ridge,'\n')
    print('Lasso Alpha = ',best_alpha_lasso,'\n')

    print('===========================================================================================================')

# Rebuild the result table with its existing column layout; re-running this
# cell therefore replaces the rows instead of duplicating them.
tpr_eval = pd.DataFrame(tpr_rows, columns=tpr_eval.columns)

In [None]:
# Summarise tpr_eval per (sm_cat, model); the two keys become the row index.
# sum() adds up rows sharing a key; each key is expected to appear once, in
# which case this only re-indexes the table.
# NOTE(review): LR rows have no alpha; the summed table presumably shows 0
# there rather than a missing value - verify before interpreting alpha.
tpr_eval_1 = tpr_eval.groupby(['sm_cat','model']).sum()
tpr_eval_1

## 난방

In [None]:
# For every heating category: fit LinearRegression plus grid-searched Ridge
# and Lasso, print their hold-out metrics and record them in tph_eval.
# Rows are gathered in a list and the frame is built once after the loop,
# because DataFrame.append no longer exists in pandas 2.x.
tph_rows = []
for cg_ in ['가스온수기', '난방용 열풍기', '돈풍기', '라디에이터', '보일러', '온수매트', '온열매트',
       '온풍기', '전기온수기', '전기장판', '카페트매트', '컨벡터', '황토매트', '히터']:
    buy_weather_tph_cg = buy_weather_tph[buy_weather_tph['sm_cat'] == cg_]

    # These names are bound at notebook scope on purpose: get_rmses() evaluates
    # against the global X_test / y_test instead of taking them as arguments.
    X_features,y_target,X_train, X_test, y_train, y_test = get_train_test_dataset(buy_weather_tph_cg)

    lr_reg = LinearRegression()
    lr_reg.fit(X_train, y_train)

    # Ridge: pick alpha with GridSearchCV (default CV and scoring), then refit.
    ridge_reg = Ridge()
    param_grid_ridge = {'alpha': Ridge_alp_list}
    grid_search_ridge = GridSearchCV(ridge_reg, param_grid=param_grid_ridge)
    grid_search_ridge.fit(X_train, y_train)
    best_alpha_ridge = grid_search_ridge.best_params_['alpha']
    ridge_reg_GS = Ridge(alpha= best_alpha_ridge)
    ridge_reg_GS.fit(X_train, y_train)

    # Lasso: same procedure with the Lasso alpha grid.
    lasso_reg = Lasso()
    param_grid_lasso = {'alpha': Lasso_alp_list}
    grid_search_lasso = GridSearchCV(lasso_reg, param_grid=param_grid_lasso)
    grid_search_lasso.fit(X_train, y_train)
    best_alpha_lasso = grid_search_lasso.best_params_['alpha']
    lasso_reg_GS=Lasso(alpha=best_alpha_lasso)
    lasso_reg_GS.fit(X_train, y_train)

    # NOTE(review): the default-alpha Lasso fit and `models` are not used by
    # this cell; kept in case other cells read them.
    lasso_reg.fit(X_train, y_train)
    models = [lr_reg, ridge_reg_GS, lasso_reg_GS]

    # Print the category so each metrics section can be identified.
    print(cg_)
    print('===========================================================================================================')
    for model_label, fitted_model, chosen_alpha in (
        ('LR', lr_reg, None),
        ('Ridge', ridge_reg_GS, best_alpha_ridge),
        ('Lasso', lasso_reg_GS, best_alpha_lasso),
    ):
        rmse,mse,mae,mape,mpe,r2 = get_rmses(fitted_model)
        row = {'sm_cat': cg_, 'model': model_label, 'MSE': mse, 'RMSE': rmse,
               'MAE': mae, 'MAPE': mape, 'MPE': mpe, 'R2-score': r2}
        # LinearRegression has no alpha; leaving the key out yields NaN.
        if chosen_alpha is not None:
            row['alpha'] = chosen_alpha
        tph_rows.append(row)
    print('Ridge Alpha = ',best_alpha_ridge,'\n')
    print('Lasso Alpha = ',best_alpha_lasso,'\n')

    print('===========================================================================================================')

# Rebuild the result table with its existing column layout; re-running this
# cell therefore replaces the rows instead of duplicating them.
tph_eval = pd.DataFrame(tph_rows, columns=tph_eval.columns)

In [None]:
# Summarise tph_eval per (sm_cat, model); the two keys become the row index.
# sum() adds up rows sharing a key; each key is expected to appear once, in
# which case this only re-indexes the table.
# NOTE(review): LR rows have no alpha; the summed table presumably shows 0
# there rather than a missing value - verify before interpreting alpha.
tph_eval_1 = tph_eval.groupby(['sm_cat','model']).sum()
tph_eval_1

## 습도

In [None]:
# For every humidity category: fit LinearRegression plus grid-searched Ridge
# and Lasso, print their hold-out metrics and record them in hm_eval.
# Rows are gathered in a list and the frame is built once after the loop,
# because DataFrame.append no longer exists in pandas 2.x.
hm_rows = []
for cg_ in ['제습기','초음파식 가습기','자연식 가습기','복합식 가습기','신발건조기','의류건조기','가열식 가습기','에어워셔']:
    buy_weather_hm_cg = buy_weather_hm[buy_weather_hm['sm_cat'] == cg_]

    # These names are bound at notebook scope on purpose: get_rmses() evaluates
    # against the global X_test / y_test instead of taking them as arguments.
    X_features,y_target,X_train, X_test, y_train, y_test = get_train_test_dataset(buy_weather_hm_cg)

    lr_reg = LinearRegression()
    lr_reg.fit(X_train, y_train)

    # Ridge: pick alpha with GridSearchCV (default CV and scoring), then refit.
    ridge_reg = Ridge()
    param_grid_ridge = {'alpha': Ridge_alp_list}
    grid_search_ridge = GridSearchCV(ridge_reg, param_grid=param_grid_ridge)
    grid_search_ridge.fit(X_train, y_train)
    best_alpha_ridge = grid_search_ridge.best_params_['alpha']
    ridge_reg_GS = Ridge(alpha= best_alpha_ridge)
    ridge_reg_GS.fit(X_train, y_train)

    # Lasso: same procedure with the Lasso alpha grid.
    lasso_reg = Lasso()
    param_grid_lasso = {'alpha': Lasso_alp_list}
    grid_search_lasso = GridSearchCV(lasso_reg, param_grid=param_grid_lasso)
    grid_search_lasso.fit(X_train, y_train)
    best_alpha_lasso = grid_search_lasso.best_params_['alpha']
    lasso_reg_GS=Lasso(alpha=best_alpha_lasso)
    lasso_reg_GS.fit(X_train, y_train)

    # NOTE(review): the default-alpha Lasso fit and `models` are not used by
    # this cell; kept in case other cells read them.
    lasso_reg.fit(X_train, y_train)
    models = [lr_reg, ridge_reg_GS, lasso_reg_GS]

    # Print the category so each metrics section can be identified.
    print(cg_)
    print('===========================================================================================================')
    for model_label, fitted_model, chosen_alpha in (
        ('LR', lr_reg, None),
        ('Ridge', ridge_reg_GS, best_alpha_ridge),
        ('Lasso', lasso_reg_GS, best_alpha_lasso),
    ):
        rmse,mse,mae,mape,mpe,r2 = get_rmses(fitted_model)
        row = {'sm_cat': cg_, 'model': model_label, 'MSE': mse, 'RMSE': rmse,
               'MAE': mae, 'MAPE': mape, 'MPE': mpe, 'R2-score': r2}
        # LinearRegression has no alpha; leaving the key out yields NaN.
        if chosen_alpha is not None:
            row['alpha'] = chosen_alpha
        hm_rows.append(row)
    print('Ridge Alpha = ',best_alpha_ridge,'\n')
    print('Lasso Alpha = ',best_alpha_lasso,'\n')

    print('===========================================================================================================')

# Rebuild the result table with its existing column layout; re-running this
# cell therefore replaces the rows instead of duplicating them.
hm_eval = pd.DataFrame(hm_rows, columns=hm_eval.columns)

In [None]:
# Summarise hm_eval per (sm_cat, model); the two keys become the row index.
# sum() adds up rows sharing a key; each key is expected to appear once, in
# which case this only re-indexes the table.
# NOTE(review): LR rows have no alpha; the summed table presumably shows 0
# there rather than a missing value - verify before interpreting alpha.
hm_eval_1 = hm_eval.groupby(['sm_cat','model']).sum()
hm_eval_1

In [None]:
# For every air-quality category: fit LinearRegression plus grid-searched
# Ridge and Lasso, print their hold-out metrics and record them in air_eval.
# Rows are gathered in a list and the frame is built once after the loop,
# because DataFrame.append no longer exists in pandas 2.x.
air_rows = []
for cg_ in ['공기청정기','공기정화 용품','산림욕기']:
    buy_weather_ar_cg = buy_weather_ar[buy_weather_ar['sm_cat'] == cg_]

    # These names are bound at notebook scope on purpose: get_rmses() evaluates
    # against the global X_test / y_test instead of taking them as arguments.
    X_features,y_target,X_train, X_test, y_train, y_test = get_train_test_dataset(buy_weather_ar_cg)

    lr_reg = LinearRegression()
    lr_reg.fit(X_train, y_train)

    # Ridge: pick alpha with GridSearchCV (default CV and scoring), then refit.
    ridge_reg = Ridge()
    param_grid_ridge = {'alpha': Ridge_alp_list}
    grid_search_ridge = GridSearchCV(ridge_reg, param_grid=param_grid_ridge)
    grid_search_ridge.fit(X_train, y_train)
    best_alpha_ridge = grid_search_ridge.best_params_['alpha']
    ridge_reg_GS = Ridge(alpha= best_alpha_ridge)
    ridge_reg_GS.fit(X_train, y_train)

    # Lasso: same procedure with the Lasso alpha grid.
    lasso_reg = Lasso()
    param_grid_lasso = {'alpha': Lasso_alp_list}
    grid_search_lasso = GridSearchCV(lasso_reg, param_grid=param_grid_lasso)
    grid_search_lasso.fit(X_train, y_train)
    best_alpha_lasso = grid_search_lasso.best_params_['alpha']
    lasso_reg_GS=Lasso(alpha=best_alpha_lasso)
    lasso_reg_GS.fit(X_train, y_train)

    # NOTE(review): the default-alpha Lasso fit and `models` are not used by
    # this cell; kept in case other cells read them.
    lasso_reg.fit(X_train, y_train)
    models = [lr_reg, ridge_reg_GS, lasso_reg_GS]

    # Print the category so each metrics section can be identified.
    print(cg_)
    print('===========================================================================================================')
    for model_label, fitted_model, chosen_alpha in (
        ('LR', lr_reg, None),
        ('Ridge', ridge_reg_GS, best_alpha_ridge),
        ('Lasso', lasso_reg_GS, best_alpha_lasso),
    ):
        rmse,mse,mae,mape,mpe,r2 = get_rmses(fitted_model)
        row = {'sm_cat': cg_, 'model': model_label, 'MSE': mse, 'RMSE': rmse,
               'MAE': mae, 'MAPE': mape, 'MPE': mpe, 'R2-score': r2}
        # LinearRegression has no alpha; leaving the key out yields NaN.
        if chosen_alpha is not None:
            row['alpha'] = chosen_alpha
        air_rows.append(row)
    print('Ridge Alpha = ',best_alpha_ridge,'\n')
    print('Lasso Alpha = ',best_alpha_lasso,'\n')

    print('===========================================================================================================')

# Rebuild the result table with its existing column layout; re-running this
# cell therefore replaces the rows instead of duplicating them.
air_eval = pd.DataFrame(air_rows, columns=air_eval.columns)

In [None]:
# Summarise air_eval per (sm_cat, model); the two keys become the row index.
# sum() adds up rows sharing a key; each key is expected to appear once, in
# which case this only re-indexes the table.
# NOTE(review): LR rows have no alpha; the summed table presumably shows 0
# there rather than a missing value - verify before interpreting alpha.
air_eval_1 = air_eval.groupby(['sm_cat','model']).sum()
air_eval_1

In [None]:
# Write each summary table to a CP949-encoded CSV, keeping the
# (sm_cat, model) index as leading columns.
# NOTE(review): tp_eval_1 (heating/cooling) is not exported here - confirm
# whether that omission is intentional.
for summary_table, csv_path in (
    (tpr_eval_1, '../활용 데이터셋/tpr_eval.csv'),
    (tph_eval_1, '../활용 데이터셋/tph_eval.csv'),
    (hm_eval_1, '../활용 데이터셋/hm_eval.csv'),
    (air_eval_1, '../활용 데이터셋/air_eval.csv'),
):
    summary_table.to_csv(csv_path, encoding='cp949', index=True)