In [27]:
import pandas as pd
import numpy as np
from scipy.stats import skew
from scipy.stats import spearmanr

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet, LassoLars
from sklearn.svm import SVR

from xgboost import XGBRegressor
import lightgbm as lgb
from lightgbm import LGBMRegressor
import catboost
from catboost import CatBoostRegressor

from supervised.automl import AutoML

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt # seaborn figure 크기 조절을 위해서
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
# Korean font setup for matplotlib. The original note describes this as a
# per-OS setting, but only the 'Malgun Gothic' family is configured here.
plt.rc('font', family='Malgun Gothic')
plt.rc('axes', unicode_minus=False) # minus-sign rendering setting (axes.unicode_minus off)
# Make inline figure text render sharply by using the 'retina' figure format.
%config InlineBackend.figure_format = 'retina'

import warnings
# Silence FutureWarning and UserWarning messages (registered in this order).
for _category in (FutureWarning, UserWarning):
    warnings.filterwarnings("ignore", category=_category)

# Fixed seed for NumPy's global random generator.
SEED = 2
np.random.seed(SEED)

# CV(RMSE) as given on p.6 of the paper "Variable importance and machine-learning
# model evaluation for improving energy-consumption prediction performance by
# building type" (title translated from Korean).
def cvrmse_score(y_true, y_pred):
    """Return the coefficient of variation of the RMSE, in percent.

    Computed as ``sqrt(mean_squared_error(y_true, y_pred)) / y_true.mean() * 100``.

    Args:
        y_true: Observed values. Must expose a ``.mean()`` method (the cells in
            this notebook pass pandas Series).
        y_pred: Predicted values to compare against ``y_true``.

    Returns:
        The RMSE expressed as a percentage of the mean of ``y_true``.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mean_observed = y_true.mean()
    return (rmse / mean_observed) * 100

# Results: CV(RMSE) of whole-model vs. partial-model predictions

In [28]:
# Load the PURE result table; the first CSV column is used as the index.
# Later cells read its 실제 (actual), 통모델_예측 (whole-model prediction) and
# 부분모델_예측 (partial-model prediction) columns.
PURE = pd.read_csv('data/result/PURE.csv', index_col=0)

## DV_N

In [29]:
# Load the DV_N result tables (first CSV column becomes the index).
# Each variable is bound to the file whose suffix matches its name: previously
# DV_N_OP_0 read 'DV_N_op_1.csv' and DV_N_OP_1 read 'DV_N_op_0.csv'. The two
# frames are only concatenated afterwards, so the combined table is unaffected.
DV_N_OP_N = pd.read_csv('data/result/DV_N_op_N.csv', index_col=0)
DV_N_OP_0 = pd.read_csv('data/result/DV_N_op_0.csv', index_col=0)
DV_N_OP_1 = pd.read_csv('data/result/DV_N_op_1.csv', index_col=0)

In [30]:
# Display the DV_N_OP_N table (actual values next to both models' predictions).
DV_N_OP_N

Unnamed: 0_level_0,실제,부분모델_예측,통모델_예측
useDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-08 00:00:00,893.1,995.874960,1011.829417
2023-08-08 01:00:00,824.4,940.016576,943.900585
2023-08-08 02:00:00,847.3,902.994313,909.170565
2023-08-08 03:00:00,824.4,876.300951,792.691205
2023-08-08 04:00:00,778.6,935.360427,925.912996
...,...,...,...
2023-10-19 19:00:00,2679.3,2431.321275,2393.055688
2023-10-19 20:00:00,1648.8,1905.486707,1841.723433
2023-10-19 21:00:00,1099.2,1635.115023,1641.409662
2023-10-19 22:00:00,801.5,1272.560071,1256.461661


In [31]:
# Stack the two DV_N op tables and order the combined rows by useDate.
DV_N_OP_Y = pd.concat([DV_N_OP_0, DV_N_OP_1]).sort_values(by='useDate')
DV_N_OP_Y

Unnamed: 0_level_0,실제,부분모델_예측,통모델_예측
useDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-08 00:00:00,893.1,1002.952102,1020.379657
2023-08-08 01:00:00,824.4,944.902798,931.219165
2023-08-08 02:00:00,847.3,922.149284,896.514392
2023-08-08 03:00:00,824.4,867.992220,696.685634
2023-08-08 04:00:00,778.6,933.215497,856.877466
...,...,...,...
2023-10-19 19:00:00,2679.3,2484.688755,2426.483203
2023-10-19 20:00:00,1648.8,2023.320362,1835.215088
2023-10-19 21:00:00,1099.2,1663.750000,1681.610669
2023-10-19 22:00:00,801.5,1280.869529,1230.009363


In [32]:
# Report CV(RMSE) for the whole-model (통모델) and partial-model (부분모델)
# predictions on the DV_N tables. The first label used to read "fCV(RMSE)"
# (stray "f"); it now matches the other three lines.
print(f"통모델_DV_N_OP_N CV(RMSE) SCORE: {cvrmse_score(DV_N_OP_N.실제, DV_N_OP_N.통모델_예측)}")
print(f"부분모델_DV_N_OP_N CV(RMSE) SCORE: {cvrmse_score(DV_N_OP_N.실제, DV_N_OP_N.부분모델_예측)}")
print(f"통모델_DV_N_OP_Y CV(RMSE) SCORE: {cvrmse_score(DV_N_OP_Y.실제, DV_N_OP_Y.통모델_예측)}")
print(f"부분모델_DV_N_OP_Y CV(RMSE) SCORE: {cvrmse_score(DV_N_OP_Y.실제, DV_N_OP_Y.부분모델_예측)}")

통모델_DV_N_OP_N fCV(RMSE) SCORE: 26.745063922742723
부분모델_DV_N_OP_N CV(RMSE) SCORE: 25.598869180428803
통모델_DV_N_OP_Y CV(RMSE) SCORE: 25.96832896714864
부분모델_DV_N_OP_Y CV(RMSE) SCORE: 26.281358623251133


## DV_Y

In [33]:
# Load the DV_Y result tables (first CSV column becomes the index).
# Each variable is bound to the file whose suffix matches its name: previously
# DV_Y_OP_0 read 'DV_Y_op_1.csv' and DV_Y_OP_1 read 'DV_Y_op_0.csv'. The two
# frames are only concatenated afterwards, so the combined table is unaffected.
DV_Y_OP_N = pd.read_csv('data/result/DV_Y_op_N.csv', index_col=0)
DV_Y_OP_0 = pd.read_csv('data/result/DV_Y_op_0.csv', index_col=0)
DV_Y_OP_1 = pd.read_csv('data/result/DV_Y_op_1.csv', index_col=0)

In [34]:
# Display the DV_Y_OP_N table (actual values next to both models' predictions).
DV_Y_OP_N

Unnamed: 0_level_0,실제,부분모델_예측,통모델_예측
useDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-08 00:00:00,893.1,1008.623871,1005.587352
2023-08-08 01:00:00,824.4,933.800620,934.746989
2023-08-08 02:00:00,847.3,884.734038,917.740448
2023-08-08 03:00:00,824.4,907.076711,895.783942
2023-08-08 04:00:00,778.6,900.510654,934.887759
...,...,...,...
2023-10-19 19:00:00,2679.3,2437.817424,2524.056761
2023-10-19 20:00:00,1648.8,1898.513074,1838.321795
2023-10-19 21:00:00,1099.2,1701.386729,1645.355012
2023-10-19 22:00:00,801.5,1286.536698,1283.204560


In [35]:
# Stack the two DV_Y op tables and order the combined rows by useDate.
DV_Y_OP_Y = pd.concat([DV_Y_OP_0, DV_Y_OP_1]).sort_values(by='useDate')
DV_Y_OP_Y

Unnamed: 0_level_0,실제,부분모델_예측,통모델_예측
useDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-08-08 00:00:00,893.1,993.245026,987.778248
2023-08-08 01:00:00,824.4,939.777976,930.055871
2023-08-08 02:00:00,847.3,916.787943,900.235637
2023-08-08 03:00:00,824.4,918.699771,923.562882
2023-08-08 04:00:00,778.6,927.500400,914.713713
...,...,...,...
2023-10-19 19:00:00,2679.3,2502.346635,2476.416760
2023-10-19 20:00:00,1648.8,1950.425714,1953.588289
2023-10-19 21:00:00,1099.2,1682.192230,1689.117146
2023-10-19 22:00:00,801.5,1277.320552,1228.647582


In [39]:
# CV(RMSE) of the whole-model and partial-model predictions on the PURE table.
pure_actual = PURE["실제"]
print(f"통모델_PURE CV(RMSE) SCORE: {cvrmse_score(pure_actual, PURE['통모델_예측'])}")
print(f"부분모델_PURE CV(RMSE) SCORE: {cvrmse_score(pure_actual, PURE['부분모델_예측'])}")

통모델_PURE CV(RMSE) SCORE: 27.210724041744665
부분모델_PURE CV(RMSE) SCORE: 25.600041441534156


In [37]:
# CV(RMSE) of both model variants on the DV_N tables: OP_N first, then OP_Y.
dv_n_actual = DV_N_OP_N["실제"]
print(f"통모델_DV_N_OP_N CV(RMSE) SCORE: {cvrmse_score(dv_n_actual, DV_N_OP_N['통모델_예측'])}")
print(f"부분모델_DV_N_OP_N CV(RMSE) SCORE: {cvrmse_score(dv_n_actual, DV_N_OP_N['부분모델_예측'])}")
dv_n_actual = DV_N_OP_Y["실제"]
print(f"통모델_DV_N_OP_Y CV(RMSE) SCORE: {cvrmse_score(dv_n_actual, DV_N_OP_Y['통모델_예측'])}")
print(f"부분모델_DV_N_OP_Y CV(RMSE) SCORE: {cvrmse_score(dv_n_actual, DV_N_OP_Y['부분모델_예측'])}")

통모델_DV_N_OP_N CV(RMSE) SCORE: 26.745063922742723
부분모델_DV_N_OP_N CV(RMSE) SCORE: 25.598869180428803
통모델_DV_N_OP_Y CV(RMSE) SCORE: 25.96832896714864
부분모델_DV_N_OP_Y CV(RMSE) SCORE: 26.281358623251133


In [38]:
# CV(RMSE) of both model variants on the DV_Y tables: OP_N first, then OP_Y.
dv_y_actual = DV_Y_OP_N["실제"]
print(f"통모델_DV_Y_OP_N CV(RMSE) SCORE: {cvrmse_score(dv_y_actual, DV_Y_OP_N['통모델_예측'])}")
print(f"부분모델_DV_Y_OP_N CV(RMSE) SCORE: {cvrmse_score(dv_y_actual, DV_Y_OP_N['부분모델_예측'])}")
dv_y_actual = DV_Y_OP_Y["실제"]
print(f"통모델_DV_Y_OP_Y CV(RMSE) SCORE: {cvrmse_score(dv_y_actual, DV_Y_OP_Y['통모델_예측'])}")
print(f"부분모델_DV_Y_OP_Y CV(RMSE) SCORE: {cvrmse_score(dv_y_actual, DV_Y_OP_Y['부분모델_예측'])}")

통모델_DV_Y_OP_N CV(RMSE) SCORE: 24.426776810624204
부분모델_DV_Y_OP_N CV(RMSE) SCORE: 24.031282431446797
통모델_DV_Y_OP_Y CV(RMSE) SCORE: 24.340871994404132
부분모델_DV_Y_OP_Y CV(RMSE) SCORE: 24.968757940916458
