# 1. 주요 파라미터 설정

In [83]:
# Forecast window configuration.
nLookBackDays = 5   # look-back window length in days (passed to the to_supervised* helpers)
nForecastDays = 1   # forecast window length in days (passed to the to_supervised* helpers)
hoursPerDay = 24    # number of hourly rows that make up one day
# Forecast horizon expressed in hourly steps.
nForecastHours = hoursPerDay * nForecastDays

In [84]:
import os
# Set CUDA_VISIBLE_DEVICES to "0" for this process (presumably to pin work to the first GPU).
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [85]:
# 추가 패키지
import numpy as np
import random
from math import sqrt
from sklearn.metrics import mean_squared_error
import pandas as pd
from tensorflow import keras

import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
plt.style.use('ggplot')

from MyUtils import split_dataset
from MyUtils import to_supervisedDaily # 1-24시간 단위로 분할
from MyUtils import to_supervisedContinuousHours # 연속된 모든 24시간 단위로 분할
from MyUtils import plotTrainingProgress
from MyModels import *
from ForcingMaxUtils import test_predict_mergedInput_mergedOutput_forcingMax

# 2. 데이터 준비하기

In [86]:
# Load the 'data' sheet of the hourly power workbook into a DataFrame.
pdDataset = pd.read_excel(
    '3A)HourlyPower(SeparateDate)(AnyDayStart)(NoWeekend).xlsx',
    sheet_name='data',
)

In [87]:
# Number of rows in the loaded sheet.
datalen = len(pdDataset)
print(datalen)

4344


In [88]:
# Derive the constants needed below.
# Whole days covered by the rows; a trailing partial day is dropped.
numDaysTotal = datalen // hoursPerDay
print("Number of days in total :", numDaysTotal)

Number of days in total : 181


In [89]:
# Per-day aggregates: one entry per day, converted to NumPy arrays at the end.
perDayTotalPowerList = []  # hourly total-power values of each day, kept as a list
perDayTotalPower = []      # sum of each day's hourly total-power values
perDayPeakPowerList = []   # hourly peak-power values of each day, kept as a list
perDayPeakPower = []       # largest hourly peak-power value of each day
perDayDateInfo = []        # "Year.Month.Day" label of each day

for dayIdx in range(numDaysTotal):
    dayStart = dayIdx * hoursPerDay
    dayRows = range(dayStart, dayStart + hoursPerDay)

    # The date label is taken from the first hourly row of the day.
    perDayDateInfo.append(
        ".".join(str(pdDataset[col][dayStart]) for col in ("Year", "Month", "Day"))
    )

    hourlyTotals = [pdDataset["TotalPower(kWh)"][row] for row in dayRows]
    hourlyPeaks = [pdDataset["PeakPower(kW)"][row] for row in dayRows]

    perDayTotalPowerList.append(hourlyTotals)
    perDayTotalPower.append(sum(hourlyTotals))
    perDayPeakPowerList.append(hourlyPeaks)
    perDayPeakPower.append(max(hourlyPeaks))

# Freeze the accumulated lists into arrays.
perDayTotalPowerList = np.array(perDayTotalPowerList)
perDayTotalPower = np.array(perDayTotalPower)
perDayPeakPowerList = np.array(perDayPeakPowerList)
perDayPeakPower = np.array(perDayPeakPower)
perDayDateInfo = np.array(perDayDateInfo)

In [90]:
"""
학습에 사용할 샘플 수 정하기
터미널 출력 결과를 보고 비중(portion)을 적절히 골랐음
"""
# Fraction of all days used for training; the remaining days are forecast.
portion = 0.945
howManyDaysToTrain = int(portion * numDaysTotal)

# Report the resulting train / forecast split.
print("".join(["Train for ", str(howManyDaysToTrain),
               " days out of ", str(numDaysTotal), " days"]))
splitCounts = (numDaysTotal - howManyDaysToTrain, numDaysTotal)
print("Forecast for %d days out of %d days." % splitCounts)

Train for 171 days out of 181 days
Forecast for 10 days out of 181 days.


In [91]:
# Hourly peak-power column as a NumPy array, handed to split_dataset together
# with the number of training days and the hours-per-day constant.
rawHourlyPeakPower = pdDataset["PeakPower(kW)"].values
(hourlyPeakPowerTrain,
 hourlyPeakPowerTest) = split_dataset(rawHourlyPeakPower, howManyDaysToTrain, hoursPerDay)

In [92]:
# Training pairs come from to_supervisedContinuousHours.
(XHourlyPeakPowerTrain,
 yHourlyPeakPowerTrain) = to_supervisedContinuousHours(
    hourlyPeakPowerTrain, nLookBackDays, nForecastDays)

# Evaluation uses data cut into 1-24h daily units, hence to_supervisedDaily.
(XHourlyPeakPowerTest,
 yHourlyPeakPowerTest) = to_supervisedDaily(
    hourlyPeakPowerTest, nLookBackDays, nForecastDays)

In [93]:
# Pre-compute the peak of every training target window; it is used as the
# conditional input.
numSamples = yHourlyPeakPowerTrain.shape[0]
XContinuousDailyPeakPowerTrain = np.array(
    [max(yHourlyPeakPowerTrain[k]) for k in range(numSamples)]
)

In [94]:
# Pre-compute the peak of every test target window; it is used as the
# conditional input.
numSamples = yHourlyPeakPowerTest.shape[0]
XContinuousDailyPeakPowerTest = np.array(
    [max(yHourlyPeakPowerTest[k]) for k in range(numSamples)]
)

In [95]:
from MyModels import build_transformer_conditionalInput_maxOutput_model_gAvgPooling
from MyUtils import test_predict_mergedInput_mergedOutput_forcingMax_multiplemodels
from sklearn.metrics import mean_absolute_error, mean_squared_error

# 3. 모델 불러오기

In [None]:
# Models trained on 1-24h (daily) windows.
modelA1, modelA2, modelA3 = [
    keras.models.load_model(savedPath)
    for savedPath in ('modelAsave/model1.txt',
                      'modelAsave/model2.txt',
                      'modelAsave/model3.txt')
]

In [None]:
# Models trained on continuous 24-hour windows.
modelB1, modelB2, modelB3 = [
    keras.models.load_model(savedPath)
    for savedPath in ('modelBsave/model1.h5',
                      'modelBsave/model2.h5',
                      'modelBsave/model3.h5')
]

# 4. PEAK 전력 예측하기 : 테스트 케이스에 대해서 예측하기

## 4.1. Model-A의 결과

In [None]:
# Predict with the three model-A networks; evaluation uses the data cut into
# 1-24h daily units. The three models are max-combined.
test_predict_mergedInput_mergedOutput_forcingMax_multiplemodels(
    modelA1, modelA2, modelA3,
    [XHourlyPeakPowerTest, XContinuousDailyPeakPowerTest],
    [yHourlyPeakPowerTest, XContinuousDailyPeakPowerTest],
    "Peak power prediction : model A series",
)

## 4.2. Model-B의 결과

In [None]:
# Predict with the three model-B networks; evaluation uses the data cut into
# 1-24h daily units. The three models are max-combined.
# The title now names model B; it was previously copy-pasted as "model A series".
test_predict_mergedInput_mergedOutput_forcingMax_multiplemodels(modelB1, modelB2, modelB3, 
             [XHourlyPeakPowerTest,XContinuousDailyPeakPowerTest], 
             [yHourlyPeakPowerTest,XContinuousDailyPeakPowerTest], 
             "Peak power prediction : model B series")

## 4.3. 통합 모델 결과

In [None]:
# 유틸리티 함수
def test_predict_mergedInput_mergedOutput_forcingMax_3models_noFig(model1, model2, model3, Xmerged, ymerged):
    # X 데이터 분리
    X = Xmerged[0]
    conditionalX = Xmerged[1]
    # y 데이터 분리
    y = ymerged[0]
    maxy = ymerged[1]

    ts_pred = [] # 시계열 예측값을 저장할 리스트
    ts_actual = y.reshape(y.shape[0] * y.shape[1]) # 시계열 정답을 저장할 리스트
    
    ts_max_pred = [] # 24시간 예측 단위로, 예측으로 생성한 시계열 데이터의 max 저장할 리스트
    ts_max_actual = [] # 24시간 예측 단위로, 정답에 해당하는 시계열 데이터의 max 저장할 리스트
    
    for i in range(X.shape[0]):
        x_sample1 = X[i].reshape(1, len(X[i]), 1)
        x_sample2 = conditionalX[i].reshape(1, len(conditionalX[i]), 1)
        
        [y_hat1, y_hat_max1] = model1.predict([x_sample1, x_sample2])
        [y_hat2, y_hat_max2] = model2.predict([x_sample1, x_sample2])
        [y_hat3, y_hat_max3] = model3.predict([x_sample1, x_sample2])
            
        y_hat_values1 = y_hat1[0].reshape(len(y_hat1[0]),).tolist()
        y_hat_values2 = y_hat2[0].reshape(len(y_hat2[0]),).tolist()
        y_hat_values3 = y_hat3[0].reshape(len(y_hat3[0]),).tolist()
        
        y_hat_values = []
        for j in range(len(y_hat_values1)):
            y_hat_values.append(max(y_hat_values1[j],y_hat_values2[j],y_hat_values3[j]))

        if len(ts_pred) == 0:
            ts_pred = y_hat_values
        else:
            ts_pred = ts_pred + y_hat_values
            
        ts_max_pred.append(max(y_hat_max1,y_hat_max2,y_hat_max3))
        ts_max_actual.append(maxy[i][0])

    return ts_pred, ts_max_pred, ts_actual, ts_max_actual

In [None]:
# Collect the combined predictions of the model-A family on the test set.
(ts_pred_modelA, ts_max_pred_modelA,
 ts_actual_modelA, ts_max_actual_modelA) = test_predict_mergedInput_mergedOutput_forcingMax_3models_noFig(
    modelA1, modelA2, modelA3,
    [XHourlyPeakPowerTest, XContinuousDailyPeakPowerTest],
    [yHourlyPeakPowerTest, XContinuousDailyPeakPowerTest],
)

# Same inputs and targets, model-B family.
(ts_pred_modelB, ts_max_pred_modelB,
 ts_actual_modelB, ts_max_actual_modelB) = test_predict_mergedInput_mergedOutput_forcingMax_3models_noFig(
    modelB1, modelB2, modelB3,
    [XHourlyPeakPowerTest, XContinuousDailyPeakPowerTest],
    [yHourlyPeakPowerTest, XContinuousDailyPeakPowerTest],
)

In [None]:
# Sanity checks before combining: both model families must have been scored
# against the same ground truth and must have produced equally long outputs.
for i in range(len(ts_max_actual_modelA)):
    assert ts_max_actual_modelA[i] == ts_max_actual_modelB[i]

for i in range(len(ts_actual_modelA)):
    assert ts_actual_modelA[i] == ts_actual_modelB[i]

assert len(ts_pred_modelA) == len(ts_pred_modelB)
# Compare A against B; the original compared model A's length with itself,
# which could never fail.
assert len(ts_max_pred_modelA) == len(ts_max_pred_modelB)

### 4.3.1. Max-Combine 결과

In [None]:
import statistics as st

# Max-combine: at every step keep the larger of the model-A and model-B forecasts.
ts_actual = ts_actual_modelA  # ground truth
ts_pred_final = [
    max(ts_pred_modelA[k], ts_pred_modelB[k])
    for k in range(len(ts_pred_modelA))
]

# Report the error and plot actual vs. forecast series.
seriesMae = mean_absolute_error(ts_actual, ts_pred_final)
print("TimeSeries MAE : %d" % int(seriesMae))
plt.figure()
plt.plot(ts_actual, label='actual')
plt.plot(ts_pred_final, label='forecast')
plt.title('[max combine] final prediction ; time-series peak')
plt.legend()
plt.xlabel('hours')
plt.show()

In [None]:
import statistics as st

# Both families must have been scored against identical per-window maxima.
assert ts_max_actual_modelA == ts_max_actual_modelB
ts_max_actual = ts_max_actual_modelA

# Max-combine the per-window maximum forecasts of the two families.
ts_max_pred_final = [
    max(ts_max_pred_modelA[k], ts_max_pred_modelB[k])
    for k in range(len(ts_max_pred_modelA))
]

# Bar chart of the actual vs. forecast maximum, one pair of bars per forecast window.
dailyMaxMae = mean_absolute_error(np.array(ts_max_actual), np.array(ts_max_pred_final))
print("TimeSeriesMax MAE : %d" % int(dailyMaxMae))
plt.figure()
plt.bar(np.arange(len(ts_max_actual))-0.1, ts_max_actual, width=0.3, label='actual')
plt.bar(np.arange(len(ts_max_pred_final))+0.1, ts_max_pred_final, width=0.3, label='forecast')
plt.legend()
plt.title("[max combine] daily power max")
plt.xlabel('days')
plt.show()

### 4.3.2. Mean-Combine 결과

In [None]:
from sklearn.metrics import mean_absolute_error

import statistics as st

# Mean-combine: at every step average the model-A and model-B forecasts.
ts_pred_final = [
    st.mean([ts_pred_modelA[k], ts_pred_modelB[k]])
    for k in range(len(ts_pred_modelA))
]
ts_actual = ts_actual_modelA  # ground truth

# Report the error and plot actual vs. forecast series.
seriesMae = mean_absolute_error(ts_actual, ts_pred_final)
print("TimeSeries MAE : %d" % int(seriesMae))
plt.figure()
plt.plot(ts_actual, label='actual')
plt.plot(ts_pred_final, label='forecast')
plt.title('[mean combine] final prediction ; time-series peak')
plt.legend()
plt.xlabel('hours')
plt.show()

In [None]:
import statistics as st

# Mean-combine the per-window maximum forecasts of the two families.
ts_max_pred_final = [
    st.mean([ts_max_pred_modelA[k], ts_max_pred_modelB[k]])
    for k in range(len(ts_max_pred_modelA))
]
ts_max_actual = ts_max_actual_modelA

# Bar chart of the actual vs. forecast maximum, one pair of bars per forecast window.
dailyMaxMae = mean_absolute_error(np.array(ts_max_actual), np.array(ts_max_pred_final))
print("TimeSeriesMax MAE : %d" % int(dailyMaxMae))
plt.figure()
plt.bar(np.arange(len(ts_max_actual))-0.1, ts_max_actual, width=0.3, label='actual')
plt.bar(np.arange(len(ts_max_pred_final))+0.1, ts_max_pred_final, width=0.3, label='forecast')
plt.legend()
plt.title("[mean combine] daily power max")
plt.xlabel('days')
plt.show()