In [None]:
# Import packages

import pandas as pd
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Conv2D, BatchNormalization, Activation
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import datetime
from tqdm.notebook import tqdm

In [None]:
# Define helper functions

def look_back(X, a, group_size=29, target_col=7):
    """Build lagged input windows and next-step targets from a stacked 2-D array.

    The rows of ``X`` are treated as consecutive groups of ``group_size`` rows
    (the training cell slices the data as ``days * 29``, so presumably one
    group per day -- confirm against the data file). For every row ``i`` the
    input window holds the rows ``i, i + group_size, ..., i + (a - 1) * group_size``
    and the target is column ``target_col`` of row ``i + a * group_size``.

    Parameters
    ----------
    X : array-like, shape (n_groups * group_size, n_features)
        Stacked feature rows.
    a : int
        Number of lagged groups (time steps) per window.
    group_size : int, default 29
        Number of rows per group; also the stride between successive lags.
    target_col : int, default 7
        Column of ``X`` used as the prediction target.

    Returns
    -------
    X_lb : numpy.ndarray, shape (n_groups - a, group_size, a, n_features)
        Lagged inputs, always float64.
    Y_lb : numpy.ndarray, shape (n_groups - a, group_size, 1)
        Targets, in the dtype of ``X``.

    Raises
    ------
    ValueError
        If ``group_size`` is not positive, ``a`` is negative, the row count is
        not a multiple of ``group_size``, or there are fewer than ``a`` groups.
    """
    X = np.asarray(X)
    if group_size <= 0:
        raise ValueError("group_size must be a positive integer")
    if a < 0:
        raise ValueError("a (number of time steps) must be non-negative")

    n_rows, n_features = X.shape
    n_groups, remainder = divmod(n_rows, group_size)
    if remainder:
        raise ValueError(
            "len(X)={} is not a multiple of group_size={}".format(n_rows, group_size)
        )
    if n_groups < a:
        raise ValueError(
            "need at least a={} groups of {} rows, got {}".format(a, group_size, n_groups)
        )

    n_windows = n_rows - a * group_size

    # Fill one lag at a time with a single slice copy instead of looping over
    # every (row, lag) pair in Python; the result is element-for-element the same.
    X_lb = np.zeros((n_windows, a, n_features))
    for lag in range(a):
        start = lag * group_size
        X_lb[:, lag] = X[start:start + n_windows]
    X_lb = X_lb.reshape(n_groups - a, group_size, a, n_features)

    # The target for window i is the row exactly `a` groups after its first lag.
    Y_lb = X[a * group_size:, target_col].reshape(n_groups - a, group_size, 1)
    return X_lb, Y_lb

def division(data):
    """Split ``data`` sequentially into train / validation / test slices.

    The cut points are at 60% and 80% of ``len(data)`` (truncated to int), so
    the three pieces are the first 60%, the next 20% and the remainder, in the
    original order.

    Returns
    -------
    tuple
        ``(data_train, data_val, data_test)``, each obtained by slicing ``data``.
    """
    n = len(data)
    # Consecutive boundaries; each adjacent pair delimits one slice.
    cuts = [0, int(n * 0.6), int(n * 0.8), n]
    return tuple(data[lo:hi] for lo, hi in zip(cuts, cuts[1:]))

In [None]:
# Sweep the look-back length (number of time steps) from 3 to 10. For each
# value, train a small CNN and record RMSE and R-squared on the test split.

ROWS_PER_DAY = 29       # rows per day in data.csv (641 days -> 18589 rows)
N_FEATURES = 12         # feature columns expected by look_back / the CNN input
TRAIN_DAYS = 641        # 2015.04.01 ~ 2016.12.31 : 641 days -> 18589 rows
TRAIN_VAL_DAYS = 1006   # train + validation, i.e. through 2017.12.31


def _build_cnn(ts):
    """Return the compiled CNN for inputs of shape (ROWS_PER_DAY, ts, N_FEATURES).

    Four Conv2D(1x2, 'same') + BatchNorm + ReLU stages with 64/32/16/8 filters,
    then a Conv2D(1, (1, ts), 'valid') that collapses the time axis so the
    output has shape (ROWS_PER_DAY, 1, 1).
    """
    model = Sequential()
    model.add(Conv2D(64, (1, 2), padding='same',
                     input_shape=(ROWS_PER_DAY, ts, N_FEATURES)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    for n_filters in (32, 16, 8):
        model.add(Conv2D(n_filters, (1, 2), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
    model.add(Conv2D(1, (1, ts), padding='valid'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


rmse_lst = []
r2_lst = []

# The data and its chronological split do not depend on the time step, so
# load and slice once instead of re-reading the CSV on every iteration.
df = pd.read_csv('./data.csv', encoding='ms949')

train = df[:TRAIN_DAYS * ROWS_PER_DAY]                      # 2015.04.01 ~ 2016.12.31 : 641 days -> 18589
validation = df[len(train):TRAIN_VAL_DAYS * ROWS_PER_DAY]   # 2017.01.01 ~ 2017.12.31 : 365 days -> 10585
test = df[len(train) + len(validation):]                    # 2018.01.01 ~ 2018.12.31 : 365 days -> 10585

for ts in tqdm(range(3, 11)):
    X_train, Y_train = look_back(train.values, ts)
    Y_train = Y_train.reshape(len(Y_train), ROWS_PER_DAY, 1, 1)

    X_val, Y_val = look_back(validation.values, ts)
    Y_val = Y_val.reshape(len(Y_val), ROWS_PER_DAY, 1, 1)

    X_test, Y_test = look_back(test.values, ts)
    Y_test = Y_test.reshape(len(Y_test), ROWS_PER_DAY, 1, 1)

    # CNN model
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=30, mode='min')
    mc = ModelCheckpoint('./CNN_{}.h5'.format(ts), monitor='val_loss', mode='min', save_best_only=True)

    model = _build_cnn(ts)

    print("### Model training : time step_{} ###".format(ts))
    print('Start training :', datetime.datetime.now())
    model.fit(X_train, Y_train,
              epochs=500,
              batch_size=8,
              validation_data=(X_val, Y_val),
              callbacks=[early_stopping, mc],
              verbose=0)
    print('End training :', datetime.datetime.now())

    # NOTE(review): the best-val_loss weights are written to CNN_{ts}.h5, but the
    # in-memory model evaluated below holds the weights from the last epoch run.
    # If metrics for the best checkpoint are intended, load it with load_model()
    # or pass restore_best_weights=True to EarlyStopping -- confirm intent.
    y_predicted = model.predict(X_test).reshape(-1).astype('float32')
    y_observed = Y_test.reshape(-1).astype('float32')

    try:
        rmse = round(sqrt(mean_squared_error(y_observed, y_predicted)), 3)
        # R-squared of the one-predictor regression "Observed ~ Predicted" (with
        # intercept) equals the squared Pearson correlation of the two series.
        r2 = round(float(np.corrcoef(y_observed, y_predicted)[0, 1] ** 2), 3)
    except ValueError as err:
        # E.g. non-finite predictions: keep the sweep going, but record NaN
        # rather than re-using a stale or undefined value from an earlier step.
        print('Metric computation failed for time step {}: {}'.format(ts, err))
        rmse, r2 = float('nan'), float('nan')

    rmse_lst.append(rmse)
    r2_lst.append(r2)

    print('RMSE :', rmse)
    print('R-squared :', r2)
    print("---------------------------------\n")

    del model