In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd

from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError

In [2]:
# Central settings dictionary read by the data-splitting, model-building and
# training cells below.
CONFIGS = dict(
    # Filesystem locations and the stem used for the checkpoint file name.
    data_path='../data/',
    model_path='../model/',
    model_name='super_basic',

    # Row positions used by split_data as the validation / test boundaries.
    valid_start_index=1704,
    test_start_index=1872,

    # Optimisation settings.
    batch_size=64,
    learning_rate=1e-4,
    epochs=100,
    es_patience=10,  # patience handed to EarlyStopping

    # Window geometry.
    window_size=7 * 24,  # 168 past values per sample (presumably 7 days of hourly readings -- confirm)
    target_length=3,     # number of future values predicted per sample
)

In [3]:
# Load the raw training table. The directory is taken from CONFIGS so the
# path is defined in one place only; `data_path` is kept as a module-level
# name for any cell that still refers to it.
data_path = CONFIGS['data_path']

# The CSV is decoded as cp949.
train_origin = pd.read_csv(data_path+'train.csv', encoding='cp949')

In [4]:
# Work on an independent copy so that train_origin keeps the raw table.
data = train_origin.copy()

# Replace the original headers with short names, assigned by position.
data.columns = [
    'num',
    'date_time',
    'target',
    'temp',
    'wind',
    'humid',
    'rain',
    'sun',
    'non_elec_eq',
    'sunlight_eq',
]

# Keep only the rows whose `num` is 1, and only the timestamp and target columns.
data = data.loc[data['num'] == 1, ['date_time', 'target']]

print(f'data.shape: {data.shape}')

data.shape: (2040, 2)


In [5]:
# Column labels for the windowed frame: 't-<window_size>' ... 't-1' for the
# inputs (largest lag first), then 't+0' ... for the targets.
input_cols = [f't-{lag}' for lag in reversed(range(1, CONFIGS['window_size'] + 1))]
target_cols = [f't+{step}' for step in range(CONFIGS['target_length'])]

# Store both label lists in CONFIGS so later cells can look them up by key.
CONFIGS.update(input_cols=input_cols, target_cols=target_cols)

In [6]:
def mk_time_series(data, window_size=None, target_length=None):
    """Turn the ``'target'`` column into a frame of sliding windows.

    Row ``i`` of the result holds ``window_size`` consecutive target values
    starting at position ``i`` (columns ``'t-<window_size>'`` ... ``'t-1'``)
    followed by the next ``target_length`` values (columns ``'t+0'`` ...).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'target'`` column. Rows are taken in positional
        order; the index labels are ignored.
    window_size : int, optional
        Number of input values per row. Defaults to ``CONFIGS['window_size']``.
    target_length : int, optional
        Number of target values per row. Defaults to
        ``CONFIGS['target_length']``.

    Returns
    -------
    pandas.DataFrame
        float64 frame with ``len(data) - window_size - target_length + 1``
        rows and ``window_size + target_length`` columns.

    Raises
    ------
    ValueError
        If ``data`` is so short that the row count above would be negative.
    """
    # Fall back to the notebook-wide settings only when the caller gave none.
    if window_size is None:
        window_size = CONFIGS['window_size']
    if target_length is None:
        target_length = CONFIGS['target_length']

    values = data['target'].to_numpy(dtype=float)
    row_width = window_size + target_length
    n_rows = values.shape[0] - row_width + 1
    if n_rows < 0:
        raise ValueError(
            f'data has {values.shape[0]} rows, too few for windows of '
            f'{window_size} inputs + {target_length} targets'
        )

    # Row i is values[i : i + row_width]. Broadcasting the row starts against
    # the in-row offsets yields every window in one integer-indexing step.
    positions = np.arange(n_rows)[:, None] + np.arange(row_width)[None, :]
    windows = values[positions]

    # Labels are derived here rather than read from module-level lists, so the
    # function does not depend on another cell having been run.
    columns = (
        [f't-{i}' for i in range(window_size, 0, -1)]
        + [f't+{i}' for i in range(target_length)]
    )
    return pd.DataFrame(windows, columns=columns)

In [7]:
def split_data(data, CONFIGS):
    """Cut ``data`` into train / validation / test parts and window each one.

    The train part is every row before ``CONFIGS['valid_start_index']``. The
    validation part ends just before ``CONFIGS['test_start_index']`` and the
    test part runs to the end of ``data``. Both of those start
    ``CONFIGS['window_size']`` rows before their boundary index, so the
    inputs of their first window are the rows immediately preceding it.

    Note: the windowing itself is done by ``mk_time_series``, which is called
    with the slice only and takes its window geometry from the module-level
    ``CONFIGS`` rather than from the argument passed here.

    Returns
    -------
    tuple
        ``(train, valid, test)`` as returned by ``mk_time_series``.
    """
    window = CONFIGS['window_size']
    valid_start = CONFIGS['valid_start_index']
    test_start = CONFIGS['test_start_index']

    raw_parts = (
        data[:valid_start],
        data[valid_start - window:test_start],
        data[test_start - window:],
    )

    train, valid, test = (mk_time_series(part) for part in raw_parts)
    return train, valid, test

In [8]:
# Build the windowed train / validation / test frames from the filtered
# `data` frame (rows with num == 1, columns date_time and target).
train, valid, test = split_data(data, CONFIGS)

In [9]:
def set_model(CONFIGS, model_name=None, print_summary=False):
    """Build and compile the fully connected forecasting network.

    Layers: an input of width ``CONFIGS['window_size']``, Dense(64, relu),
    Dense(32, relu), then a Dense output of width
    ``CONFIGS['target_length']`` created without an activation argument.
    The model is compiled with Adam at ``CONFIGS['learning_rate']`` and a
    mean-squared-error loss.

    Parameters
    ----------
    CONFIGS : dict
        Settings dictionary; the keys listed above plus ``'model_name'``.
    model_name : str, optional
        Name given to the Keras model. A falsy value selects
        ``CONFIGS['model_name']``.
    print_summary : bool
        When true, ``model.summary()`` is called before returning.

    Returns
    -------
    The compiled Keras ``Model``.
    """
    inputs = Input(batch_shape=(None, CONFIGS['window_size']), name='inputs')

    # Stack the hidden layers in order; names match the checkpointed weights.
    hidden = inputs
    for units, layer_name in ((64, 'dense_0'), (32, 'dense_1')):
        hidden = Dense(units, activation='relu', name=layer_name)(hidden)
    outputs = Dense(CONFIGS['target_length'], name='outputs')(hidden)

    model = Model(inputs, outputs, name=model_name or CONFIGS['model_name'])

    optimizer = Adam(learning_rate=CONFIGS['learning_rate'])
    model.compile(loss=MeanSquaredError(), optimizer=optimizer)

    if print_summary:
        model.summary()
    return model

In [10]:
# Instantiate the network under its default name and print the layer summary.
model = set_model(CONFIGS, print_summary=True)

Model: "super_basic"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 168)]             0         
_________________________________________________________________
dense_0 (Dense)              (None, 64)                10816     
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
outputs (Dense)              (None, 3)                 99        
Total params: 12,995
Trainable params: 12,995
Non-trainable params: 0
_________________________________________________________________


In [11]:
def train_model(model, train, valid, CONFIGS):
    """Fit ``model`` on ``train`` while validating on ``valid``.

    Inputs and targets are selected from each frame through
    ``CONFIGS['input_cols']`` and ``CONFIGS['target_cols']``. Two callbacks
    are attached: EarlyStopping with ``CONFIGS['es_patience']``, and a
    ModelCheckpoint that writes weights only, keeping the best ``val_loss``,
    to ``<model_path><model_name>.h5``.

    Returns
    -------
    The object returned by ``model.fit``.
    """
    feature_cols, label_cols = CONFIGS['input_cols'], CONFIGS['target_cols']
    checkpoint_path = f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5'

    callbacks = [
        EarlyStopping(patience=CONFIGS['es_patience']),
        ModelCheckpoint(
            filepath=checkpoint_path,
            monitor='val_loss',
            save_best_only=True,
            save_weights_only=True,
        ),
    ]

    return model.fit(
        train[feature_cols], train[label_cols],
        batch_size=CONFIGS['batch_size'],
        epochs=CONFIGS['epochs'],
        validation_data=(valid[feature_cols], valid[label_cols]),
        callbacks=callbacks,
    )

In [12]:
# Train the network; train_model also checkpoints the best-val_loss weights
# to disk, and the value returned by model.fit is kept in `history`.
history = train_model(model, train, valid, CONFIGS)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


In [13]:
# Build a fresh copy of the architecture under a 'best_'-prefixed name ...
best_model = set_model(CONFIGS, model_name='best_'+CONFIGS['model_name'])
# ... and load the weights from the same .h5 path that train_model's
# ModelCheckpoint writes to.
best_model.load_weights(f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5')

In [14]:
# Separate every windowed frame into its input columns and its target columns.
X_train = train[CONFIGS['input_cols']]
y_train = train[CONFIGS['target_cols']]

X_valid = valid[CONFIGS['input_cols']]
y_valid = valid[CONFIGS['target_cols']]

X_test = test[CONFIGS['input_cols']]
y_test = test[CONFIGS['target_cols']]

# Predictions of the restored checkpoint model on each split.
y_train_pred = best_model.predict(X_train)
y_valid_pred = best_model.predict(X_valid)
y_test_pred = best_model.predict(X_test)

In [15]:
# Loss of the restored checkpoint model on each split (the loss the model
# was compiled with), computed silently.
train_loss = best_model.evaluate(X_train, y_train, verbose=0)
valid_loss = best_model.evaluate(X_valid, y_valid, verbose=0)
test_loss = best_model.evaluate(X_test, y_test, verbose=0)

# One line per split, in train / valid / test order.
print(
    f'train_loss: {train_loss}',
    f'valid_loss: {valid_loss}',
    f'test_loss: {test_loss}',
    sep='\n',
)

train_loss: 8580.9912109375
valid_loss: 9559.943359375
test_loss: 11584.376953125
