In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd

from tensorflow.data import Dataset
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError

In [2]:
# Experiment settings shared by the cells of this notebook.
CONFIGS = dict(
    # Directories (relative paths) and the base name used for the saved model.
    data_path='../data/',
    model_path='../model/',
    model_name='using_dataset',

    # Hold-out sizes / split boundaries.
    # NOTE: the key 'test_lenght' is misspelled ("length"); it is kept
    # verbatim so that any existing lookup of this key keeps working.
    test_lenght=24,
    valid_start_index=1992,
    test_start_index=2016,

    # Optimisation settings: batch size, Adam learning rate, epoch cap and
    # early-stopping patience.
    batch_size=64,
    learning_rate=1e-4,
    epochs=100,
    es_patience=10,

    # Number of past values fed to the model and number of values predicted.
    window_size=24 * 7,
    target_length=3,
)

In [3]:
# Take the data directory from CONFIGS instead of repeating the literal, so
# the location is defined in exactly one place.
data_path = CONFIGS['data_path']

# The CSV is decoded as cp949.
train_origin = pd.read_csv(data_path + 'train.csv', encoding='cp949')

In [4]:
# Work on a relabelled copy of the raw table (train_origin itself is left
# untouched), then keep only the rows whose 'num' is 1 and only the
# timestamp and target columns.
data = (
    train_origin
    .set_axis(
        [
            'num', 'date_time', 'target', 'temp', 'wind',
            'humid', 'rain', 'sun', 'non_elec_eq', 'sunlight_eq',
        ],
        axis=1,
    )
    .loc[lambda frame: frame['num'] == 1, ['date_time', 'target']]
)

print(f'data.shape: {data.shape}')

data.shape: (2040, 2)


In [5]:
# Split the target series by row position using half-open ranges.
# Label-based `.loc` slices include their end point, which would put the first
# validation row into `train` and the first test row into `valid`; `.iloc`
# excludes the end position, so the three target periods do not overlap.
# The validation and test slices start `window_size` rows early so that the
# first target of each period has a full input window in front of it.
# NOTE(review): assumes the boundaries in CONFIGS are row positions within
# `data` - confirm.
train = data['target'].iloc[:CONFIGS['valid_start_index']]
valid = data['target'].iloc[
    CONFIGS['valid_start_index'] - CONFIGS['window_size']:CONFIGS['test_start_index']
]
test = data['target'].iloc[CONFIGS['test_start_index'] - CONFIGS['window_size']:]

In [6]:
def mk_dataset(data, shuffle=False):
    """Build a batched dataset of (input window, target window) pairs.

    Pair ``i`` consists of ``data[i : i + window_size]`` as the input and the
    ``target_length`` values that immediately follow it as the target. The
    window size, target length and batch size are read from ``CONFIGS``.

    Args:
        data: One-dimensional sequence of values (for example a pandas
            Series). It is converted to a NumPy array so that all slicing is
            positional regardless of the index it carries.
        shuffle: When True, the pairs are shuffled with a buffer of 512
            elements before batching.

    Returns:
        A ``tf.data`` dataset yielding ``(X, y)`` batches, where ``X`` holds
        ``window_size`` values per row and ``y`` holds ``target_length``
        values per row.
    """
    values = np.asarray(data)

    # Inputs never need the trailing values that can only serve as targets;
    # targets start right after the first full input window. Both streams
    # therefore produce the same number of windows and stay aligned.
    X = values[:-CONFIGS['target_length']]
    y = values[CONFIGS['window_size']:]

    # window() yields nested datasets; flat_map + batch turns each window
    # back into a single fixed-length tensor.
    X_ds = Dataset.from_tensor_slices(X)
    X_ds = X_ds.window(CONFIGS['window_size'], shift=1, drop_remainder=True)
    X_ds = X_ds.flat_map(lambda x: x).batch(CONFIGS['window_size'])

    y_ds = Dataset.from_tensor_slices(y)
    y_ds = y_ds.window(CONFIGS['target_length'], shift=1, drop_remainder=True)
    y_ds = y_ds.flat_map(lambda x: x).batch(CONFIGS['target_length'])

    # Cache the deterministic windowing work *before* shuffling: a cache
    # replays exactly the same elements on every iteration, so caching after
    # shuffle() would freeze the first epoch's order for all later epochs.
    ds = Dataset.zip((X_ds, y_ds)).cache()
    if shuffle:
        ds = ds.shuffle(512)
    ds = ds.batch(CONFIGS['batch_size']).prefetch(2)

    return ds

In [7]:
# Windowed datasets for the three splits; only the training split is shuffled.
train_ds = mk_dataset(data=train, shuffle=True)
valid_ds = mk_dataset(data=valid, shuffle=False)
test_ds = mk_dataset(data=test, shuffle=False)

In [8]:
def set_model(CONFIGS, model_name=None, print_summary=False):
    """Create and compile the fully connected forecasting network.

    The network maps an input of ``CONFIGS['window_size']`` values through two
    ReLU dense layers (64 and 32 units) to ``CONFIGS['target_length']`` linear
    outputs, and is compiled with mean squared error and Adam.

    Args:
        CONFIGS: Settings mapping; reads 'window_size', 'target_length',
            'learning_rate' and, as a fallback name, 'model_name'.
        model_name: Name given to the Keras model. When falsy (None or an
            empty string), ``CONFIGS['model_name']`` is used instead.
        print_summary: When True, print the Keras model summary.

    Returns:
        The compiled Keras ``Model``.
    """
    net_input = Input(batch_shape=(None, CONFIGS['window_size']), name='inputs')

    # Stack the hidden layers in order, each feeding the next.
    hidden = net_input
    for units, layer_name in ((64, 'dense_0'), (32, 'dense_1')):
        hidden = Dense(units, activation='relu', name=layer_name)(hidden)

    net_output = Dense(CONFIGS['target_length'], name='outputs')(hidden)

    # Fall back to the configured name when the caller did not supply one.
    resolved_name = model_name if model_name else CONFIGS['model_name']
    network = Model(net_input, net_output, name=resolved_name)

    adam = Adam(learning_rate=CONFIGS['learning_rate'])
    network.compile(loss=MeanSquaredError(), optimizer=adam)

    if print_summary:
        network.summary()

    return network

In [9]:
# Build the network under its configured name and print its layer summary.
model = set_model(CONFIGS, model_name=None, print_summary=True)

Model: "using_dataset"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 168)]             0         
_________________________________________________________________
dense_0 (Dense)              (None, 64)                10816     
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
outputs (Dense)              (None, 3)                 99        
Total params: 12,995
Trainable params: 12,995
Non-trainable params: 0
_________________________________________________________________


In [10]:
def train_model(model, train_ds, valid_ds, CONFIGS):
    """Fit ``model`` with early stopping and best-weights checkpointing.

    Args:
        model: Compiled Keras model to train.
        train_ds: Training dataset passed to ``fit``. It is expected to yield
            batches already; no batch size is passed to ``fit``.
        valid_ds: Validation dataset, evaluated after every epoch.
        CONFIGS: Settings mapping; reads 'es_patience', 'epochs',
            'model_path' and 'model_name'.

    Returns:
        The object returned by ``model.fit``.
    """
    checkpoint_path = f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5'

    # Stop once the validation loss has not improved for `es_patience` epochs.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=CONFIGS['es_patience'],
    )
    # Keep only the weights of the epoch with the lowest validation loss.
    save_best_only = ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=True,
    )

    # `batch_size` is intentionally not passed: the inputs are datasets, which
    # produce their own batches, and Keras documents that the argument must
    # not be specified for dataset inputs.
    history = model.fit(
        train_ds,
        epochs=CONFIGS['epochs'],
        validation_data=valid_ds,
        callbacks=[
            early_stop,
            save_best_only,
        ],
    )

    return history

In [11]:
# Train the model; `history` holds whatever train_model returns.
history = train_model(
    model=model,
    train_ds=train_ds,
    valid_ds=valid_ds,
    CONFIGS=CONFIGS,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


In [12]:
# Rebuild the same architecture under a "best_" name, then load the weights
# stored in the checkpoint file written during training.
best_model = set_model(
    CONFIGS,
    model_name='best_'+CONFIGS['model_name'],
)
best_model.load_weights(
    f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5'
)

In [13]:
# Predict on unshuffled windows so that row i of every prediction array
# belongs to the i-th window of the corresponding series. `train_ds` was built
# with shuffle=True, so predicting on it would return rows in a permuted
# order; an ordered dataset is built for the training split instead.
y_train_pred = best_model.predict(mk_dataset(train))
y_valid_pred = best_model.predict(valid_ds)
y_test_pred = best_model.predict(test_ds)

In [14]:
# Loss of the reloaded model on each split, computed in train/valid/test order
# with Keras progress output suppressed.
train_loss, valid_loss, test_loss = (
    best_model.evaluate(split_ds, verbose=0)
    for split_ds in (train_ds, valid_ds, test_ds)
)

print(f'train_loss: {train_loss}')
print(f'valid_loss: {valid_loss}')
print(f'test_loss: {test_loss}')

train_loss: 7508.27001953125
valid_loss: 13330.0322265625
test_loss: 13318.1591796875
