In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd

import datetime

import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Loss
from tensorflow.keras.metrics import Metric
from tensorflow.keras.callbacks import TensorBoard, Callback

In [2]:
# Central experiment configuration shared by the cells below. Later cells add
# derived entries to this same dict (e.g. 'n_buildings', 'mean_std_dict',
# 'target_max', 'embedding_dim' and the column-group lists).
CONFIGS = {
    # Locations and names used when reading data and saving weights.
    'data_path': '../data/',
    'model_path': '../model/',
    'model_name': 'recursive_prediction',
    # Selects the branch built by the TimeSeries layer
    # ('flatten', 'cnn1d', 'cnn2d', 'lstm' or 'bilstm').
    'model_type': 'cnn1d',
    
    'dtype': tf.float32,
    
    # First hour ('%Y-%m-%d %H') of the validation and test splits.
    'valid_start_date_time': '2020-08-11 00',
    'test_start_date_time': '2020-08-18 00',
    
    # Shuffle buffer size, batch size and optimisation settings.
    'buffer_size': 512,
    'batch_size': 64,
    'learning_rate': 1e-4,
    'epochs': 100,
    # Epochs without improvement tolerated by the early-stopping callback.
    'es_patience': 10,
    
    # Input window length (converted to hours when splitting the data),
    # window shift, and number of future steps predicted per sample.
    'window_size': 7*24,
    'shift': 1,
    'target_length': 3,
}

# TensorBoard logs are written to a directory named after the model.
CONFIGS['tensorboard_log_path'] = f'../logs/tensorboard/{CONFIGS["model_name"]}'

In [3]:
# Load the raw training table; the file is decoded with the cp949 codec.
train_origin = pd.read_csv(CONFIGS['data_path']+'train.csv', encoding='cp949')

In [4]:
# Work on an independent copy so `train_origin` keeps the file's original
# column names and building numbers.
data = train_origin.copy(deep=True)

# Replace the original headers with short identifiers (positional mapping).
data.columns = [
    'num',
    'date_time',
    'target',
    'temp',
    'wind',
    'humid',
    'rain',
    'sun',
    'non_elec_eq',
    'sunlight_eq',
]

# Shift building numbers down by one so they start one lower than in the file.
data['num'] = data['num'] - 1

print(f'data.shape: {data.shape}')

# Derived settings reused by later cells.
CONFIGS['n_buildings'] = len(data['num'].unique())
CONFIGS['last_date_time'] = data['date_time'].max()

data.shape: (122400, 10)


In [5]:
def mk_time_data(data):
    """Return a copy of `data` with parsed datetimes and calendar features.

    The `date_time` column is parsed with the '%Y-%m-%d %H' format using
    vectorised pandas operations. A column that is already datetime-typed is
    accepted as well, so the function can be re-applied to its own output.

    Added columns: `time_stamp` (seconds since 1970-01-01 00:00 of the parsed
    value), `year`, `month`, `day`, `hour`, `weekday` (Monday == 0), the
    cosine/sine encodings `cos_hour`, `sin_hour`, `cos_weekday`,
    `sin_weekday`, and `is_holiday` (1 on Saturdays, Sundays and on
    August 17, otherwise 0).

    Args:
        data: DataFrame with a `date_time` column.

    Returns:
        A new DataFrame; `data` itself is not modified.
    """
    new_data = data.copy()

    # One vectorised parse instead of a Python-level strptime call per row.
    date_time = pd.to_datetime(data['date_time'], format='%Y-%m-%d %H')
    new_data['date_time'] = date_time

    # Elapsed seconds since the epoch, computed on the whole column at once.
    new_data['time_stamp'] = (
        (date_time - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)
    )

    new_data['year'] = date_time.dt.year
    new_data['month'] = date_time.dt.month
    new_data['day'] = date_time.dt.day

    # Cyclical encodings keep hour 23 adjacent to hour 0 (and Sunday to Monday).
    new_data['hour'] = date_time.dt.hour
    hour_angle = 2*np.pi*(new_data['hour']/24)
    new_data['cos_hour'] = np.cos(hour_angle)
    new_data['sin_hour'] = np.sin(hour_angle)

    new_data['weekday'] = date_time.dt.weekday
    weekday_angle = 2*np.pi*(new_data['weekday']/7)
    new_data['cos_weekday'] = np.cos(weekday_angle)
    new_data['sin_weekday'] = np.sin(weekday_angle)

    # Weekends plus the single hard-coded date August 17.
    is_weekend = new_data['weekday'].isin([5, 6])
    is_aug_17 = (new_data['month'] == 8) & (new_data['day'] == 17)
    new_data['is_holiday'] = (is_weekend | is_aug_17).astype('int64')

    return new_data

In [6]:
# Add parsed datetimes and the calendar/cyclical/holiday features to the table.
new_data = mk_time_data(data)

In [7]:
def mk_building_info(data, data_for_calc, CONFIGS):
    """Attach per-building target statistics to every row of `data`.

    For each building number in `range(CONFIGS['n_buildings'])` the
    statistics are computed from that building's rows in `data_for_calc` and
    written to all rows of `data` that belong to the same building:

    * `range`: max - min of `target`
    * `mean`, `std`: mean and standard deviation of `target`
    * `holiday_gap`: |mean target on non-holidays - mean target on holidays|
    * `day_gap`: average (max - min) over consecutive 24-row chunks, taken in
      the row order of `data_for_calc` (assumes 24 rows per day in
      chronological order -- TODO confirm against the caller)

    `mean_to_inverse` and `std_to_inverse` are copies of `mean` and `std`.

    Args:
        data: DataFrame with a `num` column; receives the new columns.
        data_for_calc: DataFrame with `num`, `target` and `is_holiday`
            columns used to compute the statistics.
        CONFIGS: mapping providing `n_buildings`.

    Returns:
        A new DataFrame; `data` itself is not modified.
    """
    stat_cols = ['range', 'mean', 'std', 'holiday_gap', 'day_gap']

    new_data = data.copy()
    # Start from float columns: the statistics are floats, and writing floats
    # into integer columns through .loc is deprecated in recent pandas.
    for col in stat_cols:
        new_data[col] = 0.0

    for num in range(CONFIGS['n_buildings']):
        building = data_for_calc[data_for_calc['num'] == num]
        target = building['target']

        workday_mean = building.loc[building['is_holiday'] == 0, 'target'].mean()
        holiday_mean = building.loc[building['is_holiday'] == 1, 'target'].mean()

        # Average daily swing over complete 24-row chunks (positional slicing).
        n_days = building.shape[0]//24
        daily_swings = [
            target.iloc[d*24:(d+1)*24].max() - target.iloc[d*24:(d+1)*24].min()
            for d in range(n_days)
        ]
        bt_day_gap = sum(daily_swings)/n_days if n_days else 0.0

        stats = {
            'range': target.max() - target.min(),
            'mean': target.mean(),
            'std': target.std(),
            'holiday_gap': abs(workday_mean - holiday_mean),
            'day_gap': bt_day_gap,
        }

        # Build the row mask once per building instead of once per column.
        rows = new_data['num'] == num
        for col, value in stats.items():
            new_data.loc[rows, col] = value

    # Unscaled copies kept for mapping predictions back to the original scale.
    new_data['mean_to_inverse'] = new_data['mean']
    new_data['std_to_inverse'] = new_data['std']

    return new_data

In [8]:
# Attach per-building target statistics to every row. The statistics are
# computed only from rows dated before the validation start.
new_data = mk_building_info(
    new_data,
    new_data[new_data['date_time']<CONFIGS['valid_start_date_time']],
    CONFIGS
)

In [9]:
def mk_mean_std_dict(data, scaling_by_building_cols, n_buildings=60):
    """Compute per-building mean and standard deviation of selected columns.

    Args:
        data: DataFrame with a `num` column and the columns to summarise.
        scaling_by_building_cols: column names to compute statistics for.
        n_buildings: number of buildings; statistics are produced for the
            building numbers `0 .. n_buildings - 1`. Defaults to 60, the
            value that was previously hard-coded.

    Returns:
        A dict of the form `{num: {col: {'mean': ..., 'std': ...}}}`. A
        building with no rows in `data` gets NaN statistics.
    """
    mean_std_dict = {}
    for num in range(n_buildings):
        building = data[data['num'] == num]
        mean_std_dict[num] = {
            col: {
                'mean': building[col].mean(),
                'std': building[col].std()
            } for col in scaling_by_building_cols
        }
    return mean_std_dict

In [10]:
# Columns standardised separately for each building.
scaling_by_building_cols = [
    'temp', 'wind', 'humid', 'rain', 'sun', 'time_stamp', 'target',
]
# Columns standardised once over the whole table.
scaling_by_all_cols = ['range', 'mean', 'std', 'holiday_gap', 'day_gap']

# Per-building statistics are computed only from rows dated before the
# validation start and stored in CONFIGS for later reuse.
mean_std_dict = mk_mean_std_dict(
    new_data[new_data['date_time'] < CONFIGS['valid_start_date_time']],
    scaling_by_building_cols
)
CONFIGS['mean_std_dict'] = mean_std_dict

In [11]:
def standard_scaling(data, scaling_by_building_cols, scaling_by_all_cols, mean_std_dict=None):
    """Standardise feature columns and return the result as a new DataFrame.

    Columns in `scaling_by_building_cols` are standardised per building with
    the statistics in `mean_std_dict`; columns in `scaling_by_all_cols` are
    standardised with the mean/std of the whole column of `data`.

    Args:
        data: DataFrame with a `num` column and the columns to scale.
        scaling_by_building_cols: columns scaled with per-building statistics.
        scaling_by_all_cols: columns scaled with whole-column statistics.
        mean_std_dict: `{num: {col: {'mean': ..., 'std': ...}}}`. When
            omitted or empty it is computed from `data` with
            `mk_mean_std_dict`.

    Returns:
        A new DataFrame; `data` itself is not modified.
    """
    if not mean_std_dict:
        mean_std_dict = mk_mean_std_dict(data, scaling_by_building_cols)

    new_data = data.copy()

    # Iterate over the buildings that actually have statistics instead of a
    # hard-coded count, so any number of buildings is handled.
    for num, col_stats in mean_std_dict.items():
        rows = new_data['num'] == num
        if not rows.any():
            continue
        for col in scaling_by_building_cols:
            stats = col_stats[col]
            new_data.loc[rows, col] = (new_data.loc[rows, col] - stats['mean']) / stats['std']

    for col in scaling_by_all_cols:
        m = new_data[col].mean()
        s = new_data[col].std()
        new_data[col] = (new_data[col] - m) / s

    return new_data

In [12]:
# Standardise the whole table, reusing the per-building statistics computed above.
new_data = standard_scaling(new_data, scaling_by_building_cols, scaling_by_all_cols, mean_std_dict)

In [13]:
# Column groups; each group is sliced out of the table by mk_dataset.
# Building identifier.
building_num_cols = ['num']
# Per-building columns.
building_info_cols = [
    'range', 'mean', 'std', 'holiday_gap', 'day_gap',
    'non_elec_eq', 'sunlight_eq',
]
# Weather and calendar features (no target).
target_time_info_cols = [
    'temp', 'wind', 'humid', 'rain', 'sun', 'time_stamp',
    'cos_hour', 'sin_hour', 'cos_weekday', 'sin_weekday',
    'is_holiday',
]
# Features of the input window; 'target' is deliberately the last column
# because recursive_eval replaces the last channel with predictions.
time_series_cols = [
    'temp', 'wind', 'humid', 'rain', 'sun', 'time_stamp',
    'cos_hour', 'sin_hour', 'cos_weekday', 'sin_weekday',
    'is_holiday', 'target',
]
target_cols = ['target']
# Unscaled mean/std columns passed through the model for inverse scaling.
to_inverse_cols = ['mean_to_inverse', 'std_to_inverse']
# Union of all groups; built via set(), so the order is not defined.
input_cols = list(set(
    building_num_cols + building_info_cols + target_time_info_cols +
    time_series_cols + target_cols + to_inverse_cols
))

CONFIGS['building_num_cols'] = building_num_cols
CONFIGS['building_info_cols'] = building_info_cols
CONFIGS['target_time_info_cols'] = target_time_info_cols
CONFIGS['time_series_cols'] = time_series_cols
CONFIGS['target_cols'] = target_cols
CONFIGS['to_inverse_cols'] = to_inverse_cols
CONFIGS['input_cols'] = input_cols

In [14]:
def mk_time_series(data, CONFIGS, is_input=False, is_time_series=False):
    """Turn a frame of rows into a `tf.data` dataset of rows or row windows.

    Rows are first trimmed: for `is_input=True` the last
    `target_length * n_buildings` rows are dropped, otherwise the first
    `window_size * n_buildings` rows are dropped. With `is_time_series=True`
    the rows are then grouped into windows whose elements are `n_buildings`
    rows apart (window length `window_size` for inputs, `target_length`
    otherwise); without it one dataset element per row is returned.

    Args:
        data: frame whose rows become dataset elements.
        CONFIGS: mapping providing `n_buildings`, `window_size`,
            `target_length` and `shift`.
        is_input: selects the trimming side and the window length.
        is_time_series: whether to emit windows instead of single rows.

    Returns:
        A `tf.data.Dataset`.
    """
    n_buildings = CONFIGS['n_buildings']

    if is_input:
        trimmed = data[:-CONFIGS['target_length']*n_buildings]
    else:
        trimmed = data[CONFIGS['window_size']*n_buildings:]

    ds = Dataset.from_tensor_slices(trimmed)
    if not is_time_series:
        return ds

    size = CONFIGS['window_size'] if is_input else CONFIGS['target_length']
    windows = ds.window(
        size=size,
        shift=CONFIGS['shift'],
        stride=n_buildings,
        drop_remainder=True,
    )
    # Every window is complete (drop_remainder), so flattening and re-batching
    # by `size` yields exactly one tensor per window.
    return windows.flat_map(lambda x: x).batch(size)


def mk_dataset(data, CONFIGS, batch_size=None, shuffle=False):
    """Build the batched `((inputs...), target)` dataset fed to the model.

    The rows are sorted by `date_time` then `num`, split into the column
    groups listed in `CONFIGS`, converted with `mk_time_series`, and zipped
    as `((building_num, building_info, target_time_info, time_series,
    to_inverse), target)`.

    Args:
        data: DataFrame containing all configured columns.
        CONFIGS: mapping with the column-group lists, `batch_size` and
            `buffer_size`, plus the keys required by `mk_time_series`.
        batch_size: batch size; falls back to `CONFIGS['batch_size']` when
            not given (or falsy).
        shuffle: shuffle elements before batching.

    Returns:
        A batched, prefetched `tf.data.Dataset`.
    """
    batch_size = batch_size or CONFIGS['batch_size']
    ordered = data.sort_values(['date_time', 'num'])

    def _per_row(cols_key):
        # One dataset element per row for the given column group.
        return mk_time_series(ordered[CONFIGS[cols_key]], CONFIGS)

    features = (
        _per_row('building_num_cols'),
        _per_row('building_info_cols'),
        _per_row('target_time_info_cols'),
        mk_time_series(
            ordered[CONFIGS['time_series_cols']], CONFIGS,
            is_input=True, is_time_series=True
        ),
        _per_row('to_inverse_cols'),
    )
    labels = mk_time_series(
        ordered[CONFIGS['target_cols']], CONFIGS, is_time_series=True
    )

    ds = Dataset.zip((features, labels))
    if shuffle:
        ds = ds.shuffle(CONFIGS['buffer_size'])

    return ds.batch(batch_size).prefetch(2)

In [15]:
# Small converters: '%Y-%m-%d %H' string -> datetime, and hours -> timedelta.
str_to_dt = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H')
hour_to_td = lambda x: datetime.timedelta(hours=x)

# Training rows: everything before the validation start.
train = new_data.loc[
    new_data['date_time'] < \
        str_to_dt(CONFIGS['valid_start_date_time']),
    :
]
# Validation rows start `window_size` hours before the validation start so
# that the first validation target has a full input window.
valid = new_data.loc[
    (new_data['date_time'] >= \
        str_to_dt(CONFIGS['valid_start_date_time'])-hour_to_td(CONFIGS['window_size']))&\
    (new_data['date_time'] < \
         str_to_dt(CONFIGS['test_start_date_time'])),
    :
]
# Test rows likewise start `window_size` hours before the test start.
test = new_data.loc[
    new_data['date_time'] >= \
        str_to_dt(CONFIGS['test_start_date_time'])-hour_to_td(CONFIGS['window_size']),
    :
]

# Validation/test batches hold exactly `n_buildings` samples, as required by
# the assertion in recursive_eval.
train_ds = mk_dataset(train, CONFIGS, shuffle=True)
valid_ds = mk_dataset(valid, CONFIGS, batch_size=CONFIGS['n_buildings'])
test_ds = mk_dataset(test, CONFIGS, batch_size=CONFIGS['n_buildings'])

2022-02-18 08:39:47.363075: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-18 08:39:47.368819: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-18 08:39:47.369342: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-18 08:39:47.370096: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [16]:
class CustomMSE(Loss):
    """MSE computed on inverse-scaled values divided by `target_max`.

    `y_pred` is expected to carry the predictions followed by two trailing
    columns holding the per-sample mean and std used for inverse scaling
    (this is how `call` slices it).

    Args:
        target_max: divisor applied to the inverse-scaled values.
        name: loss name.
    """

    def __init__(self, target_max, name="custom_mse"):
        super(CustomMSE, self).__init__(name=name)
        self.target_max = target_max

    def call(self, y_true, y_pred):
        """Return the mean squared error of the rescaled values."""
        mean = tf.reshape(y_pred[:, -2], (-1, 1))
        std = tf.reshape(y_pred[:, -1], (-1, 1))
        y_pred = y_pred[:, :-2]

        # Reshape explicitly to (batch, horizon). An axis-less squeeze would
        # also drop the batch or horizon axis when its size is 1 and make the
        # multiplication below broadcast incorrectly.
        y_true = tf.reshape(y_true, (-1, y_pred.shape[-1]))

        y_true_inversed = y_true*std+mean
        y_pred_inversed = y_pred*std+mean

        y_true_inversed_scaled = y_true_inversed/self.target_max
        y_pred_inversed_scaled = y_pred_inversed/self.target_max

        mse = tf.reduce_mean((y_true_inversed_scaled-y_pred_inversed_scaled)**2)
        return mse

    
class InversedRMSE(Metric):
    """RMSE accumulated on inverse-scaled targets and predictions.

    `y_pred` is expected to carry the predictions followed by two trailing
    columns holding the per-sample mean and std used for inverse scaling
    (this is how `update_state` slices it).

    Args:
        CONFIGS: mapping providing `target_length`.
        name: metric name.
    """

    def __init__(self, CONFIGS, name="inversed_rmse", **kwargs):
        super(InversedRMSE, self).__init__(name=name, **kwargs)
        self.inversed_mse = self.add_weight(name='inversed_mse', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')
        self.CONFIGS = CONFIGS

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the squared error and element count of one batch.

        `sample_weight` is accepted for API compatibility but not used.
        """
        # Use the configuration given to this instance, not the notebook global.
        y_true = tf.reshape(y_true, (-1, self.CONFIGS['target_length']))
        mean = tf.reshape(y_pred[:, -2], (-1, 1))
        std = tf.reshape(y_pred[:, -1], (-1, 1))
        y_pred = y_pred[:, :-2]

        y_true_inversed = y_true*std+mean
        y_pred_inversed = y_pred*std+mean

        error = tf.reduce_sum(tf.math.squared_difference(y_true_inversed, y_pred_inversed))

        self.inversed_mse.assign_add(error)
        # Cast to the accumulator's own dtype so assign_add always matches.
        self.count.assign_add(tf.cast(tf.size(y_true), self.count.dtype))

    def result(self):
        """Return sqrt(sum of squared errors / count), or 0 when count is 0."""
        return tf.sqrt(tf.math.divide_no_nan(self.inversed_mse, self.count))

In [17]:
class BuildingNum(Layer):
    """Embed the building number and flatten the embedding to a vector.

    Args:
        CONFIGS: mapping providing `n_buildings` (embedding vocabulary size)
            and `embedding_dim` (embedding width).
        name: layer name.
    """

    def __init__(self, CONFIGS, name='building_num_layer', **kwargs):
        super(BuildingNum, self).__init__(name=name, **kwargs)
        # Keep the plain constructor values so get_config can return them.
        self.n_buildings = CONFIGS['n_buildings']
        self.embedding_dim = CONFIGS['embedding_dim']
        self.building_num_emb = Embedding(
            input_dim=self.n_buildings,
            output_dim=self.embedding_dim
        )
        self.bn_outputs = Reshape(target_shape=(self.embedding_dim,))

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        Only plain values are returned (not the sub-layer objects), so the
        config is serialisable and `from_config` can call `__init__` again.
        """
        config = super(BuildingNum, self).get_config().copy()
        config.update({
            'CONFIGS': {
                'n_buildings': self.n_buildings,
                'embedding_dim': self.embedding_dim,
            },
        })
        return config

    def call(self, inputs):
        x = self.building_num_emb(inputs)
        outputs = self.bn_outputs(x)
        return outputs
    

class BuildingInfo(Layer):
    """Dense(16, relu) -> Dropout(0.3) -> Dense(32, relu) block.

    Args:
        CONFIGS: accepted for a uniform constructor signature; no value is
            read from it.
        name: layer name.
    """

    def __init__(self, CONFIGS, name='building_info_layer', **kwargs):
        super(BuildingInfo, self).__init__(name=name, **kwargs)
        self.bi_dense_0 = Dense(16, activation='relu')
        self.dropout_0 = Dropout(0.3)
        self.bi_outputs = Dense(32, activation='relu')

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        The sub-layers are recreated by `__init__`, so only the (unused but
        required) `CONFIGS` argument is added; returning the layer objects
        themselves would make the config non-serialisable.
        """
        config = super(BuildingInfo, self).get_config().copy()
        config.update({'CONFIGS': {}})
        return config

    def call(self, inputs):
        x = self.bi_dense_0(inputs)
        x = self.dropout_0(x)
        outputs = self.bi_outputs(x)
        return outputs
    

class TargetTimeInfo(Layer):
    """Dense(16, relu) -> Dropout(0.3) -> Dense(32, relu) block.

    Args:
        CONFIGS: accepted for a uniform constructor signature; no value is
            read from it.
        name: layer name.
    """

    def __init__(self, CONFIGS, name='target_time_info_layer', **kwargs):
        super(TargetTimeInfo, self).__init__(name=name, **kwargs)
        self.tti_dense_0 = Dense(16, activation='relu')
        self.dropout_0 = Dropout(0.3)
        self.tti_outputs = Dense(32, activation='relu')

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        The sub-layers are recreated by `__init__`, so only the (unused but
        required) `CONFIGS` argument is added; returning the layer objects
        themselves would make the config non-serialisable.
        """
        config = super(TargetTimeInfo, self).get_config().copy()
        config.update({'CONFIGS': {}})
        return config

    def call(self, inputs):
        x = self.tti_dense_0(inputs)
        x = self.dropout_0(x)
        outputs = self.tti_outputs(x)
        return outputs
    

class TimeSeries(Layer):
    """Encode the input window with the architecture named by `model_type`.

    Supported values of `CONFIGS['model_type']`:
    'flatten' (no encoder), 'cnn1d', 'cnn2d', 'lstm' and 'bilstm'. The
    encoder output is always flattened.

    Args:
        CONFIGS: mapping providing `model_type`; for 'cnn2d' it must also
            provide `window_size` and `time_series_cols`.
        name: layer name.

    Raises:
        ValueError: if `model_type` is not one of the supported values.
    """

    _MODEL_TYPES = ('flatten', 'cnn1d', 'cnn2d', 'lstm', 'bilstm')

    def __init__(self, CONFIGS, name='time_series_layer', **kwargs):
        super(TimeSeries, self).__init__(name=name, **kwargs)

        model_type = CONFIGS['model_type']
        if model_type not in self._MODEL_TYPES:
            raise ValueError(
                f'Unknown model_type {model_type!r}; expected one of {self._MODEL_TYPES}'
            )
        # Remember the choice on the instance so call() and get_config() do
        # not depend on a notebook-global CONFIGS that may change later.
        self.model_type = model_type

        if model_type == 'cnn1d':
            self.conv1d_0 = Conv1D(16, 3, 2, activation='relu')
            self.pool1d_0 = MaxPool1D(2)
            self.conv1d_1 = Conv1D(32, 3, 2, activation='relu')
            self.pool1d_1 = MaxPool1D(2)
        elif model_type == 'cnn2d':
            self.window_size = CONFIGS['window_size']
            self.series_cols = tuple(CONFIGS['time_series_cols'])
            # Add a trailing channel axis so the window can go through Conv2D.
            self.conv2d_reshape = Reshape(target_shape=(
                self.window_size, len(self.series_cols), 1
            ))
            self.conv2d_0 = Conv2D(8, (3, 1), strides=(2, 1), activation='relu')
            self.pool2d_0 = MaxPool2D((2, 1))
            self.conv2d_1 = Conv2D(16, (3, 1), strides=(2, 1), activation='relu')
            self.pool2d_1 = MaxPool2D((2, 1))
        elif model_type == 'lstm':
            self.lstm_0 = LSTM(16, return_sequences=True, activation='relu')
            self.lstm_1 = LSTM(32, activation='relu')
        elif model_type == 'bilstm':
            self.bilstm_0 = Bidirectional(LSTM(16, return_sequences=True, activation='relu'))
            self.bilstm_1 = Bidirectional(LSTM(32, activation='relu'))
        self.time_series_outputs = Flatten()

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        Only the plain values read from `CONFIGS` are returned (not the
        sub-layer objects), so the config is serialisable and `from_config`
        can call `__init__` again.
        """
        config = super(TimeSeries, self).get_config().copy()
        layer_configs = {'model_type': self.model_type}
        if self.model_type == 'cnn2d':
            layer_configs['window_size'] = self.window_size
            layer_configs['time_series_cols'] = list(self.series_cols)
        config.update({'CONFIGS': layer_configs})
        return config

    def call(self, inputs):
        # Start from the inputs in every branch; previously only the
        # 'flatten' and 'cnn1d' branches defined `x` before using it.
        x = inputs
        if self.model_type == 'cnn1d':
            x = self.conv1d_0(x)
            x = self.pool1d_0(x)
            x = self.conv1d_1(x)
            x = self.pool1d_1(x)
        elif self.model_type == 'cnn2d':
            x = self.conv2d_reshape(x)
            x = self.conv2d_0(x)
            x = self.pool2d_0(x)
            x = self.conv2d_1(x)
            x = self.pool2d_1(x)
        elif self.model_type == 'lstm':
            x = self.lstm_0(x)
            x = self.lstm_1(x)
        elif self.model_type == 'bilstm':
            x = self.bilstm_0(x)
            x = self.bilstm_1(x)
        outputs = self.time_series_outputs(x)
        return outputs

In [18]:
def set_model(CONFIGS, model_name=None, print_summary=False):
    """Build and compile the multi-input forecasting model.

    Four branches (building number, building info, target-time info and the
    time-series window) are concatenated and passed through a dense head that
    outputs `target_length` values. The `to_inverse` input is concatenated to
    that output unchanged so the loss and metric can undo the target scaling.

    Args:
        CONFIGS: mapping with the column-group lists, `window_size`,
            `target_length`, `target_max`, `learning_rate`, `model_name` and
            the keys required by the custom layers.
        model_name: model name; falls back to `CONFIGS['model_name']`.
        print_summary: print `model.summary()` after compiling.

    Returns:
        The compiled Keras model.
    """

    def _input(name, *feature_shape):
        # Input with a free batch dimension followed by the given shape.
        return Input(batch_shape=(None, *feature_shape), name=name)

    # Branches are created in the same order as the model's input list.
    bn_in = _input('building_num_inputs', 1)
    bn_feat = BuildingNum(CONFIGS)(bn_in)

    bi_in = _input('building_info_inputs', len(CONFIGS['building_info_cols']))
    bi_feat = BuildingInfo(CONFIGS)(bi_in)

    tti_in = _input('target_time_info_inputs', len(CONFIGS['target_time_info_cols']))
    tti_feat = TargetTimeInfo(CONFIGS)(tti_in)

    ts_in = _input(
        'time_series_inputs',
        CONFIGS['window_size'], len(CONFIGS['time_series_cols'])
    )
    ts_feat = TimeSeries(CONFIGS)(ts_in)

    # Dense head: (Dense -> Dropout) twice, then the linear output layer.
    head = Concatenate(name='concat')([bn_feat, bi_feat, tti_feat, ts_feat])
    for idx, units in enumerate((64, 32)):
        head = Dense(units, activation='relu', name=f'dense_{idx}')(head)
        head = Dropout(0.3, name=f'dropout_{idx}')(head)
    predictions = Dense(CONFIGS['target_length'], name='outputs')(head)

    # Append the inverse-scaling columns to the predictions.
    ti_in = _input('to_inverse_inputs', len(CONFIGS['to_inverse_cols']))
    model_outputs = Concatenate(name='concat_to_inverse')([predictions, ti_in])

    model = Model(
        inputs=[bn_in, bi_in, tti_in, ts_in, ti_in],
        outputs=model_outputs,
        name=model_name or CONFIGS['model_name'],
    )

    loss_fn = CustomMSE(CONFIGS['target_max'])
    rmse_metric = InversedRMSE(CONFIGS)
    adam = Adam(learning_rate=CONFIGS['learning_rate'])
    model.compile(loss=loss_fn, optimizer=adam, metrics=[rmse_metric])

    if print_summary:
        model.summary()

    return model

In [19]:
# Largest unscaled target among rows dated before the validation start; it is
# passed to CustomMSE as the divisor. `data['date_time']` is still the raw
# column here, so this comparison is against the config string directly.
CONFIGS['target_max'] = \
    data[data['date_time']<CONFIGS['valid_start_date_time']]['target'].max()
# Width of the building-number embedding.
CONFIGS['embedding_dim'] = 10

model = set_model(CONFIGS, print_summary=True)

Model: "recursive_prediction"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 building_num_inputs (InputLaye  [(None, 1)]         0           []                               
 r)                                                                                               
                                                                                                  
 building_info_inputs (InputLay  [(None, 7)]         0           []                               
 er)                                                                                              
                                                                                                  
 target_time_info_inputs (Input  [(None, 11)]        0           []                               
 Layer)                                                                        

In [20]:
def recursive_eval(model, ds, data_usage, CONFIGS):
    """Evaluate the model recursively and return the inverse-scaled RMSE.

    Only the target channel of the first input window comes from the data.
    After every prediction the oldest `target_length` steps of that channel
    are dropped and the predictions are appended, so later steps are
    predicted from the model's own outputs. All other input features are
    always taken from `ds`.

    Args:
        model: compiled model whose output is the predictions followed by
            the inverse-scaling mean and std columns.
        ds: dataset batched with `batch_size == CONFIGS['n_buildings']`.
        data_usage: 'valid' or 'test'; selects the evaluated period.
        CONFIGS: mapping providing `n_buildings`, `target_length`,
            `valid_start_date_time`, `test_start_date_time` and (for 'test')
            `last_date_time`.

    Returns:
        The RMSE over the evaluated period on the original target scale.

    Raises:
        ValueError: if `data_usage` is neither 'valid' nor 'test'.
        AssertionError: if the period length is not a multiple of
            `target_length`, or a batch does not hold `n_buildings` samples.
    """
    n_buildings = CONFIGS['n_buildings']
    target_length = CONFIGS['target_length']

    if data_usage == 'valid':
        seq_len = str_to_dt(CONFIGS['test_start_date_time']) - \
            str_to_dt(CONFIGS['valid_start_date_time'])
    elif data_usage == 'test':
        seq_len = str_to_dt(CONFIGS['last_date_time'])+datetime.timedelta(hours=1) - \
            str_to_dt(CONFIGS['test_start_date_time'])
    else:
        raise ValueError(f"data_usage must be 'valid' or 'test', got {data_usage!r}")
    # Length of the evaluated period in hours.
    seq_len = seq_len.total_seconds()/3600

    # Loop-invariant check, done once up front instead of on every batch.
    assert seq_len % target_length == 0, \
        f'seq_len must be multiple of target_length. Now seq_len: {seq_len}, target_length: {CONFIGS["target_length"]}'

    # Seed the recursive target channel from the first input window.
    (_, _, _, first_ts, _), _ = next(iter(ds))
    ts_target = first_ts[..., -1:]

    inversed_mse = 0
    for i, ((bn, bi, tti, ts, ti), y_true) in enumerate(ds):
        assert len(y_true) == n_buildings, \
            f'batch_size is {len(y_true)} now. Set batch_size same as CONFIGS["n_buildings"]'
        # Each prediction covers `target_length` steps, so only every
        # `target_length`-th batch is evaluated.
        if i % target_length != 0:
            continue

        # Replace the true target channel with the recursively built one.
        ts_wo_target = ts[..., :-1]
        ts_concat = tf.concat([ts_wo_target, ts_target], axis=-1)

        y_true = tf.reshape(y_true, (n_buildings, target_length))
        # A single in-memory batch: predict_on_batch returns NumPy output
        # without setting up a new prediction loop on every step.
        y_pred = model.predict_on_batch([bn, bi, tti, ts_concat, ti])
        y_pred, mean, std = y_pred[..., :-2], y_pred[..., [-2]], y_pred[..., [-1]]

        # Slide the target channel forward by `target_length` steps.
        ts_target = tf.concat([
            ts_target[:, target_length:, :],
            y_pred.reshape(n_buildings, target_length, 1)
        ], axis=1)

        y_true_inversed = y_true*std+mean
        y_pred_inversed = y_pred*std+mean

        inversed_mse += tf.reduce_sum((y_true_inversed-y_pred_inversed)**2)/(seq_len*n_buildings)

    inversed_rmse = inversed_mse**0.5

    return inversed_rmse

In [21]:
class BestByRecursiveRMSE(Callback):
    """Early stopping and checkpointing driven by the recursive validation RMSE.

    After every epoch the model is evaluated with `recursive_eval` on
    `valid_ds`. The weights of the best epoch are kept in memory; training
    stops after `CONFIGS['es_patience']` epochs without improvement. When
    training ends, for any reason, the best weights are restored and saved to
    `{model_path}{model_name}.h5`.

    Args:
        valid_ds: validation dataset accepted by `recursive_eval`.
        CONFIGS: mapping providing `es_patience`, `model_path`, `model_name`
            and the keys required by `recursive_eval`.
    """

    def __init__(self, valid_ds, CONFIGS):
        super(BestByRecursiveRMSE, self).__init__()
        self.valid_ds = valid_ds
        self.CONFIGS = CONFIGS
        self.best_weights = None

    def on_train_begin(self, logs=None):
        # Reset all bookkeeping so the callback can be reused across fits.
        self.best_epoch = None
        self.best_weights = None
        self.wait = 0
        self.stopped_epoch = 0
        self.best_train_loss = np.inf
        self.best_train_inversed_rmse = np.inf
        self.best_valid_rmse = np.inf

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        train_loss = logs.get('loss')
        train_inversed_rmse = logs.get('inversed_rmse')
        # Convert to a plain float for comparison and formatting.
        valid_rmse = float(
            recursive_eval(self.model, self.valid_ds, 'valid', self.CONFIGS)
        )
        print(f'Epoch: {epoch}')
        print(f'\ttrain loss: {train_loss:.07f}\ttrain_inversed_rmse: {train_inversed_rmse:.07f}')
        print(f'\trecursive valid rmse: {valid_rmse:.07f}\n')

        if valid_rmse < self.best_valid_rmse:
            # Record the epoch only when it actually improves the score.
            self.best_epoch = epoch
            self.best_train_loss = train_loss
            self.best_train_inversed_rmse = train_inversed_rmse
            self.best_valid_rmse = valid_rmse
            self.wait = 0
            self.best_weights = self.model.get_weights()
        else:
            self.wait += 1
            if self.wait >= self.CONFIGS['es_patience']:
                self.stopped_epoch = epoch
                self.model.stop_training = True

    def on_train_end(self, logs=None):
        # Nothing to restore if no epoch finished.
        if self.best_weights is None:
            return
        # Restore and save the best weights whether or not early stopping
        # fired, so the weights file always exists after training.
        self.model.set_weights(self.best_weights)
        self.model.save_weights(f'{self.CONFIGS["model_path"]}{self.CONFIGS["model_name"]}.h5')
        print(f'\nBest epoch by recursive valid rmse: {self.best_epoch}')
        print(f'\ttrain loss: {self.best_train_loss:.07f}\ttrain_inversed_rmse: {self.best_train_inversed_rmse:.07f}')
        print(f'\trecursive valid rmse: {self.best_valid_rmse:.07f}')

In [22]:
def train_model(model, train_ds, valid_ds, CONFIGS):
    """Fit `model` on `train_ds` with recursive-validation early stopping.

    Args:
        model: compiled Keras model.
        train_ds: already batched training dataset.
        valid_ds: validation dataset handed to `BestByRecursiveRMSE`.
        CONFIGS: mapping providing `tensorboard_log_path`, `epochs` and the
            keys required by `BestByRecursiveRMSE`.

    Returns:
        The `History` object returned by `model.fit`.
    """
    tensorboard_callback = TensorBoard(
        log_dir = CONFIGS['tensorboard_log_path']
    )
    best_by_recursive_rmse = BestByRecursiveRMSE(valid_ds, CONFIGS)

    # `train_ds` is a tf.data pipeline that is already batched, so no
    # `batch_size` is passed to fit (it must not be given for dataset inputs).
    history = model.fit(
        train_ds,
        epochs = CONFIGS['epochs'],
        callbacks = [
            best_by_recursive_rmse,
            tensorboard_callback,
        ],
        verbose=0
    )

    return history

In [23]:
# Train the model; per-epoch progress is printed by the BestByRecursiveRMSE callback.
history = train_model(model, train_ds, valid_ds, CONFIGS)

2022-02-18 08:39:49.829832: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8100


Epoch: 0
	train loss: 0.0023470	train_inversed_rmse: 819.2539062
	recursive valid rmse: 696.1470839

Epoch: 1
	train loss: 0.0013332	train_inversed_rmse: 617.4653931
	recursive valid rmse: 612.2616672

Epoch: 2
	train loss: 0.0010563	train_inversed_rmse: 549.6088867
	recursive valid rmse: 564.6299684

Epoch: 3
	train loss: 0.0008432	train_inversed_rmse: 491.0686340
	recursive valid rmse: 519.7577684

Epoch: 4
	train loss: 0.0007395	train_inversed_rmse: 459.8553772
	recursive valid rmse: 499.0166944

Epoch: 5
	train loss: 0.0006378	train_inversed_rmse: 427.0700989
	recursive valid rmse: 485.2085708

Epoch: 6
	train loss: 0.0005815	train_inversed_rmse: 407.7862854
	recursive valid rmse: 485.2940741

Epoch: 7
	train loss: 0.0005440	train_inversed_rmse: 394.4239197
	recursive valid rmse: 461.7611130

Epoch: 8
	train loss: 0.0005106	train_inversed_rmse: 382.1182556
	recursive valid rmse: 478.6382050

Epoch: 9
	train loss: 0.0004781	train_inversed_rmse: 369.7617493
	recursive valid rmse: 479

In [24]:
# Rebuild the same architecture under a new name and load the weights file
# written by the BestByRecursiveRMSE callback.
best_model = set_model(CONFIGS, model_name='best_'+CONFIGS['model_name'])
best_model.load_weights(f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5')

In [25]:
# Keras evaluation: every window's target channel comes from the data.
train_loss, train_rmse = best_model.evaluate(train_ds, verbose=0)
valid_loss, valid_rmse = best_model.evaluate(valid_ds, verbose=0)
test_loss, test_rmse = best_model.evaluate(test_ds, verbose=0)

# Recursive evaluation: after the first window, the target channel is filled
# with the model's own predictions.
recursive_valid_rmse = recursive_eval(best_model, valid_ds, 'valid', CONFIGS)
recursive_test_rmse = recursive_eval(best_model, test_ds, 'test', CONFIGS)

print(f'train_loss: {train_loss:.07f}\ttrain_rmse: {train_rmse:.07f}')
print(f'valid_loss: {valid_loss:.07f}\tvalid_rmse: {valid_rmse:.07f}')
print(f'test_loss: {test_loss:.07f}\ttest_rmse: {test_rmse:.07f}')

print(f'\nrecursive_valid_rmse: {recursive_valid_rmse:.6f}\nrecursive_test_rmse: {recursive_test_rmse:.6f}')

train_loss: 0.0001872	train_rmse: 231.3749847
valid_loss: 0.0005532	valid_rmse: 397.7495422
test_loss: 0.0004342	test_rmse: 352.3868408

recursive_valid_rmse: 434.670028
recursive_test_rmse: 437.077567
