In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd

import datetime

import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Loss
from tensorflow.keras.metrics import Metric
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from tensorflow.keras.losses import MeanSquaredError

In [2]:
CONFIGS = {
    'data_path': '../data/',
    'model_path': '../model/',
    'model_name': 'multi_task_learning',
    'model_type': 'cnn1d',
    
    'valid_start_date_time': '2020-08-11 00',
    'test_start_date_time': '2020-08-18 00',
    
    'batch_size': 64,
    'learning_rate': 1e-4,
    'epochs': 100,
    'es_patience': 10,
    
    'window_size': 7*24,
    'shift': 1,
    'target_length': 3,
}

In [3]:
train_origin = pd.read_csv(CONFIGS['data_path']+'train.csv', encoding='cp949')

In [4]:
data = deepcopy(train_origin)

data.columns = [
    'num', 'date_time', 'target', 'temp', 'wind',
    'humid', 'rain', 'sun', 'non_elec_eq', 'sunlight_eq'
]

data['num'] -= 1

print(f'data.shape: {data.shape}')

CONFIGS['n_buildings'] = len(data['num'].unique())

data.shape: (122400, 10)


In [5]:
def mk_time_data(data):
    
    new_data = data.copy()

    new_data['date_time'] = data['date_time'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H'))
    
    new_data['time_stamp'] = new_data['date_time'].apply(lambda x: x.timestamp())
    
    new_data['year'] = new_data['date_time'].apply(lambda x: x.year)
    new_data['month'] = new_data['date_time'].apply(lambda x: x.month)
    new_data['day'] = new_data['date_time'].apply(lambda x: x.day)
    
    new_data['hour'] = new_data['date_time'].apply(lambda x: x.hour)
    new_data['cos_hour'] = np.cos(2*np.pi*(new_data['hour']/24))
    new_data['sin_hour'] = np.sin(2*np.pi*(new_data['hour']/24))

    new_data['weekday'] = new_data['date_time'].apply(lambda x: x.weekday())
    new_data['cos_weekday'] = np.cos(2*np.pi*(new_data['weekday']/7))
    new_data['sin_weekday'] = np.sin(2*np.pi*(new_data['weekday']/7))
    
    new_data['is_holiday'] = 0
    new_data.loc[(new_data['weekday'] == 5) | (new_data['weekday'] == 6), 'is_holiday'] = 1
    new_data.loc[(new_data['month'] == 8) & (new_data['day'] == 17), 'is_holiday'] = 1
    
    return new_data

In [6]:
new_data = mk_time_data(data)

In [7]:
def mk_building_info(data, data_for_cal):
        
    new_data = data.copy()
    new_data['range'] = 0
    new_data['mean'] = 0
    new_data['std'] = 0
    new_data['holiday_gap'] = 0
    new_data['day_gap'] = 0
    
    B_NUM = 60

    for num in range(B_NUM):
        building = data_for_cal.query(f'num == {num}')
        
        bt_range = building['target'].max()-building['target'].min()
        bt_mean = building['target'].mean()
        bt_std = building['target'].std()
        bt_holiday_gap = abs(building.query('is_holiday == 0')['target'].mean() - building.query('is_holiday == 1')['target'].mean())
        bt_day_gap = 0
        for d in range(building.shape[0]//24):
            tmp = building['target'][d*24:(d+1)*24]
            bt_day_gap += (tmp.max()-tmp.min())/(building.shape[0]//24)
            
        new_data.loc[new_data['num']==num, 'range'] = bt_range
        new_data.loc[new_data['num']==num, 'mean'] = bt_mean
        new_data.loc[new_data['num']==num, 'std'] = bt_std
        new_data.loc[new_data['num']==num, 'holiday_gap'] = bt_holiday_gap
        new_data.loc[new_data['num']==num, 'day_gap'] = bt_day_gap
        
    new_data['mean_to_inverse'] = new_data['mean']
    new_data['std_to_inverse'] = new_data['std']
        
    return new_data

In [8]:
new_data = mk_building_info(new_data, new_data[new_data['date_time']<CONFIGS['valid_start_date_time']])

In [9]:
def mk_mean_std_dict(data, scaling_by_building_cols):
    mean_std_dict = {}
    for num in range(60):
        building = data.query(f'num == {num}')
        mean_std_dict[num] = {
            col: {
                'mean': building[col].mean(),
                'std': building[col].std()
            } for col in scaling_by_building_cols
        }
    return mean_std_dict

In [10]:
scaling_by_building_cols = [
    'temp', 'wind', 'humid', 'rain', 'sun', 'time_stamp', 'target',
]
scaling_by_all_cols = ['range', 'mean', 'std', 'holiday_gap', 'day_gap']

mean_std_dict = mk_mean_std_dict(
    new_data[new_data['date_time'] < CONFIGS['valid_start_date_time']],
    scaling_by_building_cols
)
CONFIGS['mean_std_dict'] = mean_std_dict

In [11]:
def standard_scaling(data, scaling_by_building_cols, scaling_by_all_cols, mean_std_dict=None):
    if not mean_std_dict:
        mean_std_dict = mk_mean_std_dict(data, scaling_by_building_cols)
        
    new_data = data.copy()
    for num in range(60):
        for col in scaling_by_building_cols:
            new_data.loc[new_data['num']==num, col] -= mean_std_dict[num][col]['mean']
            new_data.loc[new_data['num']==num, col] /= mean_std_dict[num][col]['std']
    
    for col in scaling_by_all_cols:
        m = new_data.loc[:, col].mean()
        s = new_data.loc[:, col].std()
        new_data.loc[:, col] -= m
        new_data.loc[:, col] /= s
    
    return new_data

In [12]:
new_data = standard_scaling(new_data, scaling_by_building_cols, scaling_by_all_cols, mean_std_dict)

In [13]:
building_num_cols = ['num']
building_info_cols = ['range', 'mean', 'std', 'holiday_gap', 'day_gap']
target_time_info_cols = [
    'temp', 'wind', 'humid', 'rain', 'sun', 'time_stamp',
    'cos_hour', 'sin_hour', 'cos_weekday', 'sin_weekday',
    'is_holiday',
]
time_series_cols = [
    'temp', 'wind', 'humid', 'rain', 'sun', 'time_stamp',
    'cos_hour', 'sin_hour', 'cos_weekday', 'sin_weekday',
    'is_holiday', 'target',
]
target_cols = ['target']
to_inverse_cols = ['mean_to_inverse', 'std_to_inverse']
input_cols = list(set(
    building_num_cols + building_info_cols + target_time_info_cols +
    time_series_cols + target_cols + to_inverse_cols
))


CONFIGS['building_num_cols'] = building_num_cols
CONFIGS['building_info_cols'] = building_info_cols
CONFIGS['target_time_info_cols'] = target_time_info_cols
CONFIGS['time_series_cols'] = time_series_cols
CONFIGS['target_cols'] = target_cols
CONFIGS['to_inverse_cols'] = to_inverse_cols
CONFIGS['input_cols'] = input_cols

In [17]:
def crop(data, CONFIGS):
    croped = data[CONFIGS['window_size']+1:-(CONFIGS['target_length']-2)]
    return Dataset.from_tensor_slices(croped)


def mk_window(data, size, shift):
    ds = Dataset.from_tensor_slices(data)
    ds = ds.window(
        size, shift=shift, drop_remainder=True
    ).flat_map(lambda x: x).batch(size)
    return ds


def mk_dataset(data, CONFIGS, shuffle=False):

    data = data[CONFIGS['input_cols']]
    building_length = data.query('num == 0').shape[0]

    building_num = data[CONFIGS['building_num_cols']]
    building_info = data[CONFIGS['building_info_cols']]
    target_time_info = data[CONFIGS['target_time_info_cols']]
    time_series = data[CONFIGS['time_series_cols']]
    to_inverse = data[CONFIGS['to_inverse_cols']]
    target = data[CONFIGS['target_cols']]

    # building_num
    building_num_ds = Dataset.from_tensor_slices(building_num).batch(building_length)
    building_num_ds = building_num_ds.flat_map(lambda x: crop(x, CONFIGS))
    building_num_ds = building_num_ds.map(lambda x: tf.cast(x, tf.int16))

    # building_info
    building_info_ds = Dataset.from_tensor_slices(building_info).batch(building_length)
    building_info_ds = building_info_ds.flat_map(lambda x: crop(x, CONFIGS))
    building_info_ds = building_info_ds.map(lambda x: tf.cast(x, tf.float32))

    # target_time_info
    target_time_info_ds = Dataset.from_tensor_slices(target_time_info).batch(building_length)
    target_time_info_ds = target_time_info_ds.flat_map(lambda x: crop(x, CONFIGS))
    target_time_info_ds = target_time_info_ds.map(lambda x: tf.cast(x, tf.float32))

    # time_series
    time_series_ds = Dataset.from_tensor_slices(time_series).batch(building_length)
    time_series_ds = time_series_ds.flat_map(
        lambda x: mk_window(x, CONFIGS['window_size'], CONFIGS['shift']))
    time_series_ds = time_series_ds.map(lambda x: tf.cast(x, tf.float32))

    # target
    target_ds = Dataset.from_tensor_slices(target).batch(building_length)
    target_ds = target_ds.flat_map(
        lambda x: mk_window(x, CONFIGS['target_length'], CONFIGS['shift']))
    target_ds = target_ds.map(lambda x: tf.cast(x, tf.float32))
    
    # to_inverse
    to_inverse_ds = Dataset.from_tensor_slices(to_inverse).batch(building_length)
    to_inverse_ds = to_inverse_ds.flat_map(lambda x: crop(x, CONFIGS))
    to_inverse_ds = to_inverse_ds.map(lambda x: tf.cast(x, tf.int16))
    
    # zip
    ds = Dataset.zip((
        (
            building_num_ds,
            building_info_ds,
            target_time_info_ds,
            time_series_ds,
            to_inverse_ds
        ),
        target_ds
    ))
    if shuffle:
        ds = ds.shuffle(512)
    ds = ds.batch(CONFIGS['batch_size']).cache().prefetch(2)
    
    return ds

In [18]:
str_to_dt = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H')
hour_to_td = lambda x: datetime.timedelta(hours=x)

train = new_data.loc[
    new_data['date_time'] < \
        str_to_dt(CONFIGS['valid_start_date_time']),
    :
]
valid = new_data.loc[
    (new_data['date_time'] > \
        str_to_dt(CONFIGS['valid_start_date_time'])-hour_to_td(CONFIGS['window_size']))&\
    (new_data['date_time'] < \
         str_to_dt(CONFIGS['test_start_date_time'])),
    :
]
test = new_data.loc[
    new_data['date_time'] > \
        str_to_dt(CONFIGS['test_start_date_time'])-hour_to_td(CONFIGS['window_size']),
    :
]

train_ds = mk_dataset(train, CONFIGS, shuffle=True)
valid_ds = mk_dataset(valid, CONFIGS)
test_ds = mk_dataset(test, CONFIGS)

In [20]:
iter(test_ds.unbatch()).next()

((<tf.Tensor: shape=(1,), dtype=int16, numpy=array([0], dtype=int16)>,
  <tf.Tensor: shape=(5,), dtype=float32, numpy=
  array([-0.78192174,  3.4315276 , -0.7918083 , -0.6576589 , -0.9102245 ],
        dtype=float32)>,
  <tf.Tensor: shape=(11,), dtype=float32, numpy=
  array([ 0.37290826, -1.5672512 ,  0.8200607 , -0.2154803 , -0.5449863 ,
          2.0780547 ,  0.8660254 ,  0.5       ,  0.6234898 ,  0.7818315 ,
          0.        ], dtype=float32)>,
  <tf.Tensor: shape=(168, 12), dtype=float32, numpy=
  array([[ 0.09663188, -1.2139744 ,  1.3741381 , ...,  0.7818315 ,
           0.        ,  0.7294189 ],
         [ 0.12732926, -1.1256552 ,  1.4357023 , ...,  0.7818315 ,
           0.        ,  0.513595  ],
         [ 0.0659345 ,  0.9940057 ,  1.4357023 , ...,  0.7818315 ,
           0.        ,  0.60095227],
         ...,
         [ 0.89476365, -1.037336  ,  0.38911152, ...,  0.        ,
           1.        ,  1.2741172 ],
         [ 0.77197415, -1.2139744 ,  0.5122399 , ...,  0.    

In [32]:
CONFIGS['target_max'] = \
    data[data['date_time']<CONFIGS['valid_start_date_time']]['target'].max()

In [33]:
class CustomRMSE(Loss):
    def __init__(self, CONFIGS, name="custom_rmse"):
        super(CustomRMSE, self).__init__(name=name)
        self.CONFIGS = CONFIGS
        self.target_mean_std = tf.cast(pd.DataFrame(
            [CONFIGS['mean_std_dict'][i]['target'] for i in range(60)]
        ).values, tf.float32)

    def call(self, y_true, y_pred):
        def tmp(num):
            print(num)
            return self.target_mean_std[0]
        means_stds = tf.map_fn(
            tmp,
            y_pred[1]
        )

        y_true_inversed_scaled = y_pred[0] * tf.reshape(means_stds[:, 1], (-1, 1))
        y_true_inversed_scaled = y_true_inversed_scaled + tf.reshape(means_stds[:, 0], (-1, 1))
        y_true_inversed_scaled = y_true_inversed_scaled/self.CONFIGS['target_max']
        y_pred_inversed_scaled = y_true * tf.reshape(means_stds[:, 1], (-1, 1))
        y_pred_inversed_scaled = y_pred_inversed_scaled + tf.reshape(means_stds[:, 0], (-1, 1))
        y_pred_inversed_scaled = y_pred_inversed_scaled/self.CONFIGS['target_max']

        rmse = (tf.reduce_mean((y_true_inversed_scaled - y_pred_inversed_scaled)**2))**0.5
        return rmse

In [34]:
def set_model(CONFIGS, model_name=None, print_summary=False):
    
    # building_num
    building_num_inputs = Input(batch_shape=(None, 1), name='building_num_inputs')
    input_dim = CONFIGS['n_buildings']; output_dim = CONFIGS['embedding_dim']
    building_num_emb = Embedding(
        input_dim=input_dim,
        output_dim=output_dim,
        name='embedding'
    )(building_num_inputs)
    bn_outputs = Reshape(target_shape=(output_dim,), name='bn_outputs')(building_num_emb)
    
    # building_info
    building_info_inputs = Input(
        batch_shape=(None, len(CONFIGS['building_info_cols'])),
        name='building_info_inputs'
    )
    bi_dense_0 = Dense(16, activation='relu', name='bi_dense_0')(building_info_inputs)
    bi_outputs = Dense(32, activation='relu', name='bi_outputs')(bi_dense_0)
    
    # target_time_info
    target_time_info_inputs = Input(
        batch_shape=(None, len(CONFIGS['target_time_info_cols'])),
        name='target_time_info_inputs'
    )
    tti_dense_0 = Dense(16, activation='relu', name='tti_dense_0')(target_time_info_inputs)
    tti_outputs = Dense(32, activation='relu', name='tti_outputs')(tti_dense_0)
    
    # time_series
    time_series_inputs = Input(batch_shape=(
        None, CONFIGS['window_size'], len(CONFIGS['time_series_cols'])
    ), name='time_series_inputs')
    
    if CONFIGS['model_type'] == 'flatten':
        time_series_outputs = Flatten(name='time_series_outputs')(time_series_inputs)
    elif CONFIGS['model_type'] == 'cnn1d':
        conv_0 = Conv1D(16, 3, 2, activation='relu', name='conv_0')(time_series_inputs)
        pool_0 = MaxPool1D(2, name='pool_0')(conv_0)
        conv_1 = Conv1D(32, 3, 2, activation='relu', name='conv_1')(pool_0)
        pool_1 = MaxPool1D(2, name='pool_1')(conv_1)
        time_series_outputs = Flatten(name='time_series_outputs')(pool_1)
    elif CONFIGS['model_type'] == 'cnn2d':
        reshape = Reshape(target_shape=(
            CONFIGS['window_size'], len(CONFIGS['time_series_cols']), 1
        ), name='reshape')(time_series_inputs)
        conv_0 = Conv2D(8, (3, 1), strides=(2, 1), activation='relu', name='conv_0')(reshape)
        pool_0 = MaxPool2D((2, 1), name='pool_0')(conv_0)
        conv_1 = Conv2D(16, (3, 1), strides=(2, 1), activation='relu', name='conv_1')(pool_0)
        pool_1 = MaxPool2D((2, 1), name='pool_1')(conv_1)
        time_series_outputs = Flatten(name='time_series_outputs')(pool_1)
    elif CONFIGS['model_type'] == 'lstm':
        lstm_0 = LSTM(16, return_sequences=True, activation='relu', name='lstm_0')(time_series_inputs)
        lstm_1 = LSTM(32, activation='relu', name='lstm_1')(lstm_0)
        time_series_outputs = Flatten(name='time_series_outputs')(lstm_1)
    elif CONFIGS['model_type'] == 'bilstm':
        bilstm_0 = Bidirectional(LSTM(
            16, return_sequences=True, activation='relu'
        ), name='bilstm_0')(time_series_inputs)
        bilstm_1 = Bidirectional(LSTM(
            32, activation='relu'
        ), name='bilstm_1')(bilstm_0)
        time_series_outputs = Flatten(name='time_series_outputs')(bilstm_1)
    
    concat = Concatenate(name='concat')([bn_outputs, bi_outputs, tti_outputs, time_series_outputs])
        
    dense_0 = Dense(64, activation='relu', name='dense_0')(concat)
    dense_1 = Dense(32, activation='relu', name='dense_1')(dense_0)
    outputs = Dense(CONFIGS['target_length'], name='outputs')(dense_1)
    
    if not model_name:
        model_name = CONFIGS['model_name']
    
    model = Model(
        inputs = [
            building_num_inputs,
            building_info_inputs,
            target_time_info_inputs,
            time_series_inputs
        ],
        outputs = [outputs, building_num_inputs],
        name = model_name
    )
    
    optimizer = Adam(learning_rate=CONFIGS['learning_rate'])
    model.compile(
        loss = CustomRMSE(CONFIGS),
        optimizer = optimizer,
    )
    
    if print_summary:
        model.summary()
    
    return model

In [35]:
CONFIGS['embedding_dim'] = 30

model = set_model(CONFIGS, print_summary=True)

Model: "multi_task_learning"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
time_series_inputs (InputLayer) [(None, 168, 12)]    0                                            
__________________________________________________________________________________________________
conv_0 (Conv1D)                 (None, 83, 16)       592         time_series_inputs[0][0]         
__________________________________________________________________________________________________
pool_0 (MaxPooling1D)           (None, 41, 16)       0           conv_0[0][0]                     
__________________________________________________________________________________________________
building_num_inputs (InputLayer [(None, 1)]          0                                            
________________________________________________________________________________

In [36]:
def train_model(model, train_ds, valid_ds, CONFIGS):
    
    early_stop = EarlyStopping(
        patience=CONFIGS['es_patience']
    )
    save_best_only = ModelCheckpoint(
        filepath = f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5',
        monitor = 'val_loss',
        save_best_only = True,
        save_weights_only = True
    )
    
    history = model.fit(
        train_ds,
        batch_size = CONFIGS['batch_size'],
        epochs = CONFIGS['epochs'],
        validation_data = valid_ds,
        callbacks = [
            early_stop,
            save_best_only,
        ]
    )
    
    return history

In [37]:
history = train_model(model, train_ds, valid_ds, CONFIGS)

Epoch 1/100
Tensor("custom_rmse/map/while/TensorArrayV2Read/TensorListGetItem:0", shape=(), dtype=float32)
Tensor("custom_rmse_1/map/while/TensorArrayV2Read/TensorListGetItem:0", shape=(), dtype=float32)
Tensor("custom_rmse/map/while/TensorArrayV2Read/TensorListGetItem:0", shape=(), dtype=float32)
Tensor("custom_rmse_1/map/while/TensorArrayV2Read/TensorListGetItem:0", shape=(), dtype=float32)
    202/Unknown - 3s 8ms/step - loss: 0.0371 - outputs_loss: 0.0076 - building_num_inputs_loss: 0.0295

KeyboardInterrupt: 

In [21]:
best_model = set_model(CONFIGS, model_name='best_'+CONFIGS['model_name'])
best_model.load_weights(f'{CONFIGS["model_path"]}{CONFIGS["model_name"]}.h5')

In [22]:
train_loss = best_model.evaluate(train_ds, verbose=0)
valid_loss = best_model.evaluate(valid_ds, verbose=0)
test_loss = best_model.evaluate(test_ds, verbose=0)

train_rmse = inversed_rmse(train_ds, best_model)
valid_rmse = inversed_rmse(valid_ds, best_model)
test_rmse = inversed_rmse(test_ds, best_model)

print(f'train_loss: {train_loss:.6f}\ttrain_rmse: {train_rmse:.6f}')
print(f'valid_loss: {valid_loss:.6f}\tvalid_rmse: {valid_rmse:.6f}')
print(f'test_loss: {test_loss:.6f}\ttest_rmse: {test_rmse:.6f}')

train_loss: 1.264660	train_rmse: 1078.519767
valid_loss: 2.009985	valid_rmse: 1240.208086
test_loss: 2.189661	test_rmse: 1375.793010
