In [1]:
import sys
sys.path.append('../')

In [2]:
import time
import os

import pandas as pd
import numpy as np
from sklearn.utils import resample
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

import tensorflow as tf
tf.__version__

'2.0.0'

In [3]:
# Turn on memory growth for every physical GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # The memory-growth setting has to be identical on all GPUs, so apply it to each one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print("{} Physical GPUs, {} Logical GPUs".format(len(gpus), len(logical_gpus)))
    except RuntimeError as err:
        # Raised when the GPUs were already initialized before memory growth was requested.
        print(err)

2 Physical GPUs, 2 Logical GPUs


In [4]:
from tensorflow.keras.metrics import Metric
class RSquare(Metric):
    """Streaming R^2 score (coefficient of determination).

    The score tells how close the data are to the fitted regression line.

    - The highest score is 1.0: the predictors perfectly account for the
      variation in the target.
    - A score of 0.0 means the predictors do not account for the variation
      in the target.
    - The score can be negative if the model is worse than predicting the mean.

    The metric accumulates four scalars over all `update_state` calls: the sum
    of the targets, the sum of their squares, the sum of squared residuals and
    the number of target elements. All elements of `y_true` are pooled, so for
    multi-dimensional targets the score is computed over the flattened values.

    Usage:
    ```python
    actuals = tf.constant([1, 4, 3], dtype=tf.float32)
    preds = tf.constant([2, 4, 4], dtype=tf.float32)
    metric = RSquare()
    metric.update_state(actuals, preds)
    print('R^2 score is: ', metric.result().numpy()) # 0.57142866
    ```
    """

    def __init__(self, name='r_square', dtype=tf.float32):
        """Create the four scalar accumulators, all initialized to zero."""
        super(RSquare, self).__init__(name=name, dtype=dtype)
        self.squared_sum = self.add_weight("squared_sum", initializer="zeros")
        self.sum = self.add_weight("sum", initializer="zeros")
        self.res = self.add_weight("residual", initializer="zeros")
        self.count = self.add_weight("count", initializer="zeros")

    def update_state(self, y_true, y_pred):
        """Fold one batch of targets and predictions into the running sums.

        Args:
            y_true: ground-truth values; converted to a float32 tensor.
            y_pred: predicted values, broadcast-compatible with `y_true`;
                converted to a float32 tensor.
        """
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)
        self.squared_sum.assign_add(tf.reduce_sum(y_true**2))
        self.sum.assign_add(tf.reduce_sum(y_true))
        self.res.assign_add(
            tf.reduce_sum(tf.square(tf.subtract(y_true, y_pred))))
        # The sums above run over every element of y_true, so the count must
        # be the total number of elements, not just the size of the first
        # axis; otherwise the mean in `result` is wrong for inputs of rank > 1.
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        """Return 1 - SS_res / SS_tot computed from the accumulated sums."""
        mean = self.sum / self.count
        # Expanded form of sum((y - mean)^2) using only the running sums.
        total = self.squared_sum - 2 * self.sum * mean + self.count * mean**2
        return 1 - (self.res / total)

    def reset_states(self):
        """Set every accumulator back to zero."""
        # The state of the metric will be reset at the start of each epoch.
        self.squared_sum.assign(0.0)
        self.sum.assign(0.0)
        self.res.assign(0.0)
        self.count.assign(0.0)

In [5]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import (LinearLocator, MultipleLocator, FormatStrFormatter)
from matplotlib.dates import MONDAY
from matplotlib.dates import MonthLocator, WeekdayLocator, DateFormatter
from matplotlib import gridspec
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline

In [6]:
# Global figure defaults: 8 x 6 figure expressed in centimetres (divided by
# 2.54 to get inches), Arial text and roman math text at 11 pt.
plt.rcParams.update({
    'figure.figsize': ((8/2.54), (6/2.54)),
    'font.family': "Arial",
    'mathtext.default': "rm",
    'font.size': 11,
})
MARKER_SIZE = 15
# Two eight-colour palettes with matching positions; `cmap_m` holds the lighter tints.
cmap_m = ["#f4a6ad", "#f6957e", "#fccfa2", "#8de7be", "#86d6f2", "#24a9e4", "#b586e0", "#d7f293"]
cmap = ["#e94d5b", "#ef4d28", "#f9a54f", "#25b575", "#1bb1e7", "#1477a2", "#a662e5", "#c2f442"]

# Axes cosmetics: no top spine, uniform 1 pt frame and tick widths, 1.5 pt lines.
plt.rcParams.update({
    'axes.spines.top': False,
    'axes.linewidth': 1,
    'lines.linewidth': 1.5,
    'xtick.major.width': 1,
    'xtick.minor.width': 1,
    'ytick.major.width': 1,
    'ytick.minor.width': 1,
})

In [7]:
def make_patch_spines_invisible(ax):
    """Hide the background patch and all spines of `ax` while keeping its frame on.

    Args:
        ax: axes-like object exposing `set_frame_on`, a `patch` with
            `set_visible`, and a `spines` mapping whose values have `set_visible`.
    """
    ax.set_frame_on(True)
    ax.patch.set_visible(False)
    for side in ax.spines:
        ax.spines[side].set_visible(False)

## Data preparation

### Hyperparameters

In [8]:
# Saved-model locations, one list per file-name prefix (best / trans / raw).
# All three lists share the same architecture order, so a given index refers
# to the same architecture in each of them.
_ARCHITECTURES = ('MLP', 'LSTM', 'AE_LSTM', 'BiLSTM', 'AE_BiLSTM')
BEST_PATHS = ['./models/best_{}.h5'.format(arch) for arch in _ARCHITECTURES]
TRANS_BEST_PATHS = ['./models/trans_{}.h5'.format(arch) for arch in _ARCHITECTURES]
RAW_BEST_PATHS = ['./models/raw_{}.h5'.format(arch) for arch in _ARCHITECTURES]

### Data loading

In [9]:
# Read the pre-split arrays from the .npz archive. The context manager closes
# the archive's file handle once every array has been read; `l` stays bound
# afterwards but must not be indexed again.
with np.load('./env_set/dataset.npz') as l:
    train_input = l['train_input']
    train_label = l['train_label']
    test_input = l['test_input']
    test_label = l['test_label']
    # Per-feature bounds, used further down to map values back via (MAXS-MINS)*x + MINS.
    MAXS = l['MAXS']
    MINS = l['MINS']

    TIME_STEPS = l['TIME_STEPS']
    OUTPUT_SIZE = l['OUTPUT_SIZE']
# Feature count is taken from the last axis of the input array.
NUM_FEATURES = train_input.shape[-1]

In [10]:
# Show the train shapes and the test shapes, separated by a blank line.
print(train_input.shape, train_label.shape, sep='\n')
print()
print(test_input.shape, test_label.shape, sep='\n')

(41025, 24, 5)
(41025, 24, 5)

(16119, 24, 5)
(16119, 24, 5)


In [11]:
# Batched tf.data pipeline over the (input, label) test pairs.
BATCH_SIZE = 32
test_dataset = tf.data.Dataset.from_tensor_slices((test_input, test_label)).batch(BATCH_SIZE)

# Model loading

In [12]:
class RetrainLayer(tf.keras.layers.Layer):
    """A single He-uniform-initialized dense layer wrapped as its own Keras layer.

    Args:
        num_hidden: number of output units of the dense layer.
        activation: activation applied by the dense layer (ReLU by default).
    """

    def __init__(self, num_hidden, activation=tf.nn.relu):
        super().__init__()
        self.num_hidden = num_hidden

        self.dense = tf.keras.layers.Dense(
            units=self.num_hidden,
            activation=activation,
            kernel_initializer='he_uniform',
        )

    def call(self, inp):
        """Apply the wrapped dense layer to `inp` and return its output."""
        projected = self.dense(inp)
        return projected

In [13]:
class Encoder(tf.keras.layers.Layer):
    """Stack of ReLU dense layers followed by a sigmoid layer producing the code.

    Args:
        num_hiddens: sequence of hidden-layer widths, applied in the given order.
        encoding_size: number of units of the final (sigmoid) layer.
    """

    def __init__(self, num_hiddens, encoding_size):
        super().__init__()

        self.num_hiddens = num_hiddens
        self.encoding_size = encoding_size

        # One ReLU layer per requested width, created in order.
        self.denses = [
            tf.keras.layers.Dense(width, activation=tf.nn.relu, kernel_initializer='he_uniform')
            for width in self.num_hiddens
        ]

        self.output_layer = tf.keras.layers.Dense(self.encoding_size, activation=tf.nn.sigmoid)

    def call(self, inp):
        """Pass `inp` through every hidden layer, then through the output layer."""
        hidden = inp
        for dense in self.denses:
            hidden = dense(hidden)

        return self.output_layer(hidden)

In [14]:
class Decoder(tf.keras.layers.Layer):
    """Stack of ReLU dense layers whose widths are `num_hiddens` in reverse order.

    Args:
        num_hiddens: sequence of hidden-layer widths; it is reversed before use.
        original_size: stored on the instance but not used by this layer itself.
    """

    def __init__(self, num_hiddens, original_size):
        super().__init__()

        self.num_hiddens = num_hiddens[::-1]
        self.original_size = original_size

        # One ReLU layer per reversed width, created in order.
        self.denses = [
            tf.keras.layers.Dense(width, activation=tf.nn.relu, kernel_initializer='he_uniform')
            for width in self.num_hiddens
        ]

    def call(self, inp):
        """Pass `inp` through every hidden layer and return the last activation."""
        hidden = inp
        for dense in self.denses:
            hidden = dense(hidden)

        return hidden

In [15]:
class Autoencoder(tf.keras.Model):
    """Dense autoencoder: input layer -> encoder -> decoder -> sigmoid output layer.

    Args:
        num_hiddens: hidden-layer widths shared by the encoder and (reversed) the decoder.
        encoding_size: size of the code produced by the encoder.
        original_size: number of units of the final reconstruction layer.
    """

    def __init__(self, num_hiddens, encoding_size, original_size):
        super().__init__()
        self.num_hiddens = num_hiddens
        self.encoding_size = encoding_size
        self.original_size = original_size

        self.in_retrain_layer = RetrainLayer(self.num_hiddens[0])
        self.encoder = Encoder(self.num_hiddens, self.encoding_size)
        self.decoder = Decoder(self.num_hiddens, self.original_size)
        self.out_retrain_layer = RetrainLayer(self.original_size, activation = tf.nn.sigmoid)

    def call(self, inp, need_code=False, decoding=None):
        """Run the autoencoder in one of three modes.

        Args:
            inp: input tensor; it is always passed through the input layer and
                the encoder, even when `decoding` is supplied.
            need_code: when true (and `decoding` is None), return the encoder
                output instead of the reconstruction.
            decoding: optional code tensor. When given, it is decoded and
                returned, and the code computed from `inp` is discarded.

        Returns:
            The decoded `decoding`, the code for `inp`, or the reconstruction
            of `inp`, depending on the arguments.
        """
        code = self.encoder(self.in_retrain_layer(inp))

        # An explicit code takes precedence over everything else.
        if decoding is not None:
            return self.out_retrain_layer(self.decoder(decoding))

        if need_code:
            return code

        return self.out_retrain_layer(self.decoder(code))

In [16]:
# Autoencoder layout: hidden-layer widths, size of the latent code, and size of
# the reconstructed output.
num_hiddens = [32, 16]
encoding_size = 8
# presumably equal to the number of features per time step (the arrays printed above end in 5) — verify
original_size = 5
autoencoder = Autoencoder(num_hiddens, encoding_size, original_size)

In [17]:
# Restore the autoencoder weights from a saved checkpoint.
autoencoder.load_weights('./checkpoints/trained_AE')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f278c2da588>

In [18]:
# Load the saved forecasting models. BEST_PATHS is ordered
# MLP, LSTM, AE_LSTM, BiLSTM, AE_BiLSTM, hence the non-sequential indices below.
mlp_model = tf.keras.models.load_model(BEST_PATHS[0])
lstm_model = tf.keras.models.load_model(BEST_PATHS[1])
bilstm_model = tf.keras.models.load_model(BEST_PATHS[3])
ae_lstm_model = tf.keras.models.load_model(BEST_PATHS[2])
ae_bilstm_model = tf.keras.models.load_model(BEST_PATHS[4])

In [19]:
# Flag the autoencoder as trainable.
# NOTE(review): no training step is visible in this notebook section, so this
# looks like it has no effect on the predictions below — confirm it is needed.
autoencoder.trainable = True

# Model evaluation w/o transfer

In [20]:
# Predictions of the models that consume the test features directly (no autoencoder).
mlp_pred = mlp_model.predict(test_dataset)
lstm_pred = lstm_model.predict(test_dataset)
bilstm_pred = bilstm_model.predict(test_dataset)

In [21]:
# Encode every test batch (inputs and labels) with the autoencoder in
# code-only mode, then rebuild a batched dataset from the latent codes.
encoded_test_input, encoded_test_label = [], []
for step, (inp, tar) in enumerate(test_dataset):
    encoded_test_input.append(autoencoder(inp, need_code=True))
    encoded_test_label.append(autoencoder(tar, need_code=True))
# Stitch the per-batch codes back together along the sample axis.
encoded_test_input = np.concatenate(encoded_test_input, axis=0)
encoded_test_label = np.concatenate(encoded_test_label, axis=0)
encoded_test_dataset = tf.data.Dataset.from_tensor_slices(
    (encoded_test_input, encoded_test_label)).batch(BATCH_SIZE)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



In [22]:
# Forecast in latent space, then map the predicted codes back to feature space
# with the autoencoder's decoder path.
# The predicted codes get explicit names instead of `_`, which IPython reserves
# for the most recent cell output.
# Autoencoder.call always encodes its first argument, even when `decoding` is
# given, so a single training sample is passed as a cheap placeholder input.
ae_lstm_code = ae_lstm_model.predict(encoded_test_dataset)
ae_lstm_pred = autoencoder(train_input[0:1, :, :], decoding=ae_lstm_code)
ae_bilstm_code = ae_bilstm_model.predict(encoded_test_dataset)
ae_bilstm_pred = autoencoder(train_input[0:1, :, :], decoding=ae_bilstm_code)

In [23]:
# Map labels and predictions back with (MAXS-MINS)*x + MINS (presumably the
# inverse of a min-max scaling applied when the dataset was built — verify),
# then cast to float32 NumPy arrays.
# NOTE(review): each name is overwritten with its rescaled version, so running
# this cell twice rescales already-rescaled values. Re-run the loading and
# prediction cells before repeating it.
test_label = tf.cast((MAXS-MINS)*test_label + MINS, tf.float32).numpy()
mlp_pred = tf.cast((MAXS-MINS)*mlp_pred + MINS, tf.float32).numpy()
lstm_pred = tf.cast((MAXS-MINS)*lstm_pred + MINS, tf.float32).numpy()
bilstm_pred = tf.cast((MAXS-MINS)*bilstm_pred + MINS, tf.float32).numpy()
ae_lstm_pred = tf.cast((MAXS-MINS)*ae_lstm_pred + MINS, tf.float32).numpy()
ae_bilstm_pred = tf.cast((MAXS-MINS)*ae_bilstm_pred + MINS, tf.float32).numpy()

In [24]:
# Write the labels and each model's predictions to CSV, flattened to rows of
# five variables.
RESULT_COLUMNS = ['T_in', 'T_out', 'RH_in', 'CO2', 'Rad']
# to_csv does not create missing directories; make sure the target exists so
# the export cannot fail after the expensive prediction cells.
os.makedirs('./results', exist_ok=True)
for csv_path, values in [
        ('./results/test_label.csv', test_label),
        ('./results/mlp_pred.csv', mlp_pred),
        ('./results/lstm_pred.csv', lstm_pred),
        ('./results/bilstm_pred.csv', bilstm_pred),
        ('./results/ae_lstm_pred.csv', ae_lstm_pred),
        ('./results/ae_bilstm_pred.csv', ae_bilstm_pred),
]:
    pd.DataFrame(values.reshape(-1, len(RESULT_COLUMNS)), columns=RESULT_COLUMNS).to_csv(csv_path)

# Model evaluation w/ transfer

In [None]:
# Read the arrays of the second dataset archive, replacing the ones loaded
# earlier. The context manager closes the archive's file handle once every
# array has been read; `l` stays bound afterwards but must not be indexed again.
with np.load('./env_set/val_dataset.npz') as l:
    train_input = l['train_input']
    train_label = l['train_label']
    test_input = l['test_input']
    test_label = l['test_label']
    # Per-feature bounds, used further down to map values back via (MAXS-MINS)*x + MINS.
    MAXS = l['MAXS']
    MINS = l['MINS']

    TIME_STEPS = l['TIME_STEPS']
    OUTPUT_SIZE = l['OUTPUT_SIZE']
# Feature count is taken from the last axis of the input array.
NUM_FEATURES = train_input.shape[-1]

In [None]:
# Show the train shapes and the test shapes, separated by a blank line.
print(train_input.shape, train_label.shape, sep='\n')
print()
print(test_input.shape, test_label.shape, sep='\n')

In [None]:
# Batched tf.data pipeline over the (input, label) test pairs.
BATCH_SIZE = 32
test_dataset = tf.data.Dataset.from_tensor_slices((test_input, test_label)).batch(BATCH_SIZE)

In [None]:
# Predictions of the models that consume the test features directly (no autoencoder).
mlp_pred = mlp_model.predict(test_dataset)
lstm_pred = lstm_model.predict(test_dataset)
bilstm_pred = bilstm_model.predict(test_dataset)

In [None]:
# Encode every test batch (inputs and labels) with the autoencoder in
# code-only mode, then rebuild a batched dataset from the latent codes.
encoded_test_input, encoded_test_label = [], []
for step, (inp, tar) in enumerate(test_dataset):
    encoded_test_input.append(autoencoder(inp, need_code=True))
    encoded_test_label.append(autoencoder(tar, need_code=True))
# Stitch the per-batch codes back together along the sample axis.
encoded_test_input = np.concatenate(encoded_test_input, axis=0)
encoded_test_label = np.concatenate(encoded_test_label, axis=0)
encoded_test_dataset = tf.data.Dataset.from_tensor_slices(
    (encoded_test_input, encoded_test_label)).batch(BATCH_SIZE)

In [None]:
# Forecast in latent space, then map the predicted codes back to feature space
# with the autoencoder's decoder path.
# The predicted codes get explicit names instead of `_`, which IPython reserves
# for the most recent cell output.
# Autoencoder.call always encodes its first argument, even when `decoding` is
# given, so a single training sample is passed as a cheap placeholder input.
ae_lstm_code = ae_lstm_model.predict(encoded_test_dataset)
ae_lstm_pred = autoencoder(train_input[0:1, :, :], decoding=ae_lstm_code)
ae_bilstm_code = ae_bilstm_model.predict(encoded_test_dataset)
ae_bilstm_pred = autoencoder(train_input[0:1, :, :], decoding=ae_bilstm_code)

In [None]:
# Map labels and predictions back with (MAXS-MINS)*x + MINS (presumably the
# inverse of a min-max scaling applied when the dataset was built — verify),
# then cast to float32 NumPy arrays.
# NOTE(review): each name is overwritten with its rescaled version, so running
# this cell twice rescales already-rescaled values. Re-run the loading and
# prediction cells before repeating it.
test_label = tf.cast((MAXS-MINS)*test_label + MINS, tf.float32).numpy()
mlp_pred = tf.cast((MAXS-MINS)*mlp_pred + MINS, tf.float32).numpy()
lstm_pred = tf.cast((MAXS-MINS)*lstm_pred + MINS, tf.float32).numpy()
bilstm_pred = tf.cast((MAXS-MINS)*bilstm_pred + MINS, tf.float32).numpy()
ae_lstm_pred = tf.cast((MAXS-MINS)*ae_lstm_pred + MINS, tf.float32).numpy()
ae_bilstm_pred = tf.cast((MAXS-MINS)*ae_bilstm_pred + MINS, tf.float32).numpy()

In [None]:
# Write the labels and each model's predictions to CSV, flattened to rows of
# five variables.
RESULT_COLUMNS = ['T_in', 'T_out', 'RH_in', 'CO2', 'Rad']
# to_csv does not create missing directories; make sure the target exists so
# the export cannot fail after the expensive prediction cells.
os.makedirs('./results', exist_ok=True)
for csv_path, values in [
        ('./results/val_test_label.csv', test_label),
        ('./results/val_mlp_pred.csv', mlp_pred),
        ('./results/val_lstm_pred.csv', lstm_pred),
        ('./results/val_bilstm_pred.csv', bilstm_pred),
        ('./results/val_ae_lstm_pred.csv', ae_lstm_pred),
        ('./results/val_ae_bilstm_pred.csv', ae_bilstm_pred),
]:
    pd.DataFrame(values.reshape(-1, len(RESULT_COLUMNS)), columns=RESULT_COLUMNS).to_csv(csv_path)

In [None]:
# Read the arrays from the archive again so that every name below holds the
# values stored on disk, regardless of what earlier cells did to them.
# The context manager closes the archive's file handle once every array has
# been read; `l` stays bound afterwards but must not be indexed again.
with np.load('./env_set/val_dataset.npz') as l:
    train_input = l['train_input']
    train_label = l['train_label']
    test_input = l['test_input']
    test_label = l['test_label']
    # Per-feature bounds, used further down to map values back via (MAXS-MINS)*x + MINS.
    MAXS = l['MAXS']
    MINS = l['MINS']

    TIME_STEPS = l['TIME_STEPS']
    OUTPUT_SIZE = l['OUTPUT_SIZE']
# Feature count is taken from the last axis of the input array.
NUM_FEATURES = train_input.shape[-1]

In [None]:
# Show the train shapes and the test shapes, separated by a blank line.
print(train_input.shape, train_label.shape, sep='\n')
print()
print(test_input.shape, test_label.shape, sep='\n')

In [None]:
# Batched tf.data pipeline over the (input, label) test pairs.
BATCH_SIZE = 32
test_dataset = tf.data.Dataset.from_tensor_slices((test_input, test_label)).batch(BATCH_SIZE)

In [None]:
# Second autoencoder instance with the same layout: hidden-layer widths, size
# of the latent code, and size of the reconstructed output.
num_hiddens = [32, 16]
encoding_size = 8
# presumably equal to the number of features per time step — verify against the dataset
original_size = 5
trans_ae = Autoencoder(num_hiddens, encoding_size, original_size)

In [None]:
# Restore the weights saved for the transferred autoencoder and report completion.
trans_ae.load_weights('./models/trans_ae')
print('transfered AE is ready.')

In [None]:
# Load the saved transfer models. TRANS_BEST_PATHS is ordered
# MLP, LSTM, AE_LSTM, BiLSTM, AE_BiLSTM, hence the non-sequential indices below.
tr_mlp_model = tf.keras.models.load_model(TRANS_BEST_PATHS[0])
tr_lstm_model = tf.keras.models.load_model(TRANS_BEST_PATHS[1])
tr_bilstm_model = tf.keras.models.load_model(TRANS_BEST_PATHS[3])
tr_ae_lstm_model = tf.keras.models.load_model(TRANS_BEST_PATHS[2])
tr_ae_bilstm_model = tf.keras.models.load_model(TRANS_BEST_PATHS[4])

In [None]:
# Flag the transferred autoencoder as trainable.
# NOTE(review): no training step is visible in this notebook section, so this
# looks like it has no effect on the predictions below — confirm it is needed.
trans_ae.trainable = True

In [None]:
# Predictions of the transfer models that consume the test features directly (no autoencoder).
tr_mlp_pred = tr_mlp_model.predict(test_dataset)
tr_lstm_pred = tr_lstm_model.predict(test_dataset)
tr_bilstm_pred = tr_bilstm_model.predict(test_dataset)

In [None]:
# Encode every test batch (inputs and labels) with the transferred autoencoder
# in code-only mode, then rebuild a batched dataset from the latent codes.
encoded_test_input, encoded_test_label = [], []
for step, (inp, tar) in enumerate(test_dataset):
    encoded_test_input.append(trans_ae(inp, need_code=True))
    encoded_test_label.append(trans_ae(tar, need_code=True))
# Stitch the per-batch codes back together along the sample axis.
encoded_test_input = np.concatenate(encoded_test_input, axis=0)
encoded_test_label = np.concatenate(encoded_test_label, axis=0)
encoded_test_dataset = tf.data.Dataset.from_tensor_slices(
    (encoded_test_input, encoded_test_label)).batch(BATCH_SIZE)

In [None]:
# Forecast in latent space, then map the predicted codes back to feature space
# with the transferred autoencoder's decoder path.
# The predicted codes get explicit names instead of `_`, which IPython reserves
# for the most recent cell output.
# Autoencoder.call always encodes its first argument, even when `decoding` is
# given, so a single training sample is passed as a cheap placeholder input.
tr_ae_lstm_code = tr_ae_lstm_model.predict(encoded_test_dataset)
tr_ae_lstm_pred = trans_ae(train_input[0:1, :, :], decoding=tr_ae_lstm_code)
tr_ae_bilstm_code = tr_ae_bilstm_model.predict(encoded_test_dataset)
tr_ae_bilstm_pred = trans_ae(train_input[0:1, :, :], decoding=tr_ae_bilstm_code)

In [None]:
# Map labels and transfer-model predictions back with (MAXS-MINS)*x + MINS
# (presumably the inverse of a min-max scaling applied when the dataset was
# built — verify), then cast to float32 NumPy arrays.
# NOTE(review): each name is overwritten with its rescaled version, so running
# this cell twice rescales already-rescaled values. Re-run the loading and
# prediction cells before repeating it.
test_label = tf.cast((MAXS-MINS)*test_label + MINS, tf.float32).numpy()
tr_mlp_pred = tf.cast((MAXS-MINS)*tr_mlp_pred + MINS, tf.float32).numpy()
tr_lstm_pred = tf.cast((MAXS-MINS)*tr_lstm_pred + MINS, tf.float32).numpy()
tr_bilstm_pred = tf.cast((MAXS-MINS)*tr_bilstm_pred + MINS, tf.float32).numpy()
tr_ae_lstm_pred = tf.cast((MAXS-MINS)*tr_ae_lstm_pred + MINS, tf.float32).numpy()
tr_ae_bilstm_pred = tf.cast((MAXS-MINS)*tr_ae_bilstm_pred + MINS, tf.float32).numpy()

In [None]:
# Write the labels and each transfer model's predictions to CSV, flattened to
# rows of five variables.
RESULT_COLUMNS = ['T_in', 'T_out', 'RH_in', 'CO2', 'Rad']
# to_csv does not create missing directories; make sure the target exists so
# the export cannot fail after the expensive prediction cells.
os.makedirs('./results', exist_ok=True)
for csv_path, values in [
        ('./results/val_tr_test_label.csv', test_label),
        ('./results/val_tr_mlp_pred.csv', tr_mlp_pred),
        ('./results/val_tr_lstm_pred.csv', tr_lstm_pred),
        ('./results/val_tr_bilstm_pred.csv', tr_bilstm_pred),
        ('./results/val_tr_ae_lstm_pred.csv', tr_ae_lstm_pred),
        ('./results/val_tr_ae_bilstm_pred.csv', tr_ae_bilstm_pred),
]:
    pd.DataFrame(values.reshape(-1, len(RESULT_COLUMNS)), columns=RESULT_COLUMNS).to_csv(csv_path)

# Model evaluation w/ transfer (tom)