In [1]:
import sys
sys.path.append('../')

In [2]:
import time
import os

import pandas as pd
import numpy as np
from sklearn.utils import resample
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

import tensorflow as tf
tf.__version__

'2.0.0'

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

2 Physical GPUs, 2 Logical GPUs


In [4]:
from tensorflow.keras.metrics import Metric
class RSquare(Metric):
    """Compute R^2 score.
     This is also called as coefficient of determination.
     It tells how close are data to the fitted regression line.
     - Highest score can be 1.0 and it indicates that the predictors
       perfectly accounts for variation in the target.
     - Score 0.0 indicates that the predictors do not
       account for variation in the target.
     - It can also be negative if the model is worse.
     Usage:
     ```python
     actuals = tf.constant([1, 4, 3], dtype=tf.float32)
     preds = tf.constant([2, 4, 4], dtype=tf.float32)
     result = tf.keras.metrics.RSquare()
     result.update_state(actuals, preds)
     print('R^2 score is: ', r1.result().numpy()) # 0.57142866
    ```
    """

    def __init__(self, name='r_square', dtype=tf.float32):
        super(RSquare, self).__init__(name=name, dtype=dtype)
        self.squared_sum = self.add_weight("squared_sum", initializer="zeros")
        self.sum = self.add_weight("sum", initializer="zeros")
        self.res = self.add_weight("residual", initializer="zeros")
        self.count = self.add_weight("count", initializer="zeros")

    def update_state(self, y_true, y_pred):
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)
        self.squared_sum.assign_add(tf.reduce_sum(y_true**2))
        self.sum.assign_add(tf.reduce_sum(y_true))
        self.res.assign_add(
            tf.reduce_sum(tf.square(tf.subtract(y_true, y_pred))))
        self.count.assign_add(tf.cast(tf.shape(y_true)[0], tf.float32))

    def result(self):
        mean = self.sum / self.count
        total = self.squared_sum - 2 * self.sum * mean + self.count * mean**2
        return 1 - (self.res / total)

    def reset_states(self):
        # The state of the metric will be reset at the start of each epoch.
        self.squared_sum.assign(0.0)
        self.sum.assign(0.0)
        self.res.assign(0.0)
        self.count.assign(0.0)

In [5]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import (LinearLocator, MultipleLocator, FormatStrFormatter)
from matplotlib.dates import MONDAY
from matplotlib.dates import MonthLocator, WeekdayLocator, DateFormatter
from matplotlib import gridspec
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline

In [6]:
plt.rcParams['figure.figsize'] = ((8/2.54), (6/2.54))
plt.rcParams["font.family"] = "Arial"
plt.rcParams["mathtext.default"] = "rm"
plt.rcParams.update({'font.size': 11})
MARKER_SIZE = 15
cmap_m = ["#f4a6ad", "#f6957e", "#fccfa2", "#8de7be", "#86d6f2", "#24a9e4", "#b586e0", "#d7f293"]
cmap = ["#e94d5b", "#ef4d28", "#f9a54f", "#25b575", "#1bb1e7", "#1477a2", "#a662e5", "#c2f442"]

plt.rcParams['axes.spines.top'] = False
# plt.rcParams['axes.edgecolor'] = 
plt.rcParams['axes.linewidth'] = 1
plt.rcParams['lines.linewidth'] = 1.5
plt.rcParams['xtick.major.width'] = 1
plt.rcParams['xtick.minor.width'] = 1
plt.rcParams['ytick.major.width'] = 1
plt.rcParams['ytick.minor.width'] = 1

In [7]:
def make_patch_spines_invisible(ax):
    ax.set_frame_on(True)
    ax.patch.set_visible(False)
    for sp in ax.spines.values():
        sp.set_visible(False)

## Data preparation

### Hyperparameters

In [8]:
LEARNING_RATE = 0.001
BATCH_SIZE = 32

### data loading

In [9]:
l = np.load('./env_set/dataset.npz')
train_input = l['train_input']
train_label = l['train_label']
test_input = l['test_input']
test_label = l['test_label']
MAXS = l['MAXS']
MINS = l['MINS']

TIME_STEPS = l['TIME_STEPS']
OUTPUT_SIZE = l['OUTPUT_SIZE']
NUM_FEATURES = train_input.shape[-1]

In [10]:
print(train_input.shape)
print(train_label.shape)
print()
print(test_input.shape)
print(test_label.shape)

(41025, 24, 5)
(41025, 24, 5)

(16119, 24, 5)
(16119, 24, 5)


In [11]:
trainset = np.concatenate([train_input, train_label], axis=0).astype(np.float32)
testset = np.concatenate([test_input, test_label], axis=0).astype(np.float32)

In [12]:
train_dataset = tf.data.Dataset.from_tensor_slices((trainset, trainset))
train_dataset = train_dataset.cache().shuffle(BATCH_SIZE*100).batch(BATCH_SIZE).repeat()
test_dataset = tf.data.Dataset.from_tensor_slices((testset, testset))
test_dataset = test_dataset.batch(BATCH_SIZE)

In [13]:
class RetrainLayer(tf.keras.layers.Layer):
    def __init__(self, num_hidden, activation=tf.nn.relu):
        super(RetrainLayer, self).__init__()
        self.num_hidden = num_hidden
        
        self.dense = tf.keras.layers.Dense(self.num_hidden, activation=activation, kernel_initializer='he_uniform')
        
    def call(self, inp):
        return self.dense(inp)

In [14]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_hiddens, encoding_size):
        super(Encoder, self).__init__()
        
        self.num_hiddens = num_hiddens
        self.encoding_size = encoding_size
        
        self.denses = [tf.keras.layers.Dense(self.num_hiddens[_], activation=tf.nn.relu, kernel_initializer='he_uniform')
                       for _ in range(len(self.num_hiddens))]
        
        self.output_layer = tf.keras.layers.Dense(self.encoding_size, activation=tf.nn.sigmoid)
        
    def call(self, inp):
        for _ in range(len(self.num_hiddens)):
            inp = self.denses[_](inp)
        
        return self.output_layer(inp)        

In [15]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, num_hiddens, original_size):
        super(Decoder, self).__init__()
        
        self.num_hiddens = num_hiddens[::-1]
        self.original_size = original_size
        
        self.denses = [tf.keras.layers.Dense(self.num_hiddens[_], activation=tf.nn.relu, kernel_initializer='he_uniform')
                       for _ in range(len(self.num_hiddens))]
        
    def call(self, inp):
        for _ in range(len(self.num_hiddens)):
            inp = self.denses[_](inp)
        
        return inp

In [16]:
class Autoencoder(tf.keras.Model):
    def __init__(self, num_hiddens, encoding_size, original_size):
        super(Autoencoder, self).__init__()
        self.num_hiddens = num_hiddens
        self.encoding_size = encoding_size
        self.original_size = original_size
        
        self.in_retrain_layer = RetrainLayer(self.num_hiddens[0])
        self.encoder = Encoder(self.num_hiddens, self.encoding_size)
        self.decoder = Decoder(self.num_hiddens, self.original_size)
        self.out_retrain_layer = RetrainLayer(self.original_size, activation = tf.nn.sigmoid)
        
    def call(self, inp, need_code=False, decoding=None):
        inp = self.in_retrain_layer(inp)
        encoded_values = self.encoder(inp)
        if decoding is not None:
            decoding = self.decoder(decoding)
            return self.out_retrain_layer(decoding)
        if not need_code:
            encoded_values = self.decoder(encoded_values)
            return self.out_retrain_layer(encoded_values)
        else:
            return encoded_values

In [17]:
def loss(model, original):
    reconstruction_error = tf.reduce_mean(tf.square(tf.subtract(model(original), original)))
    return reconstruction_error

In [18]:
@tf.function
def train(loss, model, opt, original):
    with tf.GradientTape() as tape:
        gradients = tape.gradient(loss(model, original), model.trainable_variables)
        gradient_variables = zip(gradients, model.trainable_variables)
        opt.apply_gradients(gradient_variables)

In [19]:
num_hiddens = [32, 16]
encoding_size = 8
original_size = 5
autoencoder = Autoencoder(num_hiddens, encoding_size, original_size)
opt = tf.optimizers.Adam(learning_rate=0.001)

In [20]:
checkpoint_path = "./checkpoints/trained_AE"
ckpt = tf.train.Checkpoint(autoencoder=autoencoder,
                           opt=opt)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=10)

In [21]:
writer = tf.summary.create_file_writer('tmp')

In [23]:
EPOCHS = 50
prev_test_loss = 100.0
with writer.as_default():
    with tf.summary.record_if(True):
        for epoch in range(EPOCHS):
            for step, (inp, tar) in enumerate(train_dataset):
                train(loss, autoencoder, opt, inp)
                loss_values = loss(autoencoder, inp)
                tf.summary.scalar('loss', loss_values, step=step)
                
                if step % 500 == 0:
                    test_loss = 0
                    for step_, (inp_, tar_) in enumerate(test_dataset):
                        test_loss += loss(autoencoder, inp_)
                        
                        if step_ > 500:
                            test_loss /= 100
                            break
                    if test_loss.numpy() < prev_test_loss:
                        ckpt_save_path = ckpt_manager.save()
                        prev_test_loss = test_loss.numpy()
                        print('Saving checkpoint at {}'.format(ckpt_save_path))
                    print('Epoch {} batch {} train loss: {:.4f} test loss: {:.4f}'
                          .format(epoch, step, loss_values.numpy(), test_loss.numpy()))

Saving checkpoint at ./checkpoints/trained_AE/ckpt-59
Epoch 0 batch 0 train loss: 0.0000 test loss: 0.0001
Saving checkpoint at ./checkpoints/trained_AE/ckpt-60
Epoch 0 batch 500 train loss: 0.0000 test loss: 0.0001
Saving checkpoint at ./checkpoints/trained_AE/ckpt-61
Epoch 0 batch 1000 train loss: 0.0000 test loss: 0.0001
Saving checkpoint at ./checkpoints/trained_AE/ckpt-62
Epoch 0 batch 1500 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 2000 train loss: 0.0000 test loss: 0.0001
Saving checkpoint at ./checkpoints/trained_AE/ckpt-63
Epoch 0 batch 2500 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 3000 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 3500 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 4000 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 4500 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 5000 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 5500 train loss: 0.0000 test loss: 0.0001
Epoch 0 batch 6000 train loss: 0.0000 test loss: 0.0001
Epoch 

KeyboardInterrupt: 

# Model saving

In [24]:
i = -1
if ckpt_manager.checkpoints:
    ckpt.restore(ckpt_manager.checkpoints[i])
    print ('Checkpoint ' + ckpt_manager.checkpoints[i][-6:] +' restored!!')

Checkpoint pt-119 restored!!


In [25]:
test_loss = 0
for step_, (inp_, tar_) in enumerate(test_dataset):
    test_loss += loss(autoencoder, inp_)

In [26]:
autoencoder.save_weights('./checkpoints/trained_AE')