# Timeseries prediction of CO

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

%matplotlib inline

In [None]:
# Read the processed CO_0 pickle and keep only the rows whose 'station'
# column equals 101.

df_CO_0 = (
    pd.read_pickle('../src/data/processed/CO_0.pkl')
    .loc[lambda frame: frame['station'] == 101]
)


In [None]:
# Build the model input: the 'value' column as a Series indexed by the
# 'datetime' column, then plot it for a quick visual check.

CO_data = df_CO_0.set_index('datetime')['value']
CO_data.plot(subplots=True)


In [None]:
# Drop the datetime index: keep only the underlying values (`.values`) so the
# data can be sliced positionally in the cells below.

CO_data = CO_data.values


In [None]:
# Standardization
#
# The mean and standard deviation are computed on the first `train_split`
# samples only and then applied to the whole array.
# NOTE(review): 24*365*2 presumably means two years of hourly samples - confirm.

train_split = 2 * 365 * 24

CO_train_mean = np.mean(CO_data[:train_split])
CO_train_std = np.std(CO_data[:train_split])

CO_data = (CO_data - CO_train_mean) / CO_train_std


In [None]:
# function that segments data into historical and target values

def chop_data(dataset, start_index, end_index, input_size, target_size):
    """Cut a 1-D series into (history window, target value) pairs.

    For every position ``pos`` in ``[start_index + input_size, end_index)``
    the window is the ``input_size`` samples immediately before ``pos``
    (reshaped to ``(input_size, 1)``) and the target is the sample at
    ``pos + target_size``.

    Args:
        dataset: Indexable 1-D array of samples.
        start_index: Index of the first sample that may appear in a window.
        end_index: Exclusive upper bound for ``pos``; ``None`` means
            ``len(dataset) - target_size``.
        input_size: Number of samples in each history window.
        target_size: Offset of the target relative to ``pos``.

    Returns:
        A tuple ``(windows, targets)`` of NumPy arrays.
    """
    first = start_index + input_size
    last = len(dataset) - target_size if end_index is None else end_index
    positions = range(first, last)

    windows = [
        np.reshape(dataset[range(pos - input_size, pos)], (input_size, 1))
        for pos in positions
    ]
    targets = [dataset[pos + target_size] for pos in positions]

    return np.array(windows), np.array(targets)

In [None]:
# Train/validation split: windows taken before `train_split` form the
# training set, windows from `train_split` onwards form the validation set.

history_size = 20
future_size = 0

x_train, y_train = chop_data(
    CO_data,
    start_index=0,
    end_index=train_split,
    input_size=history_size,
    target_size=future_size,
)
x_val, y_val = chop_data(
    CO_data,
    start_index=train_split,
    end_index=None,
    input_size=history_size,
    target_size=future_size,
)

In [None]:
# plotting actual vs prediction functions

def create_time_steps(length):
    """Return the integer time steps ``-length, ..., -1`` as a list."""
    return [*range(-length, 0)]

def show_plot(plot_data, delta, title):
    """Plot a history window with its true and (optionally) predicted value.

    Args:
        plot_data: Sequence whose first item is the history window (an array
            providing ``.shape`` and ``.flatten()``); any following items are
            drawn as single markers, labelled 'True Future' and
            'Model Prediction' in that order.
        delta: x position of the future markers; a falsy value means 0.
        title: Title of the figure.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can call ``.show()``.
    """
    series_labels = ['History', 'True Future', 'Model Prediction']
    series_styles = ['.-', 'rx', 'go']
    history_steps = create_time_steps(plot_data[0].shape[0])
    future = delta if delta else 0

    plt.title(title)
    for idx, series in enumerate(plot_data):
        if idx == 0:
            # The history window is drawn as a line over the negative steps.
            plt.plot(history_steps, series.flatten(), series_styles[idx],
                     label=series_labels[idx])
        else:
            # Future values are drawn as individual markers at `future`.
            plt.plot(future, series, series_styles[idx], markersize=10,
                     label=series_labels[idx])

    plt.legend()
    plt.xlim([history_steps[0], (future + 5) * 2])
    plt.xlabel('Time-Step')
    return plt

## Begin with TensorFlow

In [None]:
# Wrap the NumPy windows in tf.data pipelines, one transformation per step.

BATCH_SIZE = 256
BUFFER_SIZE = 10000

# Training pipeline: cache, shuffle, batch, then repeat indefinitely.
train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train = train.cache()
train = train.shuffle(BUFFER_SIZE)
train = train.batch(BATCH_SIZE)
train = train.repeat()

# Validation pipeline: batched and repeated, no shuffling.
val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val = val.batch(BATCH_SIZE)
val = val.repeat()

### Model Definition

In [None]:
# Build the sequential tf.keras model layer by layer: an 8-unit LSTM fed with
# windows shaped like the last two axes of x_train, followed by a single
# Dense output unit.

simple_lstm_model = tf.keras.models.Sequential()
simple_lstm_model.add(
    tf.keras.layers.LSTM(8, input_shape=x_train.shape[-2:])
)
simple_lstm_model.add(tf.keras.layers.Dense(1))

In [None]:
# Training configuration: optimizer and loss, with 'mae' also reported as a
# metric.
OPT = 'adam'
LOSS = 'mae'

simple_lstm_model.compile(
    optimizer=OPT,
    loss=LOSS,
    metrics=['mae'],
)


In [None]:
# Sanity check: run one validation batch through the model and print the
# shape of its output.

for x, y in val.take(1):
    sample_output = simple_lstm_model.predict(x)
    print(sample_output.shape)
    

In [None]:
EVALUATION_INTERVAL = 200
EPOCHS = 4

simple_lstm_model.fit(train, epochs=EPOCHS,
                      steps_per_epoch=EVALUATION_INTERVAL,
                      validation_data=val, validation_steps=50)

In [None]:
print(simple_lstm_model.metrics_names)
simple_lstm_model.evaluate(train, steps=200)

In [None]:
print(simple_lstm_model.metrics_names)
simple_lstm_model.evaluate(val, steps=200)

In [None]:
simple_lstm_model.summary()

In [None]:
simple_lstm_model.save('../src/models/LSTM8.h5')

In [None]:
def plot_train_history(history, title):
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    
    epochs = range(len(loss))
    
    plt.figure()
    
    plt.plot(epochs, loss, 'b', label='Training loss')
    plt.plot(epochs, val_loss, 'r', label='Validation loss')
    plt.title(title)
    plt.legend()
    
    plt.show()

In [None]:
plot_train_history(simple_lstm_model.history,
                   'Single Step Training and validation loss')

In [None]:
# For three validation batches, plot the first window of the batch together
# with its true value and the model's prediction.
for x, y in val.take(3):
    history_window = x[0].numpy()
    true_future = y[0].numpy()
    predicted_future = simple_lstm_model.predict(x)[0]

    plot = show_plot([history_window, true_future, predicted_future],
                     0, 'Simple LSTM model')
    plot.show()