# Importing libraries

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, LSTM, TimeDistributed, Reshape
from tensorflow.keras.layers import MaxPool2D, MaxPooling2D, MaxPooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow import keras

from tensorflow.keras.callbacks import CSVLogger, EarlyStopping

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [3]:
from tensorboard.plugins.hparams import api as hp

# Loading Files

In [4]:
peeps = pd.read_csv("peeps_interpolations.csv")
peeps = peeps[~peeps.duplicated()]
#peeps = peeps.set_index('timestamp')
peeps = peeps.drop('Unnamed: 0', axis=1)
peeps = peeps.dropna()

In [5]:
leon = pd.read_csv("leon_interpolations.csv")
leon = leon[~leon.duplicated()]
#leon = leon.set_index('timestamp')
leon = leon.drop('Unnamed: 0', axis=1)
leon = leon.dropna()

In [6]:
guadalajara = pd.read_csv("guadalajara_interpolations.csv")
guadalajara = guadalajara[~guadalajara.duplicated()]
#guadalajara = guadalajara.set_index('timestamp')
guadalajara = guadalajara.drop('Unnamed: 0', axis=1)
guadalajara = guadalajara.dropna()

In [7]:
datasets = [peeps, leon, guadalajara]

# Functions

In [8]:
def ss_temporal_set(dataset):
    split_percent = 0.70
    split = int(split_percent*len(dataset))
    
    dataset = dataset.values
    
    def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
        data = []
        labels = []

        start_index = start_index + history_size
        if end_index is None:
            end_index = len(dataset) - target_size

        for i in range(start_index, end_index):
            indices = range(i-history_size, i, step)
            data.append(dataset[indices])

            if single_step:
                labels.append(target[i+target_size])
            else:
                labels.append(target[i:i+target_size])

        return np.array(data), np.array(labels)
    


    past_history = (6)#30
    future_target = 1#1
    STEP=1#12


    X_train, y_train = multivariate_data(dataset, dataset[:, 0], 0,
                                                       split, past_history,
                                                       future_target, STEP)
    X_test, y_test = multivariate_data(dataset, dataset[:, 0],
                                                   split, None, past_history,
                                                   future_target, STEP)
    
    
    X_train = X_train[::7] 
    y_train = y_train[::7] 
    X_test = X_test[::7]
    y_test = y_test[::7]
    
    return X_train, y_train, X_test, y_test

# Hyper-parameter Tuning, 1 Layer

## Peeps

In [18]:
%load_ext tensorboard

In [7]:
rm -rf ./logs/

In [11]:

HP_LSTM_UNITS = hp.HParam('num_lstm', hp.Discrete([1, 20, 50, 100, 200]))
HP_DENSE_UNITS = hp.HParam('num_dense', hp.Discrete([5, 10, 15, 20, 100]))


METRIC_MAE = 'mean_absolute_error'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
    hparams=[HP_NUM_PREV, HP_NUM_FUT, HP_LSTM_UNITS,HP_DENSE_UNITS],
    metrics=[hp.Metric(METRIC_MAE, display_name='MAE')],
  )

NameError: name 'HP_NUM_PREV' is not defined

In [46]:
def one_layer_lstm(hparams,dataset):
    
    df = dataset

    X_train, y_train, X_test, y_test = ss_temporal_set(df)


    model = Sequential()
    model.add(LSTM(units = 200, activation='relu', input_shape=X_train.shape[-2:]))
    model.add(Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.1)))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss = 'mean_absolute_percentage_error')

    model.fit(X_train, y_train, epochs=3, batch_size=32, verbose = 0)

    y_pred = model.predict(X_test)

    mape = mean_absolute_percentage_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
        
    return mape, mae

In [14]:
def run(run_dir, hparams, dataset):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
       
        mape = one_layer_lstm(hparams, dataset)
        tf.summary.scalar(METRIC_MAPE, mape, step=1)

In [15]:
session_num = 0
 
for num_lstm in HP_LSTM_UNITS.domain.values:
    for num_dense in HP_DENSE_UNITS.domain.values:
        hparams = {
          HP_LSTM_UNITS: num_lstm,
          HP_DENSE_UNITS: num_dense,  
        }
        run_name = "peeps-run-%d" % session_num
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        run('logs/hparam_tuning/' + run_name, hparams, peeps)
        session_num += 1

--- Starting trial: peeps-run-0
{'num_lstm': 200, 'num_dense': 100}


In [20]:
%tensorboard --logdir logs/hparam_tuning

## Leon

In [None]:
rm -rf ./logs/

In [None]:
session_num = 0
 
for num_lstm in HP_LSTM_UNITS.domain.values:
    for num_dense in HP_DENSE_UNITS.domain.values:
        hparams = {
          HP_LSTM_UNITS: num_lstm,
          HP_DENSE_UNITS: num_dense,  
        }
        run_name = "leon-run-%d" % session_num
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        run('logs/hparam_tuning/' + run_name, hparams, leon)
        session_num += 1

In [None]:
%tensorboard --logdir logs/hparam_tuning

## Guadalajara 

In [18]:
rm -rf ./logs/

In [11]:
session_num = 0
 
for num_lstm in HP_LSTM_UNITS.domain.values:
    for num_dense in HP_DENSE_UNITS.domain.values:
        hparams = {
          HP_LSTM_UNITS: num_lstm,
          HP_DENSE_UNITS: num_dense,  
        }
        run_name = "guadalajara-run-%d" % session_num
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        run('logs/hparam_tuning/' + run_name, hparams, guadalajara)
        session_num += 1

--- Starting trial: guadalajara-run-0
{'num_lstm': 200, 'num_dense': 10}


In [12]:
%tensorboard --logdir logs/hparam_tuning