In [13]:
from tensorflow import keras
from keras.preprocessing.sequence import TimeseriesGenerator
from keras import layers
from sklearn.preprocessing import MinMaxScaler


In [14]:
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
logging.getLogger('keras').setLevel(logging.ERROR)


In [15]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import pickle 



In [16]:
df = pd.read_csv('ddb_output.csv')
df.rename(columns={'humidity.S': 'humidity',
                   'temperature.S':'temperature',
                   'timestamp.S':'timestamp'},inplace=True)
df.drop(columns=['Unnamed: 0'],inplace=True)
#df.reset_index(drop=True,inplace=True)
df.sort_values(by='timestamp',inplace=True)


### Baseline - persistence forecast 1,2,4 hours in advance

In [17]:
df_temp =  df[ df['timestamp'] > str(pd.Timestamp('2023-04-28')) ].drop(columns=['humidity'])
df_temp.reset_index(inplace=True,drop=True)

df_temp['timestamp'] = df_temp['timestamp'].apply(lambda x: pd.Timestamp(x)) 

df_temp['timestamp'] = df_temp['timestamp'].dt.round('5min')


df_temp['type'] = 'observation'
df_temp.rename(columns={'temperature':'value'},inplace=True)


In [18]:
df_temp =  df[ df['timestamp'] > str(pd.Timestamp('2023-04-28')) ].drop(columns=['humidity'])
df_temp.reset_index(inplace=True,drop=True)

df_temp['timestamp'] = df_temp['timestamp'].apply(lambda x: pd.to_datetime(x)) 

df_temp['timestamp'] = df_temp['timestamp'].dt.round('5min')

df_temp['timestamp'] = df_temp['timestamp'].apply(lambda x: pd.to_datetime(x)) 

df_temp.set_index('timestamp',inplace=True)
df_temp

Unnamed: 0_level_0,temperature
timestamp,Unnamed: 1_level_1
2023-04-28 08:25:00,17.83
2023-04-28 08:25:00,17.85
2023-04-28 08:30:00,17.85
2023-04-28 08:30:00,17.88
2023-04-28 09:20:00,17.88
...,...
2023-05-10 20:30:00,21.63
2023-05-10 20:40:00,21.57
2023-05-10 20:50:00,21.54
2023-05-10 21:00:00,21.50


In [19]:
df_temp.dropna(inplace=True)


In [96]:
# Split the data, 50% in training and 25% in validation, 25% in test, and not randomly selected since this is a time series

scaler = MinMaxScaler()

last_train_index = int(np.round(df_temp['temperature'].shape[0]*0.5))
last_val_index = int(np.round(df_temp['temperature'].shape[0]*0.75))

train_data = df_temp.iloc[:last_train_index,]
validation_data = df_temp.iloc[last_train_index:last_val_index]
test_data = df_temp.iloc[last_val_index:]

# Fit to the train set, then transform the train set and the test set
# TODO: Check, should not fit on test set?

scaler.fit(train_data)
scaled_train = scaler.transform(train_data)
scaled_validation = scaler.transform(validation_data)
scaled_test = scaler.transform(test_data)


In [144]:
delay = 12
sequence_length = 36


train_old = TimeseriesGenerator(scaled_train[:-delay], scaled_train[delay:], 
                            length=sequence_length, batch_size=1, shuffle=True)

validation_old = TimeseriesGenerator(scaled_validation[:-delay], scaled_validation[delay:], 
                                 length=sequence_length, batch_size=1, shuffle=True)

test_old = TimeseriesGenerator(scaled_test[:-delay], scaled_test[delay:], 
                           length=sequence_length, batch_size=1, shuffle=True)


train = keras.preprocessing.timeseries_dataset_from_array(scaled_train[:-delay], scaled_train[sequence_length+delay:], 
                            sequence_length=sequence_length, batch_size=1, shuffle=True)

validation = keras.preprocessing.timeseries_dataset_from_array(scaled_validation[:-delay], scaled_validation[sequence_length+delay:], 
                            sequence_length=sequence_length, batch_size=1, shuffle=True)

test = keras.preprocessing.timeseries_dataset_from_array(scaled_test[:-delay], scaled_test[sequence_length+delay:], 
                            sequence_length=sequence_length, batch_size=1, shuffle=True)




In [145]:
for input, target in train_old:
    print('Input')
    print((np.reshape(input[0],(1,36))))
    print('Target')
    print(target[0])
    print('\n ==== \n')

Input
[[0.86440678 0.89265537 0.87853107 0.87288136 0.88418079 0.87288136
  0.89265537 0.89265537 0.88135593 0.87853107 0.88418079 0.87853107
  0.88983051 0.90112994 0.93785311 0.93220339 0.91242938 0.94632768
  0.9180791  0.94632768 0.92090395 0.9039548  0.92372881 0.90960452
  0.92090395 0.86158192 0.84180791 0.81920904 0.8700565  0.83333333
  0.81073446 0.81073446 0.80508475 0.77683616 0.74858757 0.75706215]]
Target
[0.64689266]

 ==== 

Input
[[0.83333333 0.81073446 0.81073446 0.80508475 0.77683616 0.74858757
  0.75706215 0.77683616 0.73728814 0.76836158 0.72881356 0.70903955
  0.70056497 0.67514124 0.64124294 0.57344633 0.61299435 0.61581921
  0.63276836 0.64689266 0.64124294 0.64689266 0.66666667 0.68079096
  0.66101695 0.63841808 0.64971751 0.65819209 0.64971751 0.65536723
  0.64689266 0.64689266 0.63276836 0.64689266 0.66949153 0.64689266]]
Target
[0.63276836]

 ==== 

Input
[[0.19491525 0.19491525 0.20338983 0.15819209 0.16949153 0.16666667
  0.19774011 0.19774011 0.22316384 0

In [146]:
for input, target in train:
    print('Input')
    print((np.reshape(input[0],(1,36))))
    print('Target')
    print(target[0])
    print('\n ==== \n')

2023-05-16 11:14:05.096295: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype double and shape [998,1]
	 [[{{node Placeholder/_20}}]]
2023-05-16 11:14:05.096617: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype double and shape [998,1]
	 [[{{node Placeholder/_20}}]]


Input
[[0.20903955 0.21751412 0.23446328 0.23446328 0.23446328 0.21751412
  0.25423729 0.20903955 0.24293785 0.21468927 0.22316384 0.2259887
  0.24293785 0.24293785 0.22881356 0.23728814 0.24293785 0.21468927
  0.23446328 0.26271186 0.23446328 0.26553672 0.25423729 0.27966102
  0.26553672 0.22316384 0.24858757 0.25423729 0.25706215 0.26271186
  0.26553672 0.24576271 0.25423729 0.25423729 0.24858757 0.26271186]]
Target
tf.Tensor([0.26553672], shape=(1,), dtype=float64)

 ==== 

Input
[[0.50282486 0.50282486 0.49717514 0.51129944 0.48870056 0.51694915
  0.49717514 0.51129944 0.48587571 0.55084746 0.51129944 0.51129944
  0.51412429 0.49717514 0.52259887 0.53107345 0.52259887 0.50564972
  0.54519774 0.52542373 0.55084746 0.53672316 0.53389831 0.55084746
  0.54237288 0.57344633 0.55084746 0.55084746 0.54237288 0.56214689
  0.59039548 0.56497175 0.56214689 0.5819209  0.60169492 0.5819209 ]]
Target
tf.Tensor([0.64124294], shape=(1,), dtype=float64)

 ==== 

Input
[[0.1299435  0.13841808 0.158

In [131]:
def try_lstm_model_diff_datasets(train, validation, test):

    inputs = keras.Input(shape=(36, 1))

    x = layers.LSTM(16)(inputs)
    outputs = layers.Dense(1)(x)

    model = keras.Model(inputs, outputs)

    model.compile(optimizer="adam", loss="mse", metrics=["mae"])


    history = model.fit(train,
                        epochs=2,
                        validation_data=validation,
                                               )


    test_mae = model.evaluate(test)[1]
    loss = history.history["mae"]
    val_loss = history.history["val_mae"]
    epochs = range(1, len(loss) + 1)

    print({'test_mae': test_mae, 'loss': loss, 'val_loss' : val_loss, 'epochs' : epochs})

try_lstm_model_diff_datasets(train_old, validation_old, test_old)
try_lstm_model_diff_datasets(train, validation, test)


# Ok, seems like these generator-ish things are roughly equivalent

Epoch 1/2
Epoch 2/2
{'test_mae': 0.07649621367454529, 'loss': [0.05931251123547554, 0.043490517884492874], 'val_loss': [0.0655580684542656, 0.06040392071008682], 'epochs': range(1, 3)}
Epoch 1/2


2023-05-16 11:08:32.884500: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-05-16 11:08:37.481594: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/2

2023-05-16 11:08:42.232517: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


{'test_mae': 0.07237011194229126, 'loss': [0.07024755328893661, 0.04705963283777237], 'val_loss': [0.07002148032188416, 0.06247007101774216], 'epochs': range(1, 3)}


In [148]:
def try_lstm_model_different_dropouts(dropout, recurrent_dropout):

    inputs = keras.Input(shape=(36, 1))

    x = layers.LSTM(16, dropout=dropout, 
                    recurrent_dropout=recurrent_dropout)(inputs)
    outputs = layers.Dense(1)(x)

    model = keras.Model(inputs, outputs)

    model.compile(optimizer="adam", loss="mse", metrics=["mae"])

    path_checkpoint = f"model_checkpoint_dropout_{dropout}_reccurent_dropout_{recurrent_dropout}.h5"
    es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5)

    modelckpt_callback = keras.callbacks.ModelCheckpoint(
        monitor="val_loss",
        filepath=path_checkpoint,
        verbose=1,
        save_weights_only=True,
        save_best_only=True,
        )

    history = model.fit(train,
                        epochs=40,
                        validation_data=validation,
                            callbacks=[es_callback, modelckpt_callback],
                        )


    test_mae = model.evaluate(test)[1]
    loss = history.history["mae"]
    val_loss = history.history["val_mae"]
    epochs = range(1, len(loss) + 1)

    print('\n ==== Parameter configuration training complete === \n')
    print(f'Dropout rate: {dropout}, recurrent dropuout: {recurrent_dropout} . \n Test MAE = {test_mae}.' )
    return {'test_mae': test_mae, 'loss': loss, 'val_loss' : val_loss, 'epochs' : epochs}


dropout_options = [0, 0.1, 0.2]


results = {}

for dropout in dropout_options:
    for recurrent_dropout in dropout_options: 
        results[f'dropout: {dropout}, recurrent_dropout: {recurrent_dropout}'] = try_lstm_model_different_dropouts(dropout, recurrent_dropout)

with open('dropout_experiments.pkl', 'wb') as f:
    pickle.dump(results, f)

Epoch 1/40
Epoch 1: val_loss improved from inf to 0.00585, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.h5
Epoch 2/40
Epoch 2: val_loss improved from 0.00585 to 0.00549, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.h5
Epoch 3/40
Epoch 3: val_loss improved from 0.00549 to 0.00485, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.h5
Epoch 4/40
Epoch 4: val_loss improved from 0.00485 to 0.00391, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.h5
Epoch 5/40
Epoch 5: val_loss improved from 0.00391 to 0.00387, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.h5
Epoch 6/40
Epoch 6: val_loss improved from 0.00387 to 0.00296, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.h5
Epoch 7/40
Epoch 7: val_loss did not improve from 0.00296
Epoch 8/40
Epoch 8: val_loss did not improve from 0.00296
Epoch 9/40
Epoch 9: val_loss did not improve from 0.00296
Epoch 10/40
Epoch 10: val_loss improved from 0.00296 to 

2023-05-16 11:17:40.916734: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_14' with dtype int32
	 [[{{node Placeholder/_14}}]]
2023-05-16 11:17:40.917048: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype double and shape [475,1]
	 [[{{node Placeholder/_20}}]]



 ==== Parameter configuration training complete === 

Dropout rate: 0, recurrent dropuout: 0 . 
 Test MAE = 0.0635860338807106.
Epoch 1/40
Epoch 1: val_loss improved from inf to 0.00589, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.1.h5
Epoch 2/40
Epoch 2: val_loss improved from 0.00589 to 0.00545, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.1.h5
Epoch 3/40
Epoch 3: val_loss improved from 0.00545 to 0.00489, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.1.h5
Epoch 4/40
Epoch 4: val_loss improved from 0.00489 to 0.00421, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.1.h5
Epoch 5/40
Epoch 5: val_loss did not improve from 0.00421
Epoch 6/40
Epoch 6: val_loss improved from 0.00421 to 0.00370, saving model to model_checkpoint_dropout_0_reccurent_dropout_0.1.h5
Epoch 7/40
Epoch 7: val_loss did not improve from 0.00370
Epoch 8/40
Epoch 8: val_loss improved from 0.00370 to 0.00365, saving model to model_checkpoint_dropout_

In [None]:
# Keras - time series forecasting example 

# https://keras.io/examples/timeseries/timeseries_weather_forecasting/

In [116]:
dataset = np.arange(1,10)

delay = 1
sequence_length = 3

dummy_generator = keras.preprocessing.timeseries_dataset_from_array(dataset[:-delay], dataset[sequence_length+delay:], 
                                                                    sequence_length = sequence_length)

for input, target in dummy_generator:
    print('Input: ', input, 'target: ',  target)


dummy_generator_2 = TimeseriesGenerator(dataset[:-delay], dataset[delay:], 
                                length=sequence_length, batch_size=1)

for input, target in dummy_generator_2:
    print('Input: ', input, 'target: ',  target)

Input:  tf.Tensor(
[[1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]
 [5 6 7]], shape=(5, 3), dtype=int64) target:  tf.Tensor([5 6 7 8 9], shape=(5,), dtype=int64)
Input:  [[1 2 3]] target:  [5]
Input:  [[2 3 4]] target:  [6]
Input:  [[3 4 5]] target:  [7]
Input:  [[4 5 6]] target:  [8]
Input:  [[5 6 7]] target:  [9]


2023-05-16 10:59:08.063147: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_20' with dtype int64 and shape [5]
	 [[{{node Placeholder/_20}}]]
2023-05-16 10:59:08.063424: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_14' with dtype int32
	 [[{{node Placeholder/_14}}]]
