# First Approach to Long-Short-Term Memory model #
 https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM


In [1]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

from itertools import permutations

from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.stattools import adfuller,kpss
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_pacf

import statsmodels.graphics.tsaplots as tsaplot
from statsmodels.tsa.holtwinters import Holt, ExponentialSmoothing, SimpleExpSmoothing

import tensorflow as tf

from tensorflow import keras

from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.utils import plot_model

import keras 
from keras.models import Sequential # intitialize the ANN
from keras.layers import Dense, Activation, Dropout, LSTM     # create layers


np.random.seed(42)
tf.random.set_seed(42)


We will start with the test train split 
in our case we can create several shorter sequences that we will use to train our model with 


In [3]:
df = pd.read_pickle("../data/final_dataframe.pkl")

In [4]:
def col_names(df):
    column_names = {'Photovoltaics [MWh] Original resolutions': 'Solar_generation_MWh',
                'Photovoltaics [MW] Calculated resolutions': 'Solar_installed_MW',
                'Total (grid load) [MWh] Original resolutions': 'Total_consumption_MWh',
                'Germany/Luxembourg [€/MWh] Calculated resolutions': 'DE_LU_price_per_MWh',}
    df.rename(columns=column_names, inplace=True)


In [5]:
col_names(df)

In [6]:
df.set_index('Date')

Unnamed: 0_level_0,Solar_generation_MWh,Solar_installed_MW,Total_consumption_MWh,DE_LU_price_per_MWh,normalisation_factor,Solar_generation_MWh_normalized
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-10-01 00:00:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
2018-10-01 00:15:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
2018-10-01 00:30:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
2018-10-01 00:45:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
2018-10-01 01:00:00,0.0,42805.0,10589.75,56.10,0.684015,0.0
...,...,...,...,...,...,...
2023-06-01 22:45:00,0.0,62579.0,12945.50,95.41,1.000000,0.0
2023-06-01 23:00:00,0.0,62579.0,12817.75,86.53,1.000000,0.0
2023-06-01 23:15:00,0.0,62579.0,12539.00,86.53,1.000000,0.0
2023-06-01 23:30:00,0.0,62579.0,12371.00,86.53,1.000000,0.0


In [36]:
df.head(50)

Unnamed: 0,Date,Solar_generation_MWh,Solar_installed_MW,Total_consumption_MWh,DE_LU_price_per_MWh,normalisation_factor,Solar_generation_MWh_normalized
0,2018-10-01 00:00:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
1,2018-10-01 00:15:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
2,2018-10-01 00:30:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
3,2018-10-01 00:45:00,0.0,42805.0,10589.75,59.53,0.684015,0.0
4,2018-10-01 01:00:00,0.0,42805.0,10589.75,56.1,0.684015,0.0
5,2018-10-01 01:15:00,0.0,42805.0,10560.25,56.1,0.684015,0.0
6,2018-10-01 01:30:00,0.0,42805.0,10507.0,56.1,0.684015,0.0
7,2018-10-01 01:45:00,0.0,42805.0,10407.5,56.1,0.684015,0.0
8,2018-10-01 02:00:00,0.0,42805.0,10263.5,51.41,0.684015,0.0
9,2018-10-01 02:15:00,0.0,42805.0,10274.0,51.41,0.684015,0.0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163680 entries, 0 to 163679
Data columns (total 7 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   Date                             163680 non-null  datetime64[ns]
 1   Solar_generation_MWh             163680 non-null  float64       
 2   Solar_installed_MW               163680 non-null  float64       
 3   Total_consumption_MWh            163680 non-null  float64       
 4   DE_LU_price_per_MWh              163680 non-null  float64       
 5   normalisation_factor             163680 non-null  float64       
 6   Solar_generation_MWh_normalized  163680 non-null  float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 8.7 MB


In [None]:
# We have a lot of samples therefore I will limit the sample size for training a bit more
#df = df.iloc[60000: , :]
#len(df)

In [8]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df[['Total_consumption_MWh']], test_size=.25, shuffle=False)

In [9]:
#Let's scale the data
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [10]:
#! Work in progress ...

scaler = StandardScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)

In [11]:
# split a univariate sequence into samples
def split_sequence(input, n_steps, pred_size):
    x, y = list(), list()
    for i in range(len(input)):
        end_ix = i + n_steps # find the end of this pattern
        if end_ix+pred_size > len(input)-1: # check if we are beyond the sequence
            break
        seq_x, seq_y = input[i:end_ix], input[end_ix: end_ix+pred_size]# gather input and output parts of the pattern
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)

In [12]:
# define input sequence
input = train
# choose a number of time steps
n_steps = 672

# prediction size 
pred_size= 96
# split into samples
X, y = split_sequence(input, n_steps, pred_size)
# summarize the data
print(len(X), len(y))


121992 121992


In [13]:
print(y.shape, X.shape)

(121992, 96, 1) (121992, 672, 1)


In [14]:
#Now we have to define the validation set for our model #! I see this approach is not so useful, therfore I will use the train test split with shuffling to obtain the validation data. Here i am not loosing the lateest data for training my model 
def val_set(X,y):
    X, X_val, y, y_val = train_test_split(X, y, test_size=0.2, shuffle=True)
    return X, X_val, y, y_val
    #! old approach
    #train_size = round(len(X) * 0.8)
    #X = X[:train_size, :]
    #X_val = X[train_size:, :]
    #y = y[:train_size, :]
    #y_val = y[train_size:, :]
    
X, X_val, y, y_val = val_set(X, y)

In [15]:
X.shape

(97593, 672, 1)

In [16]:
# reshape from [samples, timesteps] into [samples, timesteps, features]

def reshape_for_LSTM(X, y, features):
    features
    X = X.reshape((X.shape[0], X.shape[1], features))
    y = y.reshape((y.shape[0], y.shape[1]))
    return X, y

In [17]:
X, y = reshape_for_LSTM(X, y, 1)

In [19]:
X_val, y_val = reshape_for_LSTM(X_val, y_val, 1)

In [20]:
X_val.shape

(24399, 672, 1)

In [21]:
X.shape

(97593, 672, 1)

## Lets start the modeling approach using the Long short term memory model ##



In [22]:
# Define dictionary to store results
history = {}

# Define number of epochs and learning rate decay
N_TRAIN = len(X)
EPOCHS = 50
BATCH_SIZE = 2371 # total sample size = 113808 each batch 2371 samples (48 batches ) #! has to be adjusted further to improve
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(  #! check the source 
    0.01,  #! please adjust and finetune
    decay_steps=STEPS_PER_EPOCH*1000,
    decay_rate=1,
    staircase=False)


# Define optimizer used for modelling
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule, name='Adam')  # due to a warning message I used the legacy.Adam 

In [23]:
# Define path where checkpoints should be stored
checkpoint_path = "modeling/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=0) # Set verbose != 0 if you want output during training 

#create a callback to stop early once there is no improvement in the loss
cp_early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0,
                                mode='auto',
                                baseline=None,
                                restore_best_weights=True,
                                verbose = True)

Note how many output layer are needed for predicting several timestamps? Please check one output layer is enough but some of the parameters have to be adjusted,

n_steps, n_features
X.shape[1], X.shape[2]

reason for not having activation functions https://datascience.stackexchange.com/questions/66594/activation-function-between-lstm-layers
https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTMCell

output layer structure : https://stackoverflow.com/questions/46797891/output-shape-of-lstm-model#46799544

https://shiva-verma.medium.com/understanding-input-and-output-shape-in-lstm-keras-c501ee95c65e

In [24]:
def get_simple_LSTM_model():
    simple_LSTM = tf.keras.Sequential([
      tf.keras.layers.LSTM(45, kernel_initializer = 'uniform', input_shape = (X.shape[1], X.shape[2]), return_sequences=True), # ! units are not set in stone yet 
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.LSTM(32, return_sequences=False),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(y.shape[1] ,kernel_initializer = 'uniform', activation='linear') #96 to predict a day 
    ])

    simple_LSTM.compile(optimizer=optimizer,
                  loss=tf.keras.losses.MeanAbsolutePercentageError(), 
                  metrics=[tf.keras.losses.MeanAbsolutePercentageError()])
    return simple_LSTM

In [25]:
with tf.device('/cpu:0'):
    simple_LSTM = get_simple_LSTM_model()
    print(simple_LSTM.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 672, 45)           8460      
                                                                 
 dropout (Dropout)           (None, 672, 45)           0         
                                                                 
 lstm_1 (LSTM)               (None, 32)                9984      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense (Dense)               (None, 96)                3168      
                                                                 
Total params: 21612 (84.42 KB)
Trainable params: 21612 (84.42 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [26]:
with tf.device('/cpu:0'):
    history = simple_LSTM.fit(X,
                        y,
                        batch_size= BATCH_SIZE,
                        validation_data= (X_val, y_val),   ##### probably best to make validation data D #! TO DO 
                        verbose=100,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=EPOCHS,
                        callbacks=[cp_callback, cp_early_stop])

Epoch 1/50
Epoch 2/50
Restoring model weights from the end of the best epoch: 1.
Epoch 2: early stopping


In [27]:
# first we split the test set too 
X_test, y_test = split_sequence(test, n_steps, pred_size)


In [28]:
history.history

{'loss': [105.76807403564453, 109.21354675292969],
 'mean_absolute_percentage_error': [105.76810455322266, 109.24549865722656],
 'val_loss': [105.07128143310547, 106.9535140991211],
 'val_mean_absolute_percentage_error': [104.9910888671875, 106.83114624023438]}

In [29]:
X_test[:]

array([[[ 1.24523597],
        [ 1.23129074],
        [ 1.24082165],
        ...,
        [ 0.85296359],
        [ 0.81483994],
        [ 0.8068139 ]],

       [[ 1.23129074],
        [ 1.24082165],
        [ 1.23690896],
        ...,
        [ 0.81483994],
        [ 0.8068139 ],
        [ 0.79056119]],

       [[ 1.24082165],
        [ 1.23690896],
        [ 1.22105755],
        ...,
        [ 0.8068139 ],
        [ 0.79056119],
        [ 0.75564794]],

       ...,

       [[-0.99472959],
        [-1.04910597],
        [-1.11020414],
        ...,
        [-0.53523919],
        [-0.61519854],
        [-0.72625878]],

       [[-1.04910597],
        [-1.11020414],
        [-1.19859084],
        ...,
        [-0.61519854],
        [-0.72625878],
        [-0.79317583]],

       [[-1.11020414],
        [-1.19859084],
        [-1.25818413],
        ...,
        [-0.72625878],
        [-0.79317583],
        [-0.87433909]]])

than we take only the first element of the splited test set and let the model predict 

In [30]:
y_test = y_test[:2, :]

In [31]:
y_test

array([[[ 0.79056119],
        [ 0.75564794],
        [ 0.77340554],
        [ 0.79507583],
        [ 0.78143157],
        [ 0.76588113],
        [ 0.74230466],
        [ 0.72916203],
        [ 0.71732363],
        [ 0.63896948],
        [ 0.54877693],
        [ 0.44865217],
        [ 0.38374162],
        [ 0.32735874],
        [ 0.21088093],
        [ 0.12399912],
        [ 0.1251027 ],
        [ 0.0761439 ],
        [-0.04665441],
        [-0.15721302],
        [-0.23777433],
        [-0.31051025],
        [-0.43651898],
        [-0.48838722],
        [-0.57235959],
        [-0.62001416],
        [-0.69134553],
        [-0.74100661],
        [-0.75505217],
        [-0.80130219],
        [-0.84363951],
        [-0.85878865],
        [-0.89921979],
        [-0.90744648],
        [-0.89109344],
        [-0.88988953],
        [-0.85908963],
        [-0.88958855],
        [-0.87524202],
        [-0.87203161],
        [-0.8396265 ],
        [-0.79136997],
        [-0.77311075],
        [-0

In [32]:
x_input = X_test[:2, :]
x_input = x_input.reshape((x_input.shape[0], x_input.shape[1], 1))
y_pred = simple_LSTM.predict(x_input, verbose=0)
print(y_pred)

[[ 4.08063494e-02 -5.13654836e-02  1.05123222e-02 -1.15628541e-03
  -4.27013561e-02 -3.32875028e-02 -4.20079418e-02  1.21642891e-02
   3.99669744e-02  1.30486460e-02 -8.74670129e-03  4.71927784e-03
   5.42570837e-03 -4.32776287e-04 -4.69744205e-02 -2.13410240e-05
   9.16296989e-03 -9.60112363e-03  6.62624184e-03 -5.12521565e-02
   1.37399882e-04 -5.84100373e-02  5.19590043e-02 -4.48680855e-03
  -2.68295594e-02 -3.92691046e-03  1.64784351e-03  4.27949056e-03
   6.48635402e-02  4.90011647e-02 -5.30908350e-04 -3.27749923e-02
   2.52108835e-03 -1.63261220e-03 -1.13625228e-02  1.41014028e-02
   1.12632578e-02  3.91016155e-03  1.07045695e-02  4.40444238e-03
  -3.26629449e-03 -1.17291929e-02  2.23706756e-03 -6.89699221e-03
  -4.84339185e-02 -9.78859141e-03  2.21516546e-02  3.71545777e-02
   2.70959875e-03  1.08263567e-02 -2.15003453e-03 -3.01081687e-04
   1.56172924e-03  2.98223831e-03  2.19835415e-02  1.93621796e-02
   4.59717074e-03 -1.05841160e-02  9.09958221e-03  3.65388580e-04
   1.60910

In [33]:
def last_mse(y_test, y_pred):
    return keras.metrics.mean_absolute_percentage_error(y_test.reshape(y_test.shape[0], y_test.shape[1]), y_pred)

rmse = mean_squared_error(y_test, y_pred)

In [34]:
last_mse(y_test, y_pred)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([109.543564, 109.84453 ], dtype=float32)>

In [35]:
# Save the entire small model as a SavedModel.
!mkdir -p ../models/saved_model
simple_LSTM.save('../models/saved_model/simple_LSTM')

INFO:tensorflow:Assets written to: ../models/saved_model/simple_LSTM/assets


INFO:tensorflow:Assets written to: ../models/saved_model/simple_LSTM/assets


### Short summary ###
model is performing shitty it looks like it cannot handle the seasonality at all 

### Alternative approach get rif of the seasonality beforehand 

In [37]:
df['Consumption_detrend'] = (df['Total_consumption_MWh'] - df['Total_consumption_MWh'].shift(96))


In [38]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163680 entries, 0 to 163679
Data columns (total 8 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   Date                             163680 non-null  datetime64[ns]
 1   Solar_generation_MWh             163680 non-null  float64       
 2   Solar_installed_MW               163680 non-null  float64       
 3   Total_consumption_MWh            163680 non-null  float64       
 4   DE_LU_price_per_MWh              163680 non-null  float64       
 5   normalisation_factor             163680 non-null  float64       
 6   Solar_generation_MWh_normalized  163680 non-null  float64       
 7   Consumption_detrend              163584 non-null  float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 10.0 MB


In [39]:
#Split into test and train set
train, test = train_test_split(df['Consumption_detrend'], test_size=.3, shuffle=False)

# define input sequence
input = train
# choose a number of time steps (a week)
n_steps = 672

# prediction size (we want to predict a day)
pred_size= 96

# split into samples
X, y = split_sequence(input, n_steps, pred_size)
# summarize the data
print(len(X), len(y))

113808 113808


In [40]:
# split the train set into train and validation set
X, X_val, y, y_val = val_set(X, y)

In [41]:
X, y = reshape_for_LSTM(X, y, 1)

In [42]:
def get_season_LSTM_model():
    wo_season_LSTM = tf.keras.Sequential([
      tf.keras.layers.LSTM(45 ,kernel_initializer = 'uniform', input_shape = (X.shape[1], X.shape[2]), return_sequences=True), # ! units are not set in stone yet 
      tf.keras.layers.LSTM(32, return_sequences=False),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(y.shape[1] ,kernel_initializer = 'uniform', activation='relu') #96 to predict a day 
    ])

    wo_season_LSTM.compile(optimizer=optimizer,
                  loss='mean_squared_error')
    return wo_season_LSTM

In [43]:
with tf.device('/cpu:0'):
    wo_season_LSTM = get_season_LSTM_model()
    print(wo_season_LSTM.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 672, 45)           8460      
                                                                 
 lstm_3 (LSTM)               (None, 32)                9984      
                                                                 
 dropout_2 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 96)                3168      
                                                                 
Total params: 21612 (84.42 KB)
Trainable params: 21612 (84.42 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [44]:
with tf.device('/cpu:0'):
    training_history['season_free'] = wo_season_LSTM.fit(X,
                        y,
                        batch_size= BATCH_SIZE,
                        validation_data= (X_val, y_val),   ##### probably best to make validation data D #! TO DO 
                        verbose=100,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=EPOCHS,
                        callbacks=[cp_callback, cp_early_stop])

Epoch 1/50
Epoch 2/50


KeyboardInterrupt: 

In [None]:
training_history['season_free'].history

# short Summary 
sadly I still only get nan for loss and mse. check with this in detail 
https://stackoverflow.com/questions/37232782/nan-loss-when-training-regression-network

In [45]:
print("Evaluate on test data")
results = wo_season_LSTM.evaluate(X_test, y_test, batch_size=2371)
print("test loss, test acc:", results)

Evaluate on test data


ValueError: Data cardinality is ambiguous:
  x sizes: 40152
  y sizes: 2
Make sure all arrays contain the same number of samples.