# First Approach to Long-Short-Term Memory model #
 https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM


In [29]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

from itertools import permutations

from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.stattools import adfuller,kpss
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_pacf

import statsmodels.graphics.tsaplots as tsaplot
from statsmodels.tsa.holtwinters import Holt, ExponentialSmoothing, SimpleExpSmoothing

import tensorflow as tf

from tensorflow import keras

from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.utils import plot_model

import keras 
from keras.models import Sequential # intitialize the ANN
from keras.layers import Dense, Activation, Dropout, LSTM     # create layers


np.random.seed(42)
tf.random.set_seed(42)


We will start with the test train split 
in our case we can create several shorter sequences that we will use to train our model with 


In [30]:
df = pd.read_pickle("../data/final_dataframe.pkl")

In [31]:
def col_names(df):
    column_names = {'Photovoltaics [MWh] Original resolutions': 'Solar_generation_MWh',
                'Photovoltaics [MW] Calculated resolutions': 'Solar_installed_MW',
                'Total (grid load) [MWh] Original resolutions': 'Total_consumption_MWh',
                'Germany/Luxembourg [€/MWh] Calculated resolutions': 'DE_LU_price_per_MWh',}
    df.rename(columns=column_names, inplace=True)


In [32]:
col_names(df)

In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163680 entries, 0 to 163679
Data columns (total 7 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   Date                             163680 non-null  datetime64[ns]
 1   Solar_generation_MWh             163680 non-null  float64       
 2   Solar_installed_MW               163680 non-null  float64       
 3   Total_consumption_MWh            163680 non-null  float64       
 4   DE_LU_price_per_MWh              163680 non-null  float64       
 5   normalisation_factor             163680 non-null  float64       
 6   Solar_generation_MWh_normalized  163680 non-null  float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 8.7 MB


In [34]:
# We have a lot of samples therefore I will limit the sample size for training a bit mor
#df = df.iloc[60000: , :]
#len(df)

In [35]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df['Solar_generation_MWh_normalized'], test_size=.3, shuffle=False)

In [36]:
# split a univariate sequence into samples
def split_sequence(input, n_steps, pred_size):
    x, y = list(), list()
    for i in range(len(input)):
        end_ix = i + n_steps # find the end of this pattern
        if end_ix+pred_size > len(input)-1: # check if we are beyond the sequence
            break
        seq_x, seq_y = input[i:end_ix], input[end_ix: end_ix+pred_size]# gather input and output parts of the pattern
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)

In [37]:
# define input sequence
input = train
# choose a number of time steps
n_steps = 672

# prediction size 
pred_size= 96
# split into samples
X, y = split_sequence(input, n_steps, pred_size)
# summarize the data
print(len(X), len(y))


113808 113808


In [38]:
print(y.shape, X.shape)

(113808, 96) (113808, 672)


In [39]:
#Now we have to define the validation set for our model 
def val_set(X,y):
    train_size = round(len(X) * 0.8)
    X = X[:train_size, :]
    X_val = X[train_size:, :]
    y = y[:train_size, :]
    y_val = y[train_size:, :]
    return X, X_val, y, y_val
X, X_val, y, y_val = val_set(X, y)

In [40]:
X.shape

(91046, 672)

In [111]:
# reshape from [samples, timesteps] into [samples, timesteps, features]

def reshape_for_LSTM(X, y, features):
    n_features = features
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    y = y.reshape((y.shape[0], y.shape[1]))
    return X, y

## Lets start the modeling approach using the Long short term memory model ##



In [42]:
# Define dictionary to store results
training_history = {}

# Define number of epochs and learning rate decay
N_TRAIN = len(X)
EPOCHS = 200
BATCH_SIZE = 2371 # total sample size = 113808 each batch 2371 samples (48 batches ) #! has to be adjusted further to improve
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.01,
    decay_steps=STEPS_PER_EPOCH*1000,
    decay_rate=1,
    staircase=False)


# Define optimizer used for modelling
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule, name='Adam')  # due to a warning message I used the legacy.Adam 

In [43]:
# Define path where checkpoints should be stored
checkpoint_path = "modeling/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=0) # Set verbose != 0 if you want output during training 

cp_early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=0, verbose=0,
                                mode='auto',
                                baseline=None,
                                restore_best_weights=False,
                                start_from_epoch=0)

Note how many output layer are needed for predicting several timestamps? Please check one output layer is enough but some of the parameters have to be adjusted,

n_steps, n_features
X.shape[1], X.shape[2]

reason for not having activation functions https://datascience.stackexchange.com/questions/66594/activation-function-between-lstm-layers
https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTMCell

output layer structure : https://stackoverflow.com/questions/46797891/output-shape-of-lstm-model#46799544

https://shiva-verma.medium.com/understanding-input-and-output-shape-in-lstm-keras-c501ee95c65e

In [73]:
def get_simple_LSTM_model():
    simple_LSTM = tf.keras.Sequential([
      tf.keras.layers.LSTM(45 ,kernel_initializer = 'uniform', input_shape = (X.shape[1], X.shape[2]), return_sequences=True), # ! units are not set in stone yet 
      tf.keras.layers.LSTM(32, return_sequences=False),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(y.shape[1] ,kernel_initializer = 'uniform', activation='relu') #96 to predict a day 
    ])

    simple_LSTM.compile(optimizer=optimizer,
                  loss='mean_squared_error')
    return simple_LSTM

In [74]:
with tf.device('/cpu:0'):
    simple_LSTM = get_simple_LSTM_model()
    print(simple_LSTM.summary())

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 672, 45)           8460      
                                                                 
 lstm_11 (LSTM)              (None, 32)                9984      
                                                                 
 dropout_6 (Dropout)         (None, 32)                0         
                                                                 
 dense_5 (Dense)             (None, 96)                3168      
                                                                 
Total params: 21612 (84.42 KB)
Trainable params: 21612 (84.42 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [75]:
with tf.device('/cpu:0'):
    training_history['small'] = simple_LSTM.fit(X,
                        y,
                        batch_size= BATCH_SIZE,
                        validation_data= (X_val, y_val),   ##### probably best to make validation data D #! TO DO 
                        verbose=100,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=EPOCHS,
                        callbacks=[cp_callback, cp_early_stop])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200


In [76]:
# first we split the test set too 
X_test, y_test = split_sequence(test, n_steps, pred_size)


In [77]:
X_test[:5]

array([[2054.61789898, 2210.56216942, 2347.96923089, ...,  953.7394014 ,
        1104.35422426, 1229.01695457],
       [2210.56216942, 2347.96923089, 2453.3996069 , ..., 1104.35422426,
        1229.01695457, 1355.06997555],
       [2347.96923089, 2453.3996069 , 2530.79245434, ..., 1229.01695457,
        1355.06997555, 1466.29322936],
       [2453.3996069 , 2530.79245434, 2569.48887806, ..., 1355.06997555,
        1466.29322936, 1566.39415778],
       [2530.79245434, 2569.48887806, 2576.90376165, ..., 1466.29322936,
        1566.39415778, 1660.4704933 ]])

than we take only the first element of the splited test set and let the model predict 

In [82]:
y_test = y_test[:2, :]

In [86]:
y_test

array([[1.35506998e+03, 1.46629323e+03, 1.56639416e+03, 1.66047049e+03,
        1.75454683e+03, 1.83564712e+03, 1.87527040e+03, 1.87619726e+03,
        1.86113578e+03, 1.82846395e+03, 1.77192546e+03, 1.68943488e+03,
        1.58076049e+03, 1.44335343e+03, 1.28370172e+03, 1.10667138e+03,
        9.23153007e+02, 7.40561498e+02, 5.63067722e+02, 3.90439964e+02,
        2.32178542e+02, 1.11686684e+02, 3.75378482e+01, 5.09773247e+00,
        4.63430224e-01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.000000

In [83]:
x_input = X_test[:2, :]
x_input = x_input.reshape((x_input.shape[0], x_input.shape[1], n_features))
y_pred = simple_LSTM.predict(x_input, verbose=0)
print(y_pred)

[[286.59402 286.51938 286.21994 286.3898  286.82928 286.6774  286.6389
  286.62173 286.60873 286.71832 286.03122 286.46057 286.98767 286.65497
  286.83405 286.72284 286.67163 286.51608 286.373   286.36923 284.6028
  286.73822 286.2747  286.38876 286.65762 286.2997  286.22675 286.55508
  286.5328  286.74207 286.80722 286.4979  286.49902 286.41113 286.55402
  286.18497 286.5143  286.07092 285.95804 286.0895  286.61246 286.2862
  286.26508 286.37247 286.17657 286.38394 286.69855 286.91843 286.17145
  286.5744  286.4122  287.00052 286.71686 286.7545  286.21356 286.88184
  286.3336  286.3371  286.48718 286.40094 286.33313 286.92947 286.74484
  286.7728  286.30743 286.43207 286.03055 286.84692 287.18433 286.5848
  286.82327 286.9135  286.32715 286.46082 286.0757  286.6289    0.
  286.81787 286.19464 287.01535 287.04318 286.89453 286.9978  286.67902
  286.39923 286.80142 287.03888 286.86307   0.      286.69598 286.5787
  286.66776 286.5675  286.59763 286.85474 286.52054]
 [286.59402 286.51938

In [84]:
def last_mse(y_test, y_pred):
    return keras.metrics.mean_squared_error(y_test, y_pred)

In [85]:
last_mse(y_test, y_pred)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([440084.16, 455714.1 ], dtype=float32)>

### Short summary ###
model is performing shitty it looks like it cannot handle the seasonality at all 

### Alternative approach get rif of the seasonality beforehand 

In [106]:
df['Solar_detrend'] = (df['Solar_generation_MWh_normalized'] - df['Solar_generation_MWh_normalized'].shift(96))


In [107]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163680 entries, 0 to 163679
Data columns (total 8 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   Date                             163680 non-null  datetime64[ns]
 1   Solar_generation_MWh             163680 non-null  float64       
 2   Solar_installed_MW               163680 non-null  float64       
 3   Total_consumption_MWh            163680 non-null  float64       
 4   DE_LU_price_per_MWh              163680 non-null  float64       
 5   normalisation_factor             163680 non-null  float64       
 6   Solar_generation_MWh_normalized  163680 non-null  float64       
 7   Solar_detrend                    163584 non-null  float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 10.0 MB


In [108]:
#Split into test and train set
train, test = train_test_split(df['Solar_detrend'], test_size=.3, shuffle=False)

# define input sequence
input = train
# choose a number of time steps (a week)
n_steps = 672

# prediction size (we want to predict a day)
pred_size= 96

# split into samples
X, y = split_sequence(input, n_steps, pred_size)
# summarize the data
print(len(X), len(y))

113808 113808


In [109]:
# split the train set into train and validation set
X, X_val, y, y_val = val_set(X, y)

In [112]:
X, y = reshape_for_LSTM(X, y, 1)

In [113]:
def get_season_LSTM_model():
    wo_season_LSTM = tf.keras.Sequential([
      tf.keras.layers.LSTM(45 ,kernel_initializer = 'uniform', input_shape = (X.shape[1], X.shape[2]), return_sequences=True), # ! units are not set in stone yet 
      tf.keras.layers.LSTM(32, return_sequences=False),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(y.shape[1] ,kernel_initializer = 'uniform', activation='relu') #96 to predict a day 
    ])

    wo_season_LSTM.compile(optimizer=optimizer,
                  loss='mean_squared_error')
    return wo_season_LSTM

In [114]:
with tf.device('/cpu:0'):
    wo_season_LSTM = get_season_LSTM_model()
    print(wo_season_LSTM.summary())

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 672, 45)           8460      
                                                                 
 lstm_13 (LSTM)              (None, 32)                9984      
                                                                 
 dropout_7 (Dropout)         (None, 32)                0         
                                                                 
 dense_6 (Dense)             (None, 96)                3168      
                                                                 
Total params: 21612 (84.42 KB)
Trainable params: 21612 (84.42 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [119]:
with tf.device('/cpu:0'):
    training_history['season_free'] = wo_season_LSTM.fit(X,
                        y,
                        batch_size= BATCH_SIZE,
                        validation_data= (X_val, y_val),   ##### probably best to make validation data D #! TO DO 
                        verbose=100,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=EPOCHS,
                        callbacks=[cp_callback, cp_early_stop])

Epoch 1/200
Epoch 2/200


In [121]:
training_history['season_free'].history

{'loss': [nan, nan], 'mse': [nan, nan]}

# short Summary 
sadly I still only get nan for loss and mse. check with this in detail 
https://stackoverflow.com/questions/37232782/nan-loss-when-training-regression-network

In [123]:
print("Evaluate on test data")
results = wo_season_LSTM.evaluate(X_test, y_test, batch_size=2371)
print("test loss, test acc:", results)

Evaluate on test data


ValueError: Data cardinality is ambiguous:
  x sizes: 48336
  y sizes: 2
Make sure all arrays contain the same number of samples.