# First Approach to Long-Short-Term Memory model #
 https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM


In [46]:
#%pip install protobuf
#%pip install tensorflow-macos
#%pip install tensorflow-metal
#%pip install Keras
#%pip install tensorflow-hub

In [47]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns

from itertools import permutations

from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.stattools import adfuller,kpss
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_pacf

from pmdarima.arima import auto_arima
import statsmodels.graphics.tsaplots as tsaplot
from statsmodels.tsa.holtwinters import Holt, ExponentialSmoothing, SimpleExpSmoothing

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.utils import plot_model

import keras 
from keras.models import Sequential # intitialize the ANN
from keras.layers import Dense, Activation, Dropout, LSTM     # create layers


np.random.seed(42)
tf.random.set_seed(42)


We will start with the test train split 
in our case we can create several shorter sequences that we will use to train our model with 


In [48]:
df = pd.read_pickle("../data/final_dataframe.pkl")

In [49]:
def col_names(df):
    column_names = {'Photovoltaics [MWh] Original resolutions': 'Solar_generation_MWh',
                'Photovoltaics [MW] Calculated resolutions': 'Solar_installed_MW',
                'Total (grid load) [MWh] Original resolutions': 'Total_consumption_MWh',
                'Germany/Luxembourg [€/MWh] Calculated resolutions': 'DE_LU_price_per_MWh',}
    df.rename(columns=column_names, inplace=True)


In [50]:
col_names(df)

In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163680 entries, 0 to 163679
Data columns (total 7 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   Date                             163680 non-null  datetime64[ns]
 1   Solar_generation_MWh             163680 non-null  float64       
 2   Solar_installed_MW               163680 non-null  float64       
 3   Total_consumption_MWh            163680 non-null  float64       
 4   DE_LU_price_per_MWh              163680 non-null  float64       
 5   normalisation_factor             163680 non-null  float64       
 6   Solar_generation_MWh_normalized  163680 non-null  float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 8.7 MB


In [52]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df['Solar_generation_MWh_normalized'], test_size=.3, shuffle=False)

In [53]:
# split a univariate sequence into samples
def split_sequence(input, n_steps, pred_size):
    x, y = list(), list()
    for i in range(len(input)):
        end_ix = i + n_steps # find the end of this pattern
        if end_ix+pred_size > len(input)-1: # check if we are beyond the sequence
            break
        seq_x, seq_y = input[i:end_ix], input[end_ix: end_ix+pred_size]# gather input and output parts of the pattern
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)

In [54]:
# define input sequence
input = train
# choose a number of time steps
n_steps = 672

# prediction size 
pred_size= 96
# split into samples
X, y = split_sequence(input, n_steps, pred_size)
# summarize the data
print(len(X), len(y))


113808 113808


In [55]:
print(y.shape, X.shape)

(113808, 96) (113808, 672)


In [57]:
round(len(X) * 0.8)

91046

In [59]:
#Now we have to define the validation set for our model 
def val_set(X,y):
    train_size = round(len(X) * 0.8)
    X = X[:train_size, :]
    X_val = X[train_size:, :]
    y = y[:train_size, :]
    y_val = y[train_size:, :]
    return X, X_val, y, y_val
X, X_val, y, y_val = val_set(X, y)

In [60]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
y = y.reshape((y.shape[0], y.shape[1]))

## Lets start the modeling approach using the Long short term memory model ##



In [61]:
# Define dictionary to store results
training_history = {}

# Define number of epochs and learning rate decay
N_TRAIN = len(X)
EPOCHS = 2000
BATCH_SIZE = 2371 # total sample size = 113808 each batch 2371 samples (48 batches ) #! has to be adjusted further to improve
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.01,
    decay_steps=STEPS_PER_EPOCH*1000,
    decay_rate=1,
    staircase=False)


# Define optimizer used for modelling
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule, name='Adam')  # due to a warning message I used the legacy.Adam 

In [63]:
def plot_metric(history):
    plt.plot(history.history['mse'])
    plt.plot(history.history['val_mse'])
    plt.title('Model MSE')
    plt.ylabel('MSE')
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper right')
    plt.show()

In [64]:
def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.title('Model Loss')
    plt.ylim([0, 10])
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)

In [62]:
# Define path where checkpoints should be stored
checkpoint_path = "modeling/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=0) # Set verbose != 0 if you want output during training 

cp_early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0,
                                mode='auto',
                                baseline=None,
                                restore_best_weights=False,
                                start_from_epoch=0)

Note how many output layer are needed for predicting several timestamps? Please check one output layer is enough but some of the parameters have to be adjusted,

n_steps, n_features
X.shape[1], X.shape[2]

reason for not having activation functions https://datascience.stackexchange.com/questions/66594/activation-function-between-lstm-layers
https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTMCell

In [65]:
def get_simple_LSTM_model():
    simple_LSTM = tf.keras.Sequential([
      tf.keras.layers.LSTM(units = 45 ,kernel_initializer = 'uniform', input_shape = (X.shape[1], X.shape[2]), return_sequences=True), # ! units are not set in stone yet 
      tf.keras.layers.LSTM(32, activation='relu', return_sequences=False),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(y.shape[1] ,kernel_initializer = 'uniform', activation='relu' ) #96 to predict a day 
    ])

    simple_LSTM.compile(optimizer=optimizer,
                  loss='mse',
                  metrics=['mse'])
    return simple_LSTM

In [66]:
with tf.device('/cpu:0'):
    simple_LSTM = get_simple_LSTM_model()
    print(simple_LSTM.summary())

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 672, 45)           8460      
                                                                 
 lstm_9 (LSTM)               (None, 32)                9984      
                                                                 
 dropout_4 (Dropout)         (None, 32)                0         
                                                                 
 dense_4 (Dense)             (None, 96)                3168      
                                                                 
Total params: 21,612
Trainable params: 21,612
Non-trainable params: 0
_________________________________________________________________
None


In [67]:
with tf.device('/cpu:0'):
    training_history['small'] = simple_LSTM.fit(X,
                        y,
                        batch_size= BATCH_SIZE,
                        validation_data= (X_val, y_val),   ##### probably best to make validation data D #! TO DO 
                        verbose=0,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        epochs=EPOCHS,
                        callbacks=[cp_callback, cp_early_stop])

2023-07-02 18:47:27.156388: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [None]:
# first we split the test set too 
X_test, y_test = split_sequence(test)

than we take only the first element of the splited test set and let the model predict 

In [None]:
x_input = test
x_input = x_input.reshape((1, n_steps, n_features))
y_pred = model.predict(x_input, verbose=0)
print(ypred)