# Hyperparameter tuning using Stacked LSTM with GridSearchCV 

In [1]:
# Author: Geethu Thottungal Harilal
# data from : https://power.larc.nasa.gov/data-access-viewer/

# This code will use daily data from all locations after feature engineering and 
# implement GridSearchCV hyperparameter tuning with stacked LSTM to find best parameters for each dataset

In [2]:
# import libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout,LSTM, Dense 
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
import matplotlib.pyplot as plt
from tensorflow.keras import optimizers
import warnings
warnings.filterwarnings('ignore')

In [3]:
# read data 
data_en=pd.read_csv("England_dataset_final_1981-2023.csv",parse_dates=["date"]) #England
data_wl=pd.read_csv("Wales_dataset_final_1981-2023.csv",parse_dates=["date"]) #wales
data_sc=pd.read_csv("Scotland_dataset_final_1981-2023.csv",parse_dates=["date"]) #scotland
data_ir=pd.read_csv("Ireland_dataset_final_1981-2023.csv",parse_dates=["date"]) #ireland

## England
This code section will implement hyperparameter tuning in England dataset

In [4]:
target_col = 'Rainfall'

# Extract the target variable
target_en = data_en[target_col].values
df_dates = data_en['date']
df_en =data_en.drop(columns=['date'])

### Train-Validation-Test Split

In [5]:
#split test and train
train_split=round(len(df_en)*0.67)
df_for_training_en=df_en[:train_split]
df_temp_en=df_en[train_split:]
test_split=round(len(df_temp_en)*0.5)
df_for_validation_en=df_temp_en[:test_split]
df_for_testing_en=df_temp_en[test_split:]

In [6]:
print("df_for_training Shape-- ",df_for_training_en.shape)
print("df_for_validation Shape-- ",df_for_validation_en.shape)
print("df_for_testing Shape-- ",df_for_testing_en.shape)

df_for_training Shape--  (10419, 7)
df_for_validation Shape--  (2566, 7)
df_for_testing Shape--  (2566, 7)


In [7]:
#split test dates
temp_date=df_dates[train_split:]
df_for_testdate=temp_date[test_split:]

In [8]:
# scale the dataset with minmaxscaler within the range 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled_en = scaler.fit_transform(df_for_training_en)
df_for_validation_scaled_en = scaler.fit_transform(df_for_validation_en)
df_for_testing_scaled_en=scaler.transform(df_for_testing_en)

In [9]:
# Define the number of time steps and features
n_steps = 15  # Number of time steps to consider
n_features = df_for_testing_scaled_en.shape[1]

In [10]:
# split the data in X and Y
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past, len(dataset)):
            #print(i)
            dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
            dataY.append(dataset[i,0])
    return np.array(dataX),np.array(dataY)
trainX_en,trainY_en=createXY(df_for_training_scaled_en,n_steps)
valX_en,valY_en=createXY(df_for_validation_scaled_en,n_steps)
testX_en,testY_en=createXY(df_for_testing_scaled_en,n_steps)

In [11]:
# Define the stacked LSTM model
def create_lstm_model(units=32,learning_rate=.01, optimizer='adam', activation='relu'):
    grid_model = Sequential()
    grid_model.add(LSTM(units,activation=activation, return_sequences=True, input_shape=(n_steps,n_features)))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    # second LSTM layer 
    grid_model.add(LSTM(units))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    grid_model.add(Dense(1, activation='tanh'))
    opt = optimizers.Adam(learning_rate=learning_rate) if optimizer == 'adam' else optimizer
    grid_model.compile(loss = 'mean_squared_error',optimizer = opt)    
    return grid_model

# Create KerasRegressor wrapper for scikit-learn
grid_model = KerasRegressor(build_fn=create_lstm_model, verbose=1,validation_data=(valX_en, valY_en))

# Define hyperparameters and values to search
parameters = {'units': [32, 64, 128],
              'optimizer': ['adam','SGD'],
              'learning_rate':[.1,.01,.001],
              'activation':['relu', 'tanh', 'sigmoid','swish'] }


In [12]:
# GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator = grid_model,
                            param_grid = parameters,
                            cv=3, refit=True, n_jobs = -1)
# fit the model in out trainX and trainY data
grid_result = grid_search.fit(trainX_en, trainY_en ,verbose=1)



In [13]:
# Print the best parameters and the corresponding RMSE
print("Best params for England data:  ",grid_result.best_params_)

Best params for England data:   {'activation': 'relu', 'learning_rate': 0.01, 'optimizer': 'adam', 'units': 64}


##################################################################################

## Wales
This code section will implement hyperparameter tuning in Wales dataset

In [14]:
target_col = 'Rainfall'

# Extract the target variable
target_wl = data_wl[target_col].values
df_dates = data_wl['date']
df_wl =data_wl.drop(columns=['date'])

### Train-Validation-Test Split

In [15]:
#split test and train
train_split=round(len(df_wl)*0.67)
df_for_training_wl=df_wl[:train_split]
df_temp_wl=df_wl[train_split:]
test_split=round(len(df_temp_wl)*0.5)
df_for_validation_wl=df_temp_wl[:test_split]
df_for_testing_wl=df_temp_wl[test_split:]

In [16]:
print("df_for_training Shape-- ",df_for_training_wl.shape)
print("df_for_validation Shape-- ",df_for_validation_wl.shape)
print("df_for_testing Shape-- ",df_for_testing_wl.shape)

df_for_training Shape--  (10419, 7)
df_for_validation Shape--  (2566, 7)
df_for_testing Shape--  (2566, 7)


In [17]:
# scale the dataset with minmaxscaler within the range 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled_wl = scaler.fit_transform(df_for_training_wl)
df_for_validation_scaled_wl = scaler.fit_transform(df_for_validation_wl)
df_for_testing_scaled_wl=scaler.transform(df_for_testing_wl)

In [18]:
# Define the number of time steps and features
n_steps = 15  # Number of time steps to consider
n_features = df_for_testing_scaled_wl.shape[1]

In [19]:
# split the data in X and Y
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past, len(dataset)):
            #print(i)
            dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
            dataY.append(dataset[i,0])
    return np.array(dataX),np.array(dataY)
trainX_wl,trainY_wl=createXY(df_for_training_scaled_wl,n_steps)
valX_wl,valY_wl=createXY(df_for_validation_scaled_wl,n_steps)
testX_wl,testY_wl=createXY(df_for_testing_scaled_wl,n_steps)

In [20]:
# Define the stacked LSTM model
def create_lstm_model(units=32,learning_rate=.01, optimizer='adam', activation='relu'):
    grid_model = Sequential()
    grid_model.add(LSTM(units,activation=activation, return_sequences=True, input_shape=(n_steps,n_features)))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    # second LSTM layer 
    grid_model.add(LSTM(units))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    grid_model.add(Dense(1, activation='tanh'))
    opt = optimizers.Adam(learning_rate=learning_rate) if optimizer == 'adam' else optimizer
    grid_model.compile(loss = 'mean_squared_error',optimizer = opt)    
    return grid_model

# Create KerasRegressor wrapper for scikit-learn
grid_model = KerasRegressor(build_fn=create_lstm_model, verbose=1,validation_data=(valX_wl, valY_wl))

# Define hyperparameters and values to search
parameters = {'units': [32, 64, 128],
              'optimizer': ['adam','SGD'],
              'learning_rate':[.1,.01,.001],
              'activation':['relu', 'tanh', 'sigmoid','swish'] }


In [21]:
# GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator = grid_model,
                            param_grid = parameters,
                            cv=3, refit=True, n_jobs = -1)
# fit the model in out trainX and trainY data
grid_result = grid_search.fit(trainX_wl, trainY_wl ,verbose=1)



In [22]:
# Print the best parameters and the corresponding RMSE
print("Best params for Wales data:  ",grid_result.best_params_)

Best params for Wales data:   {'activation': 'tanh', 'learning_rate': 0.01, 'optimizer': 'adam', 'units': 64}


#########################################################################################

## Scotland
This code section will implement hyperparameter tuning in Scotland dataset

In [23]:
target_col = 'Rainfall'

# Extract the target variable
target_sc = data_sc[target_col].values
df_dates = data_sc['date']
df_sc =data_sc.drop(columns=['date'])

### Train-Validation-Test Split

In [24]:
#split test and train
train_split=round(len(df_sc)*0.67)
df_for_training_sc=df_sc[:train_split]
df_temp_sc=df_sc[train_split:]
test_split=round(len(df_temp_sc)*0.5)
df_for_validation_sc=df_temp_sc[:test_split]
df_for_testing_sc=df_temp_sc[test_split:]

In [25]:
print("df_for_training Shape-- ",df_for_training_sc.shape)
print("df_for_validation Shape-- ",df_for_validation_sc.shape)
print("df_for_testing Shape-- ",df_for_testing_sc.shape)

df_for_training Shape--  (10419, 7)
df_for_validation Shape--  (2566, 7)
df_for_testing Shape--  (2566, 7)


In [26]:
#split test dates
temp_date=df_dates[train_split:]
df_for_testdate=temp_date[test_split:]

In [27]:
# scale the dataset with minmaxscaler within the range 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled_sc = scaler.fit_transform(df_for_training_sc)
df_for_validation_scaled_sc = scaler.fit_transform(df_for_validation_sc)
df_for_testing_scaled_sc=scaler.transform(df_for_testing_sc)

In [28]:
# Define the number of time steps and features
n_steps = 15  # Number of time steps to consider
n_features = df_for_testing_scaled_sc.shape[1]

In [29]:
# split the data in X and Y
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past, len(dataset)):
            #print(i)
            dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
            dataY.append(dataset[i,0])
    return np.array(dataX),np.array(dataY)
trainX_sc,trainY_sc=createXY(df_for_training_scaled_sc,n_steps)
valX_sc,valY_sc=createXY(df_for_validation_scaled_sc,n_steps)
testX_sc,testY_sc=createXY(df_for_testing_scaled_sc,n_steps)

In [30]:
# Define the stacked LSTM model
def create_lstm_model(units=32,learning_rate=.01, optimizer='adam', activation='relu'):
    grid_model = Sequential()
    grid_model.add(LSTM(units,activation=activation, return_sequences=True, input_shape=(n_steps,n_features)))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    # second LSTM layer 
    grid_model.add(LSTM(units))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    grid_model.add(Dense(1, activation='tanh'))
    opt = optimizers.Adam(learning_rate=learning_rate) if optimizer == 'adam' else optimizer
    grid_model.compile(loss = 'mean_squared_error',optimizer = opt)    
    return grid_model

# Create KerasRegressor wrapper for scikit-learn
grid_model = KerasRegressor(build_fn=create_lstm_model, verbose=1,validation_data=(valX_sc, valY_sc))

# Define hyperparameters and values to search
parameters = {'units': [32, 64, 128],
              'optimizer': ['adam','SGD'],
              'learning_rate':[.1,.01,.001],
              'activation':['relu', 'tanh', 'sigmoid','swish'] }


In [31]:
# GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator = grid_model,
                            param_grid = parameters,
                            cv=3, refit=True, n_jobs = -1)
# fit the model in out trainX and trainY data
grid_result = grid_search.fit(trainX_sc, trainY_sc ,verbose=1)



In [32]:
# Print the best parameters and the corresponding RMSE
print("Best params for Scotland data:  ",grid_result.best_params_)

Best params for Scotland data:   {'activation': 'swish', 'learning_rate': 0.01, 'optimizer': 'adam', 'units': 128}


##################################################################################################

## Northern Ireland
This code section will implement hyperparameter tuning in Northern Ireland dataset

In [33]:
target_col = 'Rainfall'

# Extract the target variable
target_ir = data_ir[target_col].values
df_dates = data_ir['date']
df_ir =data_ir.drop(columns=['date'])

### Train-Validation-Test Split

In [34]:
#split test and train
train_split=round(len(df_ir)*0.67)
df_for_training_ir=df_ir[:train_split]
df_temp_ir=df_ir[train_split:]
test_split=round(len(df_temp_ir)*0.5)
df_for_validation_ir=df_temp_ir[:test_split]
df_for_testing_ir=df_temp_ir[test_split:]

In [35]:
print("df_for_training Shape-- ",df_for_training_ir.shape)
print("df_for_validation Shape-- ",df_for_validation_ir.shape)
print("df_for_testing Shape-- ",df_for_testing_ir.shape)

df_for_training Shape--  (10419, 7)
df_for_validation Shape--  (2566, 7)
df_for_testing Shape--  (2566, 7)


In [36]:
# scale the dataset with minmaxscaler within the range 0 and 1
scaler = MinMaxScaler(feature_range=(0,1))
df_for_training_scaled_ir = scaler.fit_transform(df_for_training_ir)
df_for_validation_scaled_ir = scaler.fit_transform(df_for_validation_ir)
df_for_testing_scaled_ir=scaler.transform(df_for_testing_ir)

In [37]:
# Define the number of time steps and features
n_steps = 15  # Number of time steps to consider
n_features = df_for_testing_scaled_ir.shape[1]

In [38]:
# split the data in X and Y
def createXY(dataset,n_past):
    dataX = []
    dataY = []
    for i in range(n_past, len(dataset)):
            #print(i)
            dataX.append(dataset[i - n_past:i, 0:dataset.shape[1]])
            dataY.append(dataset[i,0])
    return np.array(dataX),np.array(dataY)
trainX_ir,trainY_ir=createXY(df_for_training_scaled_ir,n_steps)
valX_ir,valY_ir=createXY(df_for_validation_scaled_ir,n_steps)
testX_ir,testY_ir=createXY(df_for_testing_scaled_ir,n_steps)

In [39]:
# Define the stacked LSTM model
def create_lstm_model(units=32,learning_rate=.01, optimizer='adam', activation='relu'):
    grid_model = Sequential()
    grid_model.add(LSTM(units,activation=activation, return_sequences=True, input_shape=(n_steps,n_features)))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    # second LSTM layer 
    grid_model.add(LSTM(units))
    grid_model.add(Dropout(0.2)) # Dropout regularisation
    grid_model.add(Dense(1, activation='tanh'))
    opt = optimizers.Adam(learning_rate=learning_rate) if optimizer == 'adam' else optimizer
    grid_model.compile(loss = 'mean_squared_error',optimizer = opt)    
    return grid_model

# Create KerasRegressor wrapper for scikit-learn
grid_model = KerasRegressor(build_fn=create_lstm_model, verbose=1,validation_data=(valX_ir, valY_ir))

# Define hyperparameters and values to search
parameters = {'units': [32, 64, 128],
              'optimizer': ['adam','SGD'],
              'learning_rate':[.1,.01,.001],
              'activation':['relu', 'tanh', 'sigmoid','swish'] }


In [40]:
# GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator = grid_model,
                            param_grid = parameters,
                            cv=3, refit=True, n_jobs = -1)
# fit the model in out trainX and trainY data
grid_result = grid_search.fit(trainX_ir, trainY_ir ,verbose=1)



In [42]:
# Print the best parameters and the corresponding RMSE
print("Best params for Ireland data:  ",grid_result.best_params_)

Best params for Ireland data:   {'activation': 'swish', 'learning_rate': 0.01, 'optimizer': 'adam', 'units': 128}


#################################################################################################