# Lecture 6: Deep Learning for Time Series Analysis - Exercise
### **Learning Objectives**:
- Apply feature engineering techniques specifically for deep learning models.
- Building and Training RNN based Models (Simple RNN, LSTM, GRU)
- Implement sequence-to-sequence prediction for multi-step time series forecasting.
- Understand the importance of hyperparameters for LSTM and GRU models.


## Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.models import Sequential
from keras.layers import SimpleRNN, LSTM, GRU, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.metrics import RootMeanSquaredError, MeanAbsoluteError

from pydataset import data                                   

## Load Data and Select Predictors

### Load the Beijing_Air_Quality data set

In [None]:
path = 'Data_sets/Beijing_Air_Quality.csv'

# Read the raw CSV; the timestamp is spread over separate year/month/day/hour columns
bjn_a_q_data = pd.read_csv(path)

# Assemble a single timestamp per row from the date-part columns
date_parts = ['year', 'month', 'day', 'hour']
bjn_a_q_data['datetime'] = pd.to_datetime(bjn_a_q_data[date_parts])

# Index the frame by timestamp, discard the date parts and the 'No' column,
# then conform the index to an hourly frequency
bjn_a_q_data = (
    bjn_a_q_data
    .set_index('datetime')
    .drop(columns=date_parts + ['No'])
    .asfreq('H')
)

### Handle missing values
### Resample the data to daily mean

In [None]:
air_q_df = bjn_a_q_data.copy()
# Drop the columns with non numeric values
air_q_df.drop(columns=['station', 'wd'], inplace=True)

# Count the number of missing values by columns
missing_values = air_q_df.isnull().sum()
print('missing values by columns:', missing_values)

# Count the rows that contain at least one missing value
missing_value_rows = air_q_df.isnull().any(axis=1).sum()
print('\n number of rows with missing values:', missing_value_rows)

# Since PM2.5 is the target variable, we will drop the rows with missing values in the PM2.5 column
air_q_df.dropna(subset=['PM2.5'], inplace=True)

# Count the number of leftover missing values by columns
missing_values = air_q_df.isnull().sum()

# most of missing values are left in the CO and O3 columns
# Fill the missing values in the remaining columns with forward fill.
# DataFrame.ffill is used because fillna(method='ffill') is deprecated in pandas >= 2.1.
air_q_df.ffill(inplace=True)

# Resample to a daily average form of the data
air_q_df = air_q_df.resample('D').mean()  # 'D' stands for daily
air_q_df = air_q_df.round(2)  # set to 2 digits after the decimal point
air_q_df = air_q_df.asfreq('D')  # set frequency to daily
# Remove the days that are still NaN after resampling
air_q_df.dropna(inplace=True)

# print(air_q_df.info())
air_q_df

### Visualize the time series 

In [None]:
# plot the data
air_q_df.plot(subplots=True,figsize=(10, 12))
#plt.title('Air Quality Data')
plt.show()

## Preprocessing the time series for Deep Learning application

### Select Predictors 
- Choose the target variable and exogenous variables to serve as predictors
- Rearrange the DataFrame to include the target variable as the first column and the predictors as the remaining columns

In [None]:
# Select the PM2.5 column as the target variable and climate data as the predictor variables
target = ## Your code here

# For example- select the climate data columns to use as predictors
predictors = ## Your code here
 
# Reset the columns of the DataFrame to the selected columns
air_q_df= air_q_df[[target] + predictors] 

### Feature Engineering
#### Create additional meaningful features that help models capture temporal patterns
this is optional and may add additional information to the model, for example:
- Lagged Features
- Rolling Statistics 
- Seasonal Components

##### The model should be tested with or without additional features to evaluate their impact on the model's performance

In [None]:
## Example for possible feature engineering

# Adding lag features to capture previous values (choose specific variables and lags)
air_q_df['PM2.5_lag1'] = ## Your code here
air_q_df['WSPM_1'] = ## Your code here
# you can add more lag features

# Rolling Mean 
# Adding rolling mean to capture trend and seasonal components
air_q_df['PM2.5_rolling_mean'] = ## Your code here

# Adding seasonal components to capture seasonality
air_q_df['day_of_week'] = ## Your code here

# you can add more statistical features or seasonal components

# Drop NaN values that arise from lagging and rolling operations
air_q_df.dropna(inplace=True)


### Scaling the data

In [None]:
# **Scaling the Data**
# choose the scaler to be used, for example MinMaxScaler or StandardScaler
scaler = ## Your code here

 # fit and transform the scaler on the data without the time index
scaled_features = ## Your code here

# Convert the scaled array into a DataFrame that reuses the original column labels
# and time index. The labels are passed as a flat Index: wrapping them in a list
# makes pandas build a MultiIndex whose labels are 1-tuples such as ('PM2.5',).
air_q_scaled = pd.DataFrame(scaled_features, columns=air_q_df.columns, index=air_q_df.index)
print(air_q_scaled.info())

# plot the scaled data
air_q_scaled.plot(subplots=True,figsize=(9, 8))

## Reshaping Data for Deep Learning Models
- Convert feature data into a 3D format suitable for RNN-based models 
- Required shape: (samples, timesteps, features)

In [None]:
def create_window_features(df, window_size, variables=None, target_variable='PM2.5'):
    """
    Prepare time windows for an RNN model.

    For every row, the values of the selected variables from each of the
    previous `window_size` rows are stored as one array per lag column.

    Parameters
    ----------
    df : DataFrame holding the target and predictor columns, ordered in time.
    window_size : number of previous rows (timesteps) in each window; must be >= 1.
    variables : column name or list of column names used to build the windows.
        If None, all columns of `df` are used (including the target itself).
    target_variable : name of the column to predict.

    Returns
    -------
    DataFrame with a 'target' column followed by 'Lag{window_size}_values' ...
    'Lag1_values' (oldest lag first). The first `window_size` rows, which have
    incomplete windows, are dropped.

    Raises
    ------
    ValueError if `window_size` is smaller than 1.
    """
    # A non-positive window would silently produce no lag columns
    # (and a negative one would keep only the last rows), so reject it explicitly
    if window_size < 1:
        raise ValueError('window_size must be a positive integer')

    # if no variables are specified, use all columns including the target variable;
    # a single column name is accepted as well
    if variables is None:
        variables = df.columns
    elif isinstance(variables, str):
        variables = [variables]

    # Start from the target column, aligned on the original index
    rnn_df = pd.DataFrame(index=df.index)
    rnn_df['target'] = df[target_variable]

    features = df[variables]

    # Iterate over the window size in reverse so the oldest lag comes first
    for lag in range(window_size, 0, -1):
        shifted = features.shift(lag)
        # One array per row; converting the whole frame at once avoids a
        # Python-level function call for every row
        rnn_df[f'Lag{lag}_values'] = list(shifted.to_numpy())

    # Drop the first rows, whose windows reach before the start of the data
    return rnn_df.iloc[window_size:]

In [None]:
df_to_RNN=create_window_features(df=air_q_scaled, window_size= '?', variables= '?', target_variable='?')
print('df_to_RNN.shape',df_to_RNN.shape)


##  Splitting X,y Train Test (Using Scikit-Learn)

In [None]:
# Separate the target column (y) from the lag-window columns (X)
y_set = df_to_RNN['target'].to_frame()  # keep y as a 2D DataFrame
X_set = df_to_RNN.drop(columns='target')

# Chronological split: with shuffle=False, train_test_split keeps the original order,
# so the last 15% of the rows become the test set and the last 15% of the
# remaining rows become the validation set
X_train, X_test, y_train, y_test = train_test_split(X_set, y_set, test_size=0.15, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, shuffle=False)

# Report the shape of each subset
for label, X_part, y_part in (
    ('Train set:', X_train, y_train),
    ('Validation set:', X_val, y_val),
    ('Test set:', X_test, y_test),
):
    print(label, X_part.shape, y_part.shape)


### Reshape X data to 3D to fit RNN input shape

In [None]:
def _flatten_windows(frame):
    """Stack the per-timestep arrays stored in the cells of *frame* into one 2D array."""
    return np.array([step for row in frame.values for step in row])


# Training data: flatten, then fold back into (samples, timesteps, features)
x_train_flattened = _flatten_windows(X_train)
x_train_RNN_array = x_train_flattened.reshape(len(X_train), len(X_train.columns), -1)

# Test data
x_test_flattened = _flatten_windows(X_test)
x_test_RNN_array = x_test_flattened.reshape(len(X_test), len(X_test.columns), -1)

# Validation data
x_val_flattened = _flatten_windows(X_val)
x_val_RNN_array = x_val_flattened.reshape(len(X_val), len(X_val.columns), -1)

# Show the resulting 3D shapes
print(
    'x_train_RNN_array.shape:', x_train_RNN_array.shape,
    'x_test_RNN_array.shape:', x_test_RNN_array.shape,
    'x_val_RNN_array.shape:', x_val_RNN_array.shape,
)

## Simple RNN

In [None]:
keras.backend.clear_session()

### Create Model 

In [None]:
def create_RNN_model(x_train, units):
    """Build and compile a one-layer SimpleRNN regressor.

    x_train: 3D array (samples, timesteps, features); only shape[1] and shape[2]
        are read, to define the input shape.
    units: number of units in the SimpleRNN layer.
    Returns the compiled model (Adam optimizer, MSE loss, RMSE and MAE metrics).
    """
    n_timesteps, n_features = x_train.shape[1], x_train.shape[2]
    # Only the last hidden state is passed on (return_sequences=False)
    recurrent = SimpleRNN(units=units, return_sequences=False, input_shape=(n_timesteps, n_features))
    # Output layer predicting a single numerical value
    output = Dense(1, activation='linear')

    model = keras.Sequential([recurrent, output])
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
simple_RNN= create_RNN_model(x_train=x_train_RNN_array, units= 32)
simple_RNN.summary()

### Train Model and Predict

#### Training the model

In [None]:
history = simple_RNN.fit(x_train_RNN_array, y_train , epochs=30, batch_size=100, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

### Evaluate Training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate prediction on the test set

In [None]:
y_pred = simple_RNN.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()


## LSTM (Long Short Term Memory)

In [None]:
keras.backend.clear_session()

### Create Model

In [None]:
def create_LSTM_model(x_train, units):
    """Build and compile a one-layer LSTM regressor.

    x_train: 3D array (samples, timesteps, features); only shape[1] and shape[2]
        are read, to define the input shape.
    units: number of units in the LSTM layer.
    Returns the compiled model (Adam optimizer, MSE loss, RMSE and MAE metrics).
    """
    window_shape = (x_train.shape[1], x_train.shape[2])
    model = keras.Sequential([
        LSTM(units=units, input_shape=window_shape),
        Dense(1),  # single-value output
    ])
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
simple_LSTM= create_LSTM_model(x_train=x_train_RNN_array, units=16)
simple_LSTM.summary()

### Train Model and Predict

In [None]:
history = simple_LSTM.fit(x_train_RNN_array, y_train , epochs=50, batch_size=50, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

#### Evaluate the training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate Prediction

In [None]:
y_pred = simple_LSTM.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()


## Exercise Task: Experimenting with Preprocessing and Model Complexity for RNN Models

#### Understand how feature engineering and model parameters affect RNN performance, and how model complexity impacts prediction, overfitting, and generalization

#### Feature Engineering for RNN Models:

- Select Additional Variables: Choose additional variables from the dataset that may influence the target variable 

- Create Lagged Features and Rolling Statistics for Selected variables: This helps the model capture temporal dependencies and identify patterns over different time periods

- Control the Window Size: Evaluate how larger window sizes impact the model's complexity and potential overfitting

#### Deep Learning Model Hyperparameter Tuning:

- Number of Layers: Experiment with different numbers of RNN layers (typically between 1 and 3). You may also add Dense layers after the RNN layers to capture non-linear relationships

- Number of Units: Modify the number of units in each RNN layer. A higher number increases the model's capacity to learn complex patterns but can also lead to overfitting. Common values range between 16 to 100 units

- Dropout Layer: Introduce dropout layers to mitigate overfitting. Test dropout rates between 0.2 to 0.5 to observe their effect on the model's robustness

- Early Stopping: Prevent overfitting by controlling the number of training **epochs**. Set a suitable value to determine when training should stop based on validation loss

#### Advanced Hyperparameter Tuning (for those with prior knowledge of deep learning models):

- Activation Functions: Adjust activation functions in the Dense layers to test their effect on non-linearity capture. Use 'relu' for Dense layers and 'linear' for the output layer

- Learning Rate: Experiment with learning rates to control the model's convergence speed

- Batch Size: Test different batch sizes to evaluate their effect on training stability and convergence speed. Small batch sizes provide regularization benefits

## Complex LSTM Tuning

In [None]:
keras.backend.clear_session()

### Create Model

In [None]:
def create_LSTM_model(x_train, units):
    """Template for a deeper LSTM regressor (exercise).

    As written it builds LSTM(units) -> Dense(1); the commented-out lines mark
    where optional layers can be enabled. The model is compiled with the Adam
    optimizer, MSE loss, and RMSE/MAE metrics.

    x_train: array whose shape[1] and shape[2] give (timesteps, features).
    units: number of units in the first LSTM layer.
    """
    model = keras.Sequential()
    model.add(LSTM(units=units, input_shape=(x_train.shape[1], x_train.shape[2])))
    # Add a Dropout layer
    #model.add(Dropout(0.2))

    # Add a second LSTM layer
    # NOTE: per the Keras docs an LSTM layer expects 3D input, so the LSTM above
    # must be created with return_sequences=True before this line is enabled
    #model.add(LSTM(units=?))

    # Add a Dense layer
    #model.add(Dense(16, activation='relu'))


    # Add the output layer (a single predicted value)
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=[RootMeanSquaredError(), MeanAbsoluteError()])
    return model

In [None]:
simple_LSTM= create_LSTM_model(x_train=x_train_RNN_array, units=50)
simple_LSTM.summary()

### Train Model and Predict

In [None]:
history = simple_LSTM.fit(x_train_RNN_array, y_train , epochs=50, batch_size=20, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

#### Evaluate the training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate Prediction

In [None]:
y_pred = simple_LSTM.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()


# Lecture 6 Exercise - Example Solution
### **Learning Objectives**:
- Apply feature engineering techniques specifically for deep learning models.
- Building and Training RNN based Models (Simple RNN, LSTM, GRU)
- Implement sequence-to-sequence prediction for multi-step time series forecasting.
- Understand the importance of hyperparameters for LSTM and GRU models.

## Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.models import Sequential
from keras.layers import SimpleRNN, LSTM, GRU, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.metrics import RootMeanSquaredError, MeanAbsoluteError

from pydataset import data                                   

## Load Data and Select Predictors

### Load the Beijing_Air_Quality data set

In [None]:
path = 'Data_sets/Beijing_Air_Quality.csv'

# Read the raw CSV; the timestamp is spread over separate year/month/day/hour columns
bjn_a_q_data = pd.read_csv(path)

# Assemble a single timestamp per row from the date-part columns
date_parts = ['year', 'month', 'day', 'hour']
bjn_a_q_data['datetime'] = pd.to_datetime(bjn_a_q_data[date_parts])

# Index the frame by timestamp, discard the date parts and the 'No' column,
# then conform the index to an hourly frequency
bjn_a_q_data = (
    bjn_a_q_data
    .set_index('datetime')
    .drop(columns=date_parts + ['No'])
    .asfreq('H')
)

### Handle missing values
### Resample the data to daily mean

In [None]:
air_q_df = bjn_a_q_data.copy()
# Drop the columns with non numeric values
air_q_df.drop(columns=['station', 'wd'], inplace=True)

# Count the number of missing values by columns
missing_values = air_q_df.isnull().sum()
print('missing values by columns:', missing_values)

# Count the rows that contain at least one missing value
missing_value_rows = air_q_df.isnull().any(axis=1).sum()
print('\n number of rows with missing values:', missing_value_rows)

# Since PM2.5 is the target variable, we will drop the rows with missing values in the PM2.5 column
air_q_df.dropna(subset=['PM2.5'], inplace=True)

# Count the number of leftover missing values by columns
missing_values = air_q_df.isnull().sum()

# most of missing values are left in the CO and O3 columns
# Fill the missing values in the remaining columns with forward fill.
# DataFrame.ffill is used because fillna(method='ffill') is deprecated in pandas >= 2.1.
air_q_df.ffill(inplace=True)

# Resample to a daily average form of the data
air_q_df = air_q_df.resample('D').mean()  # 'D' stands for daily
air_q_df = air_q_df.round(2)  # set to 2 digits after the decimal point
air_q_df = air_q_df.asfreq('D')  # set frequency to daily
# Remove the days that are still NaN after resampling
air_q_df.dropna(inplace=True)

# print(air_q_df.info())
air_q_df

### Visualize the time series 

In [None]:
# plot the data
air_q_df.plot(subplots=True,figsize=(10, 12))
#plt.title('Air Quality Data')
plt.show()

## Preprocessing the time series for Deep Learning application

### Select Predictors 
- Choose the target variable and exogenous variables to serve as predictors
- Rearrange the DataFrame to include the target variable as the first column and the predictors as the remaining columns

In [None]:
# Select the PM2.5 column as the target variable and climate data as the predictor variables
target = 'PM2.5'
# For example- select the climate data columns to use as predictors
predictors = ['DEWP', 'TEMP', 'PRES', 'RAIN', 'WSPM'] 
# Reset the columns of the DataFrame to the selected columns
air_q_df= air_q_df[[target] + predictors] 

### Feature Engineering
#### Create additional meaningful features that help models capture temporal patterns
this is optional and may add additional information to the model, for example:
- Lagged Features
- Rolling Statistics 
- Seasonal Components

##### The model should be tested with or without additional features to evaluate their impact on the model's performance

In [None]:
## Example for possible feature engineering

# Adding lag features to capture previous values (choose specific variables and lags)
air_q_df['PM2.5_lag1'] = air_q_df['PM2.5'].shift(1)
air_q_df['WSPM_1'] = air_q_df['WSPM'].shift(1)

# Rolling Mean 
# Adding rolling mean to capture trend and seasonal components
air_q_df['PM2.5_rolling_mean'] = air_q_df['PM2.5'].rolling(window=5).mean()

# Adding seasonal components to capture seasonality
# day of the week
air_q_df['day_of_week'] = air_q_df.index.dayofweek

# Drop NaN values that arise from lagging and rolling operations
air_q_df.dropna(inplace=True)


### Scaling the data

In [None]:
# **Scaling the Data**
# choose the scaler to be used, for example MinMaxScaler or StandardScaler
scaler = MinMaxScaler()
# Fit the scaler on every column and transform; the result is a NumPy array
# without the time index.
# NOTE(review): the scaler is fitted on the whole series, before the
# train/validation/test split further down, so the validation and test rows
# influence the scaling parameters.
scaled_features = scaler.fit_transform(air_q_df)

# Convert the scaled array into a DataFrame that reuses the original column labels
# and time index. The labels are passed as a flat Index: wrapping them in a list
# makes pandas build a MultiIndex whose labels are 1-tuples such as ('PM2.5',).
air_q_scaled = pd.DataFrame(scaled_features, columns=air_q_df.columns, index=air_q_df.index)
print(air_q_scaled.info())

# plot the scaled data
air_q_scaled.plot(subplots=True, figsize=(9, 8))

## Reshaping Data for Deep Learning Models
- Convert feature data into a 3D format suitable for RNN-based models 
- Required shape: (samples, timesteps, features)

In [None]:
def create_window_features(df, window_size, variables=None, target_variable='PM2.5'):
    """
    Prepare time windows for an RNN model.

    For every row, the values of the selected variables from each of the
    previous `window_size` rows are stored as one array per lag column.

    Parameters
    ----------
    df : DataFrame holding the target and predictor columns, ordered in time.
    window_size : number of previous rows (timesteps) in each window; must be >= 1.
    variables : column name or list of column names used to build the windows.
        If None, all columns of `df` are used (including the target itself).
    target_variable : name of the column to predict.

    Returns
    -------
    DataFrame with a 'target' column followed by 'Lag{window_size}_values' ...
    'Lag1_values' (oldest lag first). The first `window_size` rows, which have
    incomplete windows, are dropped.

    Raises
    ------
    ValueError if `window_size` is smaller than 1.
    """
    # A non-positive window would silently produce no lag columns
    # (and a negative one would keep only the last rows), so reject it explicitly
    if window_size < 1:
        raise ValueError('window_size must be a positive integer')

    # if no variables are specified, use all columns including the target variable;
    # a single column name is accepted as well
    if variables is None:
        variables = df.columns
    elif isinstance(variables, str):
        variables = [variables]

    # Start from the target column, aligned on the original index
    rnn_df = pd.DataFrame(index=df.index)
    rnn_df['target'] = df[target_variable]

    features = df[variables]

    # Iterate over the window size in reverse so the oldest lag comes first
    for lag in range(window_size, 0, -1):
        shifted = features.shift(lag)
        # One array per row; converting the whole frame at once avoids a
        # Python-level function call for every row
        rnn_df[f'Lag{lag}_values'] = list(shifted.to_numpy())

    # Drop the first rows, whose windows reach before the start of the data
    return rnn_df.iloc[window_size:]

In [None]:
df_to_RNN=create_window_features(df=air_q_df, window_size=3, variables= None, target_variable='PM2.5')
df_to_RNN= df_to_RNN.round(2)
df_to_RNN.tail()

In [None]:
df_to_RNN=create_window_features(df=air_q_scaled, window_size=10, variables= predictors, target_variable='PM2.5')
print('df_to_RNN.shape',df_to_RNN.shape)


##  Splitting X,y Train Test (Using Scikit-Learn)

In [None]:
# Separate the target column (y) from the lag-window columns (X)
y_set = df_to_RNN['target'].to_frame()  # keep y as a 2D DataFrame
X_set = df_to_RNN.drop(columns='target')

# Chronological split: with shuffle=False, train_test_split keeps the original order,
# so the last 15% of the rows become the test set and the last 15% of the
# remaining rows become the validation set
X_train, X_test, y_train, y_test = train_test_split(X_set, y_set, test_size=0.15, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, shuffle=False)

# Report the shape of each subset
for label, X_part, y_part in (
    ('Train set:', X_train, y_train),
    ('Validation set:', X_val, y_val),
    ('Test set:', X_test, y_test),
):
    print(label, X_part.shape, y_part.shape)


### Reshape X data to 3D to fit RNN input shape

In [None]:
def _flatten_windows(frame):
    """Stack the per-timestep arrays stored in the cells of *frame* into one 2D array."""
    return np.array([step for row in frame.values for step in row])


# Training data: flatten, then fold back into (samples, timesteps, features)
x_train_flattened = _flatten_windows(X_train)
x_train_RNN_array = x_train_flattened.reshape(len(X_train), len(X_train.columns), -1)

# Test data
x_test_flattened = _flatten_windows(X_test)
x_test_RNN_array = x_test_flattened.reshape(len(X_test), len(X_test.columns), -1)

# Validation data
x_val_flattened = _flatten_windows(X_val)
x_val_RNN_array = x_val_flattened.reshape(len(X_val), len(X_val.columns), -1)

# Show the resulting 3D shapes
print(
    'x_train_RNN_array.shape:', x_train_RNN_array.shape,
    'x_test_RNN_array.shape:', x_test_RNN_array.shape,
    'x_val_RNN_array.shape:', x_val_RNN_array.shape,
)

In [None]:
## print the first element of the training set
x_train_RNN_array[0][0]

## Simple RNN

In [None]:
keras.backend.clear_session()

### Create Model 

In [None]:
def create_RNN_model(x_train, units):
    """Build and compile a one-layer SimpleRNN regressor.

    x_train: 3D array (samples, timesteps, features); only shape[1] and shape[2]
        are read, to define the input shape.
    units: number of units in the SimpleRNN layer.
    Returns the compiled model (Adam optimizer, MSE loss, RMSE and MAE metrics).
    """
    n_timesteps, n_features = x_train.shape[1], x_train.shape[2]
    # Only the last hidden state is passed on (return_sequences=False)
    recurrent = SimpleRNN(units=units, return_sequences=False, input_shape=(n_timesteps, n_features))
    # Output layer predicting a single numerical value
    output = Dense(1, activation='linear')

    model = keras.Sequential([recurrent, output])
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
simple_RNN= create_RNN_model(x_train=x_train_RNN_array, units= 32)
simple_RNN.summary()

### Train Model and Predict

#### Training the model

In [None]:
history = simple_RNN.fit(x_train_RNN_array, y_train , epochs=50, batch_size=100, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

### Evaluate Training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate prediction on the test set

In [None]:
y_pred = simple_RNN.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()


## LSTM (Long Short Term Memory)

In [None]:
keras.backend.clear_session()

### Create Model

In [None]:
def create_LSTM_model(x_train, units):
    """Build and compile a one-layer LSTM regressor.

    x_train: 3D array (samples, timesteps, features); only shape[1] and shape[2]
        are read, to define the input shape.
    units: number of units in the LSTM layer.
    Returns the compiled model (Adam optimizer, MSE loss, RMSE and MAE metrics).
    """
    window_shape = (x_train.shape[1], x_train.shape[2])
    model = keras.Sequential([
        LSTM(units=units, input_shape=window_shape),
        Dense(1),  # single-value output
    ])
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
simple_LSTM= create_LSTM_model(x_train=x_train_RNN_array, units=16)
simple_LSTM.summary()

### Train Model and Predict

In [None]:
history = simple_LSTM.fit(x_train_RNN_array, y_train , epochs=50, batch_size=50, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

#### Evaluate the training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate Prediction

In [None]:
y_pred = simple_LSTM.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()


## GRU Gated Recurrent Unit

In [None]:
keras.backend.clear_session()

### Create Model 

In [None]:
def create_GRU_model(x_train, units):
    """Build and compile a one-layer GRU regressor.

    x_train: 3D array (samples, timesteps, features); only shape[1] and shape[2]
        are read, to define the input shape.
    units: number of units in the GRU layer.
    Returns the compiled model (Adam optimizer, MSE loss, RMSE and MAE metrics).
    """
    n_timesteps, n_features = x_train.shape[1], x_train.shape[2]
    layers = [
        # Only the last hidden state is passed on (return_sequences=False)
        GRU(units=units, return_sequences=False, input_shape=(n_timesteps, n_features)),
        # Output layer predicting a single numerical value
        Dense(1),
    ]
    model = keras.Sequential(layers)
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
simple_GRU= create_GRU_model(x_train=x_train_RNN_array, units= 32)
simple_GRU.summary()

### Train Model and Predict

#### Training the model

In [None]:
history = simple_GRU.fit(x_train_RNN_array, y_train, epochs=50, batch_size=32, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

#### Evaluate Training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate prediction on the test set

In [None]:
y_pred = simple_GRU.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()


## Exercise Task: Experimenting with Preprocessing and Model Complexity for RNN Models

#### Understand how feature engineering and model parameters affect RNN performance, and how model complexity impacts prediction, overfitting, and generalization

#### Feature Engineering for RNN Models:

- Select Additional Variables: Choose additional variables from the dataset that may influence the target variable 

- Create Lagged Features and Rolling Statistics for Selected variables: This helps the model capture temporal dependencies and identify patterns over different time periods

- Control the Window Size: Evaluate how larger window sizes impact the model's complexity and potential overfitting

#### Deep Learning Model Hyperparameter Tuning:

- Number of Layers: Experiment with different numbers of RNN layers (typically between 1 and 3). You may also add Dense layers after the RNN layers to capture non-linear relationships

- Number of Units: Modify the number of units in each RNN layer. A higher number increases the model's capacity to learn complex patterns but can also lead to overfitting. Common values range between 16 to 100 units

- Dropout Layer: Introduce dropout layers to mitigate overfitting. Test dropout rates between 0.2 to 0.5 to observe their effect on the model's robustness

- Early Stopping: Prevent overfitting by controlling the number of training **epochs**. Set a suitable value to determine when training should stop based on validation loss

#### Advanced Hyperparameter Tuning (for those with prior knowledge of deep learning models):

- Activation Functions: Adjust activation functions in the Dense layers to test their effect on non-linearity capture. Use 'relu' for Dense layers and 'linear' for the output layer

- Learning Rate: Experiment with learning rates to control the model's convergence speed

- Batch Size: Test different batch sizes to evaluate their effect on training stability and convergence speed. Small batch sizes provide regularization benefits

## Complex LSTM Tuning

In [None]:
keras.backend.clear_session()

### Create Model

In [None]:
def create_LSTM_model(x_train, units):
    """Build and compile an LSTM regressor with a hidden Dense layer.

    Architecture: LSTM(units) -> Dense(16, relu) -> Dense(1).

    x_train: 3D array (samples, timesteps, features); only shape[1] and shape[2]
        are read, to define the input shape.
    units: number of units in the LSTM layer.
    Returns the compiled model (Adam optimizer, MSE loss, RMSE and MAE metrics).
    """
    window_shape = (x_train.shape[1], x_train.shape[2])
    model = keras.Sequential([
        LSTM(units=units, input_shape=window_shape),
        # Optional Dropout layer (disabled)
        # Dropout(0.2),
        # Hidden Dense layer
        Dense(16, activation='relu'),
        # Output layer
        Dense(1),
    ])
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError()],
    )
    return model

In [None]:
simple_LSTM= create_LSTM_model(x_train=x_train_RNN_array, units=50)
simple_LSTM.summary()

### Train Model and Predict

In [None]:
history = simple_LSTM.fit(x_train_RNN_array, y_train , epochs=50, batch_size=20, 
                            validation_data=(x_val_RNN_array, y_val), verbose=1)

#### Evaluate the training

In [None]:
# Plot the loss by epoch of train and validation data
plt.figure(figsize=(7, 3))
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.title('Loss by Epoch')
plt.show()

### Evaluate Prediction

In [None]:
y_pred = simple_LSTM.predict(x_test_RNN_array)
# NOTE: no inverse transform is applied here, so predictions and y_test are both
# in the scaler's units and the RMSE below is reported on the scaled target
y_pred = pd.Series(y_pred.flatten(), index=y_test.index)

# Evaluate the model error.
# y_test is a one-column DataFrame, so its values are flattened to 1D first:
# subtracting an (n,) array from an (n, 1) array broadcasts to an (n, n) matrix
# and would average over every actual/forecast pairing instead of matching rows
rmse = np.sqrt(np.mean((y_test.values.ravel() - y_pred.values) ** 2))

# Plot the forecast vs actual
time_index = y_test.index
plt.figure(figsize=(8, 3))
plt.plot(time_index, y_test, label='actual')
plt.plot(time_index, y_pred, label='forecast')
# print the RMSE on the plot
plt.title(f'Forecast vs Actual - RMSE: {rmse:.2f}')
plt.legend()
plt.show()
