# Decision Tree Regressor

```plaintext

INITIALIZE MinMaxScaler for scaler_X and scaler_y

SET X to 'YearMonthWeek' values from agg_df, reshaped to (-1, 1)
SET y to 'Rate' values from agg_df, reshaped to (-1, 1)

FIT and TRANSFORM X and y with their respective scalers, store the results in X_scaled and y_scaled respectively

SET param_grid with range of 'max_depth', 'min_samples_split' and 'min_samples_leaf' values

CREATE DecisionTreeRegressor object dt_reg

CREATE TimeSeriesSplit object tscv with 5 splits

CREATE GridSearchCV object grid with dt_reg, param_grid and tscv, and fit it on X_scaled and y_scaled

PRINT best parameters and best RMSE of grid

RETRIEVE best model from grid

DEFINE FUNCTION compute_accuracy that takes a row:
    IF 'RATE_actual' in row equals to 0:
        RETURN NaN
    ELSE:
        CALCULATE error and error_proportion
        RETURN (1 - error_proportion) * 100
    ENDIF
END FUNCTION

SET weeks to 12

GET max 'YearMonthWeek' from agg_df and STORE it in last_date
GENERATE date range from last_date for weeks+1 periods with 'W' frequency, starting from the second date and STORE it in future_dates
CONVERT future_dates to ordinal and RESHAPE it to (-1, 1), STORE it in future_dates_ordinal

SCALE future_dates_ordinal using scaler_X and STORE it in future_dates_ordinal_scaled

PREDICT using best_model on future_dates_ordinal_scaled and STORE it in forecasted_values_scaled

INVERSE TRANSFORM forecasted_values_scaled using scaler_y and RESHAPE it to (-1, 1), STORE it in forecasted_values

CREATE DataFrame df_forecasted from future_dates and forecasted_values

ROUND 'Rate' in df_forecasted to 2 decimal places
```

# ARIMA

```plaintext
DEFINE FUNCTION calculate_RMSE that takes y_true, y_pred:
    RETURN the square root of mean_squared_error of y_true and y_pred
END FUNCTION

DEFINE FUNCTION ARIMA_Execute that takes data:
    # Fit an auto_arima model
    SET arima_model to auto_arima with data, trace=True, error_action='ignore', suppress_warnings=True, stepwise=True, alpha=0.07, m=7

    # Print the summary of the model
    PRINT the summary of arima_model

    # Forecast
    SET predictions to arima_model's prediction with n_periods equals to the length of data

    # Calculate the RMSE
    SET RMSE_ARIMA_data to calculate_RMSE with data and predictions
    PRINT "RMSE: %.3f" % RMSE_ARIMA_data

    RETURN arima_model and RMSE_ARIMA_data
END FUNCTION

# Get the 'Rate' column values
SET data to 'Rate' values from agg_df

# Run ARIMA model
SET model and ARIMA_rmse to ARIMA_Execute with data
```

# ARIMAX

```plaintext
# Creating Features

SET agg_df['Rate_rolled_3w'] to rolling mean of 'Rate' over a window of 2, minimum period of 0 and shift of 1

SET 'Covid' column in agg_df to 0
FOR each row in agg_df where 'YearMonthWeek' is between '2020-01' and '2023-01':
    SET 'Covid' to 1
END FOR

FOR i in range from 1 to 4:
    SET agg_df['Rate_lag_' concatenated with i] to 'Rate' column shifted by i
END FOR

SET agg_df['Rate_mean_rolled_3w'] to rolling mean of 'Rate' over a window of 3 and shift of 1
SET agg_df['Rate_std_rolled_3w'] to rolling standard deviation of 'Rate' over a window of 3 and shift of 1

SET exogenous_features to ['Rate_rolled_3w', 'Covid', 'Rate_lag_1', 'Rate_lag_2', 'Rate_lag_3', 'Rate_mean_rolled_3w', 'Rate_std_rolled_3w']


# ARIMAX Model

DEFINE FUNCTION calculate_RMSE that takes y_true, y_pred:
    RETURN the square root of mean_squared_error between y_true and y_pred
END FUNCTION

DEFINE FUNCTION ARIMAX_Execute that takes endog, exog:
    # Fit an auto_arima model
    SET arimax_model to auto_arima model with endog and exog, seasonal=False, trace=True, error_action='ignore', suppress_warnings=True, stepwise=True, alpha=0.07, m=7

    # Print the summary of the model
    PRINT summary of arimax_model

    # Forecast
    SET data_forecast to arimax_model's prediction in sample with exogenous=exog

    # Calculate the RMSE
    SET RMSE_ARIMAX_data to calculate_RMSE with endog and data_forecast
    PRINT "RMSE: %.3f" % RMSE_ARIMAX_data

    RETURN arimax_model and RMSE_ARIMAX_data
END FUNCTION

# Get the 'Rate' column values as endogenous variable
SET endog to 'Rate' values from agg_df

# Get the exogenous features
SET exog to exogenous_features values from agg_df

# Run ARIMAX model
SET model and ARIMAX_rmse to ARIMAX_Execute with endog and exog

```

# LSTM

```plaintext
DEFINE FUNCTION calculate_RMSE that takes y_true, y_pred:
    RETURN the square root of mean_squared_error between y_true and y_pred
END FUNCTION

DEFINE FUNCTION create_dataset that takes dataset, look_back=1:
    INITIALIZE dataX and dataY as empty lists
    FOR i in range from 0 to (length of dataset - look_back - 1):
        SET a to the i-th to (i + look_back)-th elements of dataset
        APPEND a to dataX
        APPEND (i + look_back)-th element of dataset to dataY
    RETURN dataX and dataY as numpy arrays
END FUNCTION

DEFINE FUNCTION create_LSTM_model that takes trainX, trainY, testX, testY, epochs, lstm_layers:
    INITIALIZE a Sequential model
    FOR i in range from 0 to (length of lstm_layers):
        IF i equals to 0:
            ADD LSTM layer with lstm_layers[i] units, return_sequences=True, 
            input_shape=(look_back, trainX.shape[2]), activation='tanh', recurrent_activation='hard_sigmoid', kernel_regularizer=l2(0.01)
        ELSE IF i equals to (length of lstm_layers - 1):
            ADD LSTM layer with lstm_layers[i] units, activation='tanh', kernel_regularizer=l2(0.01)
        ELSE:
            ADD LSTM layer with lstm_layers[i] units, return_sequences=True, activation='tanh', recurrent_activation='hard_sigmoid', kernel_regularizer=l2(0.01)
        ADD Dropout layer with 0.2 rate
    ADD Dense layer with 1 unit
    COMPILE the model with 'mean_squared_error' loss and Adam optimizer with learning_rate=0.0005
    SET es to EarlyStopping with monitor='val_loss', mode='min', verbose=1, patience=20
    FIT the model to trainX, trainY with epochs=epochs, batch_size=128, verbose=2, validation_data=(testX, testY), callbacks=[es] and store the result to history
    RETURN model and history
END FUNCTION

DEFINE FUNCTION plot_train_val_loss that takes history:
    CREATE a plot with figure size (10, 6)
    PLOT history.history['loss'] as 'Train Loss'
    PLOT history.history['val_loss'] as 'Validation Loss'
    SET plot title as 'Model loss progress during training and validation'
    SET plot xlabel as 'Epoch' and ylabel as 'Training and Validation Loss'
    ADD legend to plot
    SHOW plot
END FUNCTION

# Initialize the scaler used for normalization
SET scaler_rate to MinMaxScaler with feature_range=(0, 1)

# Fit and transform 'Rate'
SET rate_scaled to the fit_transform result of scaler_rate on agg_df[['Rate']]

# Concatenate the scaled data
SET dataset to the concatenation of rate_scaled along axis 1

# Split into train and test sets
SET train_size to the integer part of (0.7 times the length of dataset)
SET test_size to the difference of the length of dataset and train_size
SET train to the first train_size elements of dataset
SET test to the elements from train_size to the end of dataset

# Reshape into X=t and Y=t+1, using look_back as the number of time steps
SET look_back to 5
SET trainX, trainY to the result of create_dataset on train with look_back
SET testX, testY to the result of create_dataset on test with look_back

# Reshape input to be [samples, time steps, features]
RESHAPE trainX to (trainX.shape[0], look_back, trainX.shape[2])
RESHAPE testX to (testX.shape[0], look_back, testX.shape[2])

SET epochs_list to [50, 100, 150, 300]
SET lstm_layers_list to 
    [[64, 64, 32, 32, 16, 16, 8, 8, 4, 4, 2, 2], 
    [32, 32, 16, 16, 8, 8, 4, 4, 2, 2], 
    [16, 16, 8, 8, 4, 4, 2, 2], 
    [8, 8, 4, 4, 2, 2], 
    [4, 4, 2, 2], 
    [2, 2]]
    
INITIALIZE rmse_results as empty dictionary
INITIALIZE history_results as empty dictionary

FOR each epochs in epochs_list:
    PRINT "Training for {epochs} epochs..."
    FOR each lstm_layers in lstm_layers_list:
        PRINT "Training with LSTM layers: {lstm_layers}"
        SET model, history to the result of create_LSTM_model or create_BiLSTM_model on trainX, trainY, testX, testY, epochs, lstm_layers
        ADD the history to history_results with key "{epochs} epochs, {lstm_layers} layers"
        PLOT history.history['loss'] and history.history['val_loss'] with corresponding labels
        SET trainPredict to the prediction result of model on trainX
        SET testPredict to the prediction result of model on testX

        # inverse_transform
        SET trainPredict to the inverse_transform result of scaler_rate on trainPredict
        SET trainY_orig to the inverse_transform result of scaler_rate on trainY (reshape trainY first)
        SET testPredict to the inverse_transform result of scaler_rate on testPredict
        SET testY_orig to the inverse_transform result of scaler_rate on testY (reshape testY first)

        # Calculate root mean squared error
        SET trainScore to the result of calculate_RMSE on trainY_orig[0] and trainPredict[:, 0]
        PRINT 'Train Score: {trainScore:.2f} RMSE for {epochs} epochs'
        SET testScore to the result of calculate_RMSE on testY_orig[0] and testPredict[:, 0]
        PRINT 'Test Score: {testScore:.2f} RMSE for {epochs} epochs'

        SET rmse_results["{epochs} epochs, {lstm_layers} layers"] to a dictionary containing 
        'Train RMSE' as trainScore and 'Test RMSE' as testScore

FOR each (key, val) pair in history_results:
    PLOT val['loss'] and val['val_loss'] with corresponding labels

# Configure and show the plot
CREATE a plot with figure size (20, 10)
SET plot title as 'Model loss progress during training and validation'
SET plot xlabel as 'Epoch' and ylabel as 'Training and Validation Loss'
ADD legend to plot
SHOW plot

# Convert the dictionary to a DataFrame for easy display
SET rmse_df to DataFrame constructed from rmse_results (transpose the DataFrame)
PRINT rmse_df
```

# BI-LSTM

```plaintext
CREATE FUNCTION create_dataset TAKING dataset, look_back=1
    CREATE dataX, dataY AS empty lists
    FOR i IN RANGE 0 TO (LENGTH of dataset - look_back - 1)
        SET a AS dataset FROM i TO (i + look_back)
        ADD a TO dataX
        ADD (i + look_back) ELEMENT OF dataset TO dataY
    END FOR
    RETURN dataX, dataY AS numpy arrays
END FUNCTION

CREATE FUNCTION create_BiLSTM_model TAKING trainX, trainY, testX, testY, epochs, lstm_layers
    INITIALIZE model AS Sequential

    FOR i IN RANGE 0 TO (LENGTH of lstm_layers)
        IF i EQUALS 0
            ADD Bidirectional LSTM layer WITH units=lstm_layers[0], return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2]) TO model
            ADD Dropout layer WITH 0.2 rate TO model
        ELSE IF i EQUALS (LENGTH of lstm_layers - 1)
            ADD Bidirectional LSTM layer WITH units=lstm_layers[i], return_sequences=False, activation='tanh', kernel_regularizer=l2(0.01) TO model
        ELSE
            ADD Bidirectional LSTM layer WITH units=lstm_layers[i], return_sequences=True, activation='tanh', recurrent_activation='hard_sigmoid', kernel_regularizer=l2(0.01) TO model
            ADD Dropout layer WITH 0.2 rate TO model
        END IF
    END FOR

    ADD Dense layer WITH 1 unit TO model
    COMPILE model WITH 'mean_squared_error' loss AND Adam optimizer WITH learning_rate=0.0005

    SET es AS EarlyStopping MONITORING 'val_loss', MODE='min', VERBOSE=1, PATIENCE=20

    FIT model ON trainX, trainY WITH epochs=epochs, batch_size=128, verbose=2, validation_data=(testX, testY), callbacks=[es] AND STORE the result AS history

    RETURN model, history
END FUNCTION

```