In [1]:
# Import Required Libraries

import pandas as pd
import math
import numpy as np

import matplotlib.pyplot as plt

import plotly.graph_objs as go
from plotly.offline import iplot

from prophet import Prophet
import holidays
from prophet.diagnostics import cross_validation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import tensorflow as tf
import os

from sklearn.preprocessing import MinMaxScaler
import joblib

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [4]:
def is_weekend(ds):
    date = pd.to_datetime(ds)
    # Return True for Saturday (5) and Sunday (6), False otherwise
    return date.weekday() >= 5

def df_to_X_y(df, window_size=6):
    df_as_np = df.to_numpy()
    X = []
    y = []
    for i in range(len(df_as_np) - window_size):
        row = [r for r in df_as_np[i:i + window_size]]
        X.append(row)
        label = df_as_np[i + window_size][6]  # 'Entry' is the 7th column (index 6)
        y.append(label)
    return np.array(X), np.array(y)

def evaluate_model(test, test_forecast):
    # Evaluate performance
    mse = mean_squared_error(y_true=test['y'],  y_pred=test_forecast['yhat'])
    rmse = np.sqrt(mean_squared_error(y_true=test['y'], y_pred=test_forecast['yhat']))
    mae = mean_absolute_error(y_true=test['y'], y_pred=test_forecast['yhat'])
    r2 = r2_score(y_true=test['y'], y_pred=test_forecast['yhat'])
    return mse, rmse, mae, r2

def evaluatel(y_test_inv, y_pred_inv):
        # Calculate evaluation metrics
    mse = mean_squared_error(y_test_inv, y_pred_inv)
    rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
    mae = mean_absolute_error(y_test_inv, y_pred_inv)
    r2 = r2_score(y_test_inv, y_pred_inv)
    
    return mse, rmse, mae, r2

def prophet_model(train, test, df):
    # Set evaluation metrics to 0
    p_rmse = 0
    p_mse = 0
    p_mae = 0
    p_r2 = 0

    # Create holidays dataframe
    holiday = pd.DataFrame([])
    for date, name in sorted(holidays.Philippines(years=[2022, 2023]).items()):
        holiday = pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': name}, index=[0])], ignore_index=True)
    holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

    # Initialize the Prophet model
    m = Prophet(
        yearly_seasonality=True,
        daily_seasonality=False,
        weekly_seasonality=True,
        holidays=holiday,
        seasonality_mode='multiplicative',
        scaling='minmax'
    )

    # Add the regressors
    m.add_seasonality(name='daily_is_weekend', period=1, fourier_order=4, condition_name='weekend')
    m.add_seasonality(name='daily_is_weekday', period=1, fourier_order=4, condition_name='weekday')
    m.add_regressor('off_hour')
    m.add_regressor('rain_amount')

    # Fit the model on the training data
    m.fit(train)

    # Make predictions for Test set
    test_forecast = m.predict(test)

    # Remove negative forecasts
    test_forecast['yhat'] = test_forecast['yhat'].apply(lambda x: max(x, 0))
    test_forecast['yhat_lower'] = test_forecast['yhat_lower'].apply(lambda x: max(x, 0))
    test_forecast['yhat_upper'] = test_forecast['yhat_upper'].apply(lambda x: max(x, 0))

    # Round forecast values
    test_forecast['yhat'] = test_forecast['yhat'].round()
    
    return test_forecast

def prophet_future(train,df):
      # Set evaluation metrics to 0
    p_rmse = 0
    p_mse = 0
    p_mae = 0
    p_r2 = 0

    # Create holidays dataframe
    holiday = pd.DataFrame([])
    for date, name in sorted(holidays.Philippines(years=[2022, 2023]).items()):
        holiday = pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': name}, index=[0])], ignore_index=True)
    holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

    # Initialize the Prophet model
    m = Prophet(
        yearly_seasonality=True,
        daily_seasonality=False,
        weekly_seasonality=True,
        holidays=holiday,
        seasonality_mode='multiplicative',
        scaling='minmax'
    )

    # Add the regressors
    m.add_seasonality(name='daily_is_weekend', period=1, fourier_order=4, condition_name='weekend')
    m.add_seasonality(name='daily_is_weekday', period=1, fourier_order=4, condition_name='weekday')
    m.add_regressor('off_hour')
    m.add_regressor('rain_amount')

    # Fit the model on the training data
    m.fit(train)
    
    # Create a future DataFrame with hourly intervals for the desired forecast period
    future = m.make_future_dataframe(periods=150, freq='D')
    future['hour'] = pd.to_datetime(future['ds']).dt.hour
    future['is_weekend'] = df['is_weekend']
    future['weekday'] = future['ds'].apply(is_weekend)
    future['weekend'] = ~future['ds'].apply(is_weekend)
    future['rain_amount'] = df['rain_amount']
    future['off_hour'] = future['hour'].apply(lambda x: 1 if (x >= 23) or (x <= 3) else 0)
    future = future[future['ds'].dt.hour < 23]
    future = future[future['ds'].dt.hour > 3]

    # Make predictions for future set
    future_forecast = m.predict(future)
    
    return future_forecast

def lstm_model(df, X_train, y_train, X_test, y_test, X_val, y_val):
    model = Sequential()
    model.add(InputLayer((X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='linear'))
    model.add(Dense(1, activation='linear'))
    model.summary()

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Define the ModelCheckpoint callback with the correct file path
    os.makedirs('model', exist_ok=True)
    cp1 = ModelCheckpoint(filepath='../Hybrid Model/model/best_lstm.keras', save_best_only=True, monitor='val_loss', mode='min')

    # Use early stopping to prevent overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Fit the model
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=[cp1, early_stopping])

    # Assuming df is the original DataFrame with the 'Date Time' column
    # Extract the 'Date Time' column for the entire dataset
    date_time_test = df['Date Time']

    # Load the scaler for inverse transformation
    scaler_entry = joblib.load('../Hybrid Model/model/scaler_entry.pkl')

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Create a DataFrame to hold the predictions and actual values
    df_pred = pd.DataFrame(y_pred, columns=['Entry'])
    df_actual = pd.DataFrame(y_test, columns=['Entry'])

    # Inverse transform the 'Entry' column
    y_pred_inv = scaler_entry.inverse_transform(df_pred)
    y_test_inv = scaler_entry.inverse_transform(df_actual)

    # Ensure date_time_test matches the length of y_test_inv and y_pred_inv
    date_time_test = date_time_test[-len(y_test_inv):]

    # Round the predictions to the nearest whole number and ensure non-negative values
    y_pred_inv = np.round(np.maximum(y_pred_inv, 0))
    y_test_inv = np.round(np.maximum(y_test_inv, 0))

    return y_pred_inv

def model_lstm(df, X_train, y_train, X_val, y_val):
    model = Sequential()
    model.add(InputLayer((X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='linear'))
    model.add(Dense(1, activation='linear'))
    model.summary()

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Define the ModelCheckpoint callback with the correct file path
    os.makedirs('model', exist_ok=True)
    cp1 = ModelCheckpoint(filepath='../Hybrid Model/model/best_lstm.keras', save_best_only=True, monitor='val_loss', mode='min')

    # Use early stopping to prevent overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Fit the model
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=[cp1, early_stopping])
    
    return model

### Prophet Model

In [5]:
# Import dataset
df = pd.read_csv('2223TaftProphet.csv', parse_dates=[0])

# Rename header to Prophet's requirements
df.reset_index()
df = df.rename(columns={'Datetime':'ds', 'Entry':'y'})

# Add additional regressors as columns in the dataframe
df['hour'] = pd.to_datetime(df['ds']).dt.hour
df['off_hour'] = df['hour'].apply(lambda x: 1 if (x >= 23) or (x <= 3) else 0)
df['weekday'] = ~df['ds'].apply(is_weekend)
df['weekend'] = df['ds'].apply(is_weekend)

# Split the dataset to training and testing sets
train_len = math.floor((df.shape[0]*80)/100)
train = df[:train_len]
test = df[train_len:]

test_forecast = prophet_model(train, test, df)
prophet_future = prophet_future(train, df)
prophet_eval = evaluate_model(test, test_forecast)
print(prophet_eval)


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


errors='ignore' is deprecated and will raise in a future version. Use to_datetime without passing `errors` and catch exceptions explicitly instead

01:58:27 - cmdstanpy - INFO - Chain [1] start processing
01:59:07 - cmdstanpy - INFO - Chain [1] done processing

errors='ignore' is deprecated and will raise in a future version. Use to_datetime without passing `errors` and catch exceptions explicitly instead

01:59:13 - cmdstanpy - INFO - Chain [1] start processing
01:59:42 - cmdstanpy - INFO - Chain [1] done processing


(388428.54452054796, 623.2403585460011, 424.76849315068495, 0.802910465276687)


### LSTM Model

In [6]:
# Load your time series data
df2 = pd.read_csv('2223TaftLSTM.csv')

# Set if holiday
ph_holidays = holidays.PH()
df2['is_holiday'] = df2['Date'].apply(lambda x: 1 if x in ph_holidays else 0)
df2.head(5)

# Combine 'Date' and 'Time' into a new column 'DateAndTime'
df2['DateAndTime'] = pd.to_datetime(df2['Date'] + ' ' + df2['Time'])

# Drop unneeded columns
df2.drop(['Date', 'Time', 'rain_amount', 'rain_desc'], axis=1, inplace=True)

 # Define additional features
df2['Date Time'] = pd.to_datetime(df2['DateAndTime'], format='%d.%m.%Y %H.%M.%S')
df2['hour'] = df2['Date Time'].dt.hour
df2['day_of_week'] = df2['Date Time'].dt.dayofweek
df2['month'] = df2['Date Time'].dt.month
df2['year'] = df2['Date Time'].dt.year
df2.drop(['DateAndTime', 'Day', 'is_weekend'], axis=1, inplace=True)
df2['is_weekend'] = df2['day_of_week'].apply(lambda x: 1 if x in [5, 6] else 0)
model_features = ['hour', 'day_of_week', 'is_weekend', 'month', 'year', 'rain_class', 'Entry']
df_model = df2[model_features]

# Normalize all features except 'Entry'
scaler = MinMaxScaler(feature_range=(0, 1))
df_model[df_model.columns[:-1]] = scaler.fit_transform(df_model[df_model.columns[:-1]])

# Save the scaler for the features
joblib.dump(scaler, 'Users/Models/hybrid/model/scaler_features.pkl')

# Normalize the 'Entry' column separately
scaler_entry = MinMaxScaler(feature_range=(0, 1))
df_model['Entry'] = scaler_entry.fit_transform(df_model[['Entry']])

# Save the scaler for the 'Entry' column
joblib.dump(scaler_entry, 'Users/Models/hybrid/model/scaler_entry.pkl')

#
X, y = df_to_X_y(df_model)

# Determine the split points
train_split_point = int(len(X) * 0.8)
valntest_split_point = int(len(X) * 0.9)

# Split the data
X_train, X_val, X_test = X[:train_split_point], X[train_split_point:valntest_split_point], X[valntest_split_point:]
y_train, y_val, y_test = y[:train_split_point], y[train_split_point:valntest_split_point], y[valntest_split_point:]

lstm_forecast = lstm_model(df2, X_train, y_train, X_test, y_test, X_val, y_val)
# lstm_eval = evaluatel(y_test_inv, y_pred_inv)
# print(lstm_eval)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



FileNotFoundError: [Errno 2] No such file or directory: 'Users/Models/hybrid/model/scaler_features.pkl'

In [25]:
lstm_forecast = pd.DataFrame(lstm_forecast, columns=['LSTM'])
lstm_forecast

Unnamed: 0,LSTM
0,126.0
1,2543.0
2,4684.0
3,5723.0
4,4638.0
...,...
1455,1923.0
1456,1512.0
1457,102.0
1458,8.0


In [31]:
test_forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,All Saints' Day,All Saints' Day_lower,All Saints' Day_upper,Black Saturday,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2023-08-08 04:00:00,1498.353585,0.000000,508.945403,1498.353585,1498.353585,0.0,0.0,0.0,0.0,...,0.083584,0.083584,0.083584,-0.038524,-0.038524,-0.038524,0.0,0.0,0.0,0.0
1,2023-08-08 05:00:00,1498.412067,2161.163602,3295.391618,1498.412067,1498.412067,0.0,0.0,0.0,0.0,...,0.080157,0.080157,0.080157,-0.038042,-0.038042,-0.038042,0.0,0.0,0.0,2736.0
2,2023-08-08 06:00:00,1498.470548,3908.584167,5028.748078,1498.470548,1498.470548,0.0,0.0,0.0,0.0,...,0.076540,0.076540,0.076540,-0.037557,-0.037557,-0.037557,0.0,0.0,0.0,4447.0
3,2023-08-08 07:00:00,1498.529029,4213.663386,5402.845338,1498.529029,1498.529029,0.0,0.0,0.0,0.0,...,0.072788,0.072788,0.072788,-0.037069,-0.037069,-0.037069,0.0,0.0,0.0,4816.0
4,2023-08-08 08:00:00,1498.587511,3721.205885,4907.789373,1498.587511,1498.587511,0.0,0.0,0.0,0.0,...,0.068958,0.068958,0.068958,-0.036578,-0.036578,-0.036578,0.0,0.0,0.0,4295.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2915,2023-12-31 19:00:00,1702.746109,0.000000,5699.036353,-598.093715,4185.276050,0.0,0.0,0.0,0.0,...,-0.080696,-0.080696,-0.080696,-0.018686,-0.018686,-0.018686,0.0,0.0,0.0,2302.0
2916,2023-12-31 20:00:00,1702.804590,0.000000,4446.985978,-598.984946,4186.733250,0.0,0.0,0.0,0.0,...,-0.071715,-0.071715,-0.071715,-0.018191,-0.018191,-0.018191,0.0,0.0,0.0,1760.0
2917,2023-12-31 21:00:00,1702.863072,0.000000,1829.751734,-599.876177,4188.190451,0.0,0.0,0.0,0.0,...,-0.062326,-0.062326,-0.062326,-0.017698,-0.017698,-0.017698,0.0,0.0,0.0,673.0
2918,2023-12-31 22:00:00,1702.921553,0.000000,468.652501,-600.767408,4189.647651,0.0,0.0,0.0,0.0,...,-0.052610,-0.052610,-0.052610,-0.017206,-0.017206,-0.017206,0.0,0.0,0.0,0.0


### Hybrid Model

In [36]:
# Get the number of rows
num_rows = len(test_forecast)

# Calculate the midpoint
midpoint = num_rows // 2

# Take the latter half
prophet_forecast = test_forecast[midpoint:]
hybrid_actual = test[midpoint:]

# Reset the index of the DataFrame and drop the old index
prophet_forecast.reset_index(drop=True, inplace=True)
hybrid_actual.reset_index(drop=True, inplace=True)

prophet_forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,All Saints' Day,All Saints' Day_lower,All Saints' Day_upper,Black Saturday,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2023-10-20 04:00:00,1600.813013,0.000000,277.743296,821.874572,2424.942380,0.0,0.0,0.0,0.0,...,-0.026419,-0.026419,-0.026419,-0.061949,-0.061949,-0.061949,0.0,0.0,0.0,0.0
1,2023-10-20 05:00:00,1600.871495,1256.668278,4205.689684,821.321303,2425.626313,0.0,0.0,0.0,0.0,...,-0.021410,-0.021410,-0.021410,-0.062757,-0.062757,-0.062757,0.0,0.0,0.0,2667.0
2,2023-10-20 06:00:00,1600.929976,2297.714886,6907.378303,820.768034,2426.310245,0.0,0.0,0.0,0.0,...,-0.016103,-0.016103,-0.016103,-0.063560,-0.063560,-0.063560,0.0,0.0,0.0,4507.0
3,2023-10-20 07:00:00,1600.988458,2400.356843,7543.304765,820.214765,2426.994178,0.0,0.0,0.0,0.0,...,-0.010556,-0.010556,-0.010556,-0.064359,-0.064359,-0.064359,0.0,0.0,0.0,4914.0
4,2023-10-20 08:00:00,1601.046939,2233.062846,6777.227598,819.661496,2427.678110,0.0,0.0,0.0,0.0,...,-0.004829,-0.004829,-0.004829,-0.065154,-0.065154,-0.065154,0.0,0.0,0.0,4371.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,2023-12-31 19:00:00,1702.746109,0.000000,5699.036353,-598.093715,4185.276050,0.0,0.0,0.0,0.0,...,-0.080696,-0.080696,-0.080696,-0.018686,-0.018686,-0.018686,0.0,0.0,0.0,2302.0
1456,2023-12-31 20:00:00,1702.804590,0.000000,4446.985978,-598.984946,4186.733250,0.0,0.0,0.0,0.0,...,-0.071715,-0.071715,-0.071715,-0.018191,-0.018191,-0.018191,0.0,0.0,0.0,1760.0
1457,2023-12-31 21:00:00,1702.863072,0.000000,1829.751734,-599.876177,4188.190451,0.0,0.0,0.0,0.0,...,-0.062326,-0.062326,-0.062326,-0.017698,-0.017698,-0.017698,0.0,0.0,0.0,673.0
1458,2023-12-31 22:00:00,1702.921553,0.000000,468.652501,-600.767408,4189.647651,0.0,0.0,0.0,0.0,...,-0.052610,-0.052610,-0.052610,-0.017206,-0.017206,-0.017206,0.0,0.0,0.0,0.0


In [37]:
hybrid_forecast = prophet_forecast[['ds', 'yhat']].rename(columns={'yhat': 'prophet'})
hybrid_forecast['lstm'] = lstm_forecast['LSTM']
hybrid_forecast['y'] = hybrid_actual['y']

hybrid_forecast

Unnamed: 0,ds,prophet,lstm,y
0,2023-10-20 04:00:00,0.0,126.0,0
1,2023-10-20 05:00:00,2667.0,2543.0,2894
2,2023-10-20 06:00:00,4507.0,4684.0,4339
3,2023-10-20 07:00:00,4914.0,5723.0,5548
4,2023-10-20 08:00:00,4371.0,4638.0,4502
...,...,...,...,...
1455,2023-12-31 19:00:00,2302.0,1923.0,1681
1456,2023-12-31 20:00:00,1760.0,1512.0,297
1457,2023-12-31 21:00:00,673.0,102.0,0
1458,2023-12-31 22:00:00,0.0,8.0,0


In [47]:
# Define weights
weight_prophet = 0.9
weight_lstm = 0.1
# Calculate the hybrid forecast
hybrid_forecast['yhat'] = (
    weight_prophet * hybrid_forecast['prophet'] + 
    weight_lstm * hybrid_forecast['lstm']
)

actual = hybrid_forecast['y']
forecast = hybrid_forecast['yhat']

#Evaluate hybrid forecast
mse, rmse, mae, r2 = evaluate_model(hybrid_forecast, hybrid_forecast)

print("Hybrid Forecast Evaluation")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")

Hybrid Forecast Evaluation
MSE: 374451.8437
RMSE: 611.9247
MAE: 423.3206
R²: 0.7960


### Comparison Graphs

In [48]:
# Create traces for LSTM, Prophet, and Hybrid forecasts
lstm_trace = go.Scatter(x=hybrid_forecast['ds'],
                        y=hybrid_forecast['lstm'],
                        mode='lines',
                        name='LSTM')

prophet_trace = go.Scatter(x=hybrid_forecast['ds'],
                           y=hybrid_forecast['prophet'],
                           mode='lines',
                           name='Prophet')

hybrid_trace = go.Scatter(x=hybrid_forecast['ds'],
                          y=hybrid_forecast['yhat'],
                          mode='lines',
                          name='Hybrid')

# Define layout
layout = go.Layout(title={'text': 'LSTM vs Prophet vs Hybrid Forecast',
                          'y': 0.9,
                          'x': 0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Entry'),
                   template='plotly_dark')

# Combine traces
data = [lstm_trace, prophet_trace, hybrid_trace,]

# Create figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
iplot(fig)

In [49]:
# Create traces for LSTM, Prophet, and Hybrid forecasts
lstm_trace = go.Scatter(x=hybrid_forecast['ds'],
                        y=hybrid_forecast['lstm'],
                        mode='lines',
                        name='LSTM')

prophet_trace = go.Scatter(x=hybrid_forecast['ds'],
                           y=hybrid_forecast['prophet'],
                           mode='lines',
                           name='Prophet')

actual_trace = go.Scatter(x=hybrid_forecast['ds'],
                          y=hybrid_forecast['y'],
                          mode='lines',
                          name='Actual')

# Define layout
layout = go.Layout(title={'text': 'LSTM vs Prophet vs Actual Forecast',
                          'y': 0.9,
                          'x': 0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Entry'),
                   template='plotly_dark')

# Combine traces
data = [lstm_trace, prophet_trace, actual_trace,]

# Create figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
iplot(fig)

In [50]:
# Create traces for LSTM, Prophet, and Hybrid forecasts
hybrid_trace = go.Scatter(x=hybrid_forecast['ds'],
                          y=hybrid_forecast['yhat'],
                          mode='lines',
                          name='Hybrid')

actual_trace = go.Scatter(x=hybrid_forecast['ds'],
                          y=hybrid_forecast['y'],
                          mode='lines',
                          name='Actual')

# Define layout
layout = go.Layout(title={'text': 'Hybrid vs Actual Forecast',
                          'y': 0.9,
                          'x': 0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Entry'),
                   template='plotly_dark')

# Combine traces
data = [hybrid_trace, actual_trace]

# Create figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
iplot(fig)

In [51]:
# Create traces for LSTM, Prophet, and Hybrid forecasts
lstm_trace = go.Scatter(x=hybrid_forecast['ds'],
                        y=hybrid_forecast['lstm'],
                        mode='lines',
                        name='LSTM')

prophet_trace = go.Scatter(x=hybrid_forecast['ds'],
                           y=hybrid_forecast['prophet'],
                           mode='lines',
                           name='Prophet')

hybrid_trace = go.Scatter(x=hybrid_forecast['ds'],
                          y=hybrid_forecast['yhat'],
                          mode='lines',
                          name='Hybrid')

actual_trace = go.Scatter(x=hybrid_forecast['ds'],
                          y=hybrid_forecast['y'],
                          mode='lines',
                          name='Actual')

# Define layout
layout = go.Layout(title={'text': 'LSTM vs Prophet vs Hybrid Forecast',
                          'y': 0.9,
                          'x': 0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Entry'),
                   template='plotly_dark')

# Combine traces
data = [lstm_trace, prophet_trace, hybrid_trace, actual_trace]

# Create figure
fig = go.Figure(data=data, layout=layout)

# Display the plot
iplot(fig)

### SHAP Implementation (Unfinished)

In [75]:
import shap

model = model_lstm(df, X_train, y_train, X_val, y_val)

# Create a background dataset for SHAP using a larger sample from the training set
background = X_train[np.random.choice(X_train.shape[0], min(1000, X_train.shape[0]), replace=False)]

# Initialize SHAP GradientExplainer
explainer = shap.GradientExplainer(model, df2)

# Compute SHAP values for the entire test dataset
shap_values = explainer.shap_values(X_test)  # Use the entire test dataset

# Check the shape of shap_values
print("Original SHAP values shape:", shap_values.shape)

# Reshape the SHAP values to (n_samples, n_timesteps, n_features)
shap_values_flat = shap_values.reshape(X_test.shape[0], 19, -1)
print("Reshaped SHAP values shape:", shap_values_flat.shape)

# Average across the time steps
shap_values_avg = np.mean(shap_values_flat, axis=1)  # Now the shape will be (n_samples, n_features)
print("Averaged SHAP values shape:", shap_values_avg.shape)

# Prepare X_test for plotting (only keep the features excluding 'Entry')
X_test_flat = X_test[:, -1, :-1]  # Take the last time step and exclude 'Entry'
print("X_test_flat shape:", X_test_flat.shape)

# Ensure the number of features matches (we will take only the first 7 features for SHAP)
shap_values_avg = shap_values_avg[:, :-1]  # Exclude the last feature to match

# Now the shapes should match
assert shap_values_avg.shape[1] == X_test_flat.shape[1], \
    f"SHAP values shape: {shap_values_avg.shape[1]}, X_test_flat shape: {X_test_flat.shape[1]}"

# Define feature names according to your dataset (excluding 'Entry')
feature_names = ['Hour', 'Day of Week', 'Is Weekend', 'Is Holiday', 'Month', 'Year', 'Rain Class']

# Plot the SHAP summary with feature names
shap.summary_plot(shap_values_avg, X_test_flat, feature_names=feature_names)

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 6, 64)             18432     
                                                                 
 dropout_12 (Dropout)        (None, 6, 64)             0         
                                                                 
 lstm_13 (LSTM)              (None, 64)                33024     
                                                                 
 dropout_13 (Dropout)        (None, 64)                0         
                                                                 
 dense_12 (Dense)            (None, 32)                2080      
                                                                 
 dense_13 (Dense)            (None, 1)                 33        
                                                                 
Total params: 53,569
Trainable params: 53,569
Non-trai

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

TypeError: unsupported operand type(s) for *: 'float' and 'Timestamp'