In [21]:
# Import Required Libraries

import pandas as pd
import math
import numpy as np

import matplotlib.pyplot as plt

import plotly.graph_objs as go
from plotly.offline import iplot

from prophet import Prophet
import holidays
from prophet.diagnostics import cross_validation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

import tensorflow as tf
import os

from sklearn.preprocessing import MinMaxScaler
import joblib

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split

In [44]:
def is_weekend(ds):
    """Tell whether a date falls on a Saturday or a Sunday.

    Args:
        ds: Any value ``pd.to_datetime`` can turn into a single timestamp
            (string, ``datetime``, ``pd.Timestamp``, ...).

    Returns:
        bool: True for Saturday/Sunday, False for Monday through Friday.
    """
    # weekday() counts Monday as 0, so Saturday is 5 and Sunday is 6.
    first_weekend_day = 5
    weekday_number = pd.to_datetime(ds).weekday()
    return weekday_number >= first_weekend_day

def df_to_X_y(df, window_size=6, target_col=6):
    """Turn a feature table into sliding windows and next-step labels.

    For every start position ``i`` the window is rows ``i .. i+window_size-1``
    (all columns) and the label is the value of ``target_col`` in row
    ``i + window_size``, i.e. the row right after the window.

    Args:
        df: DataFrame whose rows are consecutive time steps.
        window_size: Number of consecutive rows per input window.
        target_col: Column to predict, given either as a positional index or
            as a column name. Defaults to 6, the position of 'Entry' in the
            seven-column model frame this notebook builds.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays. With at least one window, ``X`` has
        shape ``(n_windows, window_size, n_columns)`` and ``y`` has shape
        ``(n_windows,)``; both are empty when ``df`` has no more than
        ``window_size`` rows.
    """
    # Accept a column name as well as a position so callers are not tied to
    # a particular column order.
    if isinstance(target_col, str):
        target_col = df.columns.get_loc(target_col)

    values = df.to_numpy()
    n_windows = len(values) - window_size  # a non-positive count yields no windows

    X = [values[start:start + window_size] for start in range(n_windows)]
    y = [values[start + window_size][target_col] for start in range(n_windows)]
    return np.array(X), np.array(y)

def mean_absolute_scaled_error(y_true, y_pred, seasonality=24):
    """Mean Absolute Scaled Error against a seasonal-naive baseline.

    The model's mean absolute error is divided by the mean absolute error of
    a seasonal-naive forecast built from ``y_true`` itself, where every
    observation is "predicted" by the value ``seasonality`` positions earlier.

    Args:
        y_true: Observed values (array-like).
        y_pred: Forecast values (array-like) with as many elements as
            ``y_true``.
        seasonality: Lag of the naive baseline, counted in rows (array
            positions), not clock hours. Defaults to 24.
            NOTE(review): if the series holds fewer than 24 rows per day
            (e.g. closed hours were dropped), a lag of 24 rows is not one
            day - pass the real rows-per-day instead.

    Returns:
        tuple: ``(mase, naive_forecast)``. ``naive_forecast`` is
        ``y_true[:-seasonality]``, the values used as the baseline for
        ``y_true[seasonality:]``. ``mase`` is NaN when the series is too short
        to form a baseline (or both errors are zero) and ``inf`` when only the
        baseline error is zero.

    Raises:
        ValueError: If ``seasonality`` is smaller than 1.
    """
    if seasonality < 1:
        raise ValueError(f"seasonality must be a positive integer, got {seasonality}")

    # Ensure inputs are numpy arrays
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # A column vector (n, 1) minus a flat vector (n,) would broadcast to an
    # (n, n) matrix and silently produce a meaningless error; line the
    # prediction up with the observations instead.
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        y_pred = y_pred.reshape(y_true.shape)

    # Seasonal-naive baseline and its absolute errors
    naive_forecast = y_true[:-seasonality]
    naive_errors = np.abs(y_true[seasonality:] - naive_forecast)
    if naive_errors.size == 0:
        # Series no longer than one season: the baseline is undefined.
        return np.nan, naive_forecast

    # Mean absolute error of the model's predictions
    abs_error = np.mean(np.abs(y_true - y_pred))
    naive_mae = np.mean(naive_errors)

    if naive_mae == 0:
        # Perfectly periodic series: avoid a divide-by-zero warning.
        return (np.nan if abs_error == 0 else np.inf), naive_forecast

    return abs_error / naive_mae, naive_forecast

def evaluate_model(test, test_forecast):
    """Score a forecast against the observed values.

    Args:
        test: Table exposing the observed values under the key ``'y'``.
        test_forecast: Table exposing the predictions under the key ``'yhat'``.

    Returns:
        tuple: ``(mse, rmse, mae, r2, mase, naive_forecast)`` where the last
        two items come from ``mean_absolute_scaled_error``.
    """
    actual = test['y']
    predicted = test_forecast['yhat']

    mse = mean_squared_error(y_true=actual, y_pred=predicted)
    rmse = np.sqrt(mse)  # same value as re-running mean_squared_error
    mae = mean_absolute_error(y_true=actual, y_pred=predicted)
    r2 = r2_score(y_true=actual, y_pred=predicted)
    mase, naive_forecast = mean_absolute_scaled_error(y_true=actual, y_pred=predicted)

    return mse, rmse, mae, r2, mase, naive_forecast


def prophet_model(train, test, df, holiday_years=(2022, 2023)):
    """Fit a Prophet model on ``train`` and return its forecast for ``test``.

    The model uses yearly and weekly seasonality, Philippine public holidays,
    two conditional daily seasonalities (one applied where ``weekend`` is
    true, one where ``weekday`` is true) and two extra regressors
    (``off_hour`` and ``rain_amount``), all in multiplicative mode.

    Args:
        train: Training frame in Prophet layout (``ds``, ``y``) that also
            carries the ``weekend``, ``weekday``, ``off_hour`` and
            ``rain_amount`` columns registered on the model below.
        test: Frame to forecast; needs ``ds`` plus the same condition and
            regressor columns as ``train``.
        df: Unused. Kept so existing call sites keep working. The previous
            body used it to build a 150-period "future" frame whose forecast
            was thrown away; that section was removed because it only cost
            run time and populated ``weekday``/``weekend`` inverted relative
            to the training frame.
        holiday_years: Years for which Philippine holidays are generated.

    Returns:
        pandas.DataFrame: Prophet's prediction frame for ``test`` with
        ``yhat``, ``yhat_lower`` and ``yhat_upper`` clipped at zero and
        ``yhat`` rounded to whole numbers.
    """
    # Build the holiday table in one pass instead of growing a DataFrame
    # row by row with concat.
    ph_holidays = holidays.Philippines(years=list(holiday_years))
    holiday = pd.DataFrame(
        [{'ds': date, 'holiday': name} for date, name in sorted(ph_holidays.items())],
        columns=['ds', 'holiday'],
    )
    # errors='ignore' is deprecated in pandas and would hide malformed dates.
    holiday['ds'] = pd.to_datetime(holiday['ds'])

    # Initialize the Prophet model. The built-in daily seasonality is off
    # because two conditional daily seasonalities are added below.
    m = Prophet(
        yearly_seasonality=True,
        daily_seasonality=False,
        weekly_seasonality=True,
        holidays=holiday,
        seasonality_mode='multiplicative'
    )

    # Separate intraday profiles for weekends and weekdays, plus regressors
    m.add_seasonality(name='daily_is_weekend', period=1, fourier_order=4, condition_name='weekend')
    m.add_seasonality(name='daily_is_weekday', period=1, fourier_order=4, condition_name='weekday')
    m.add_regressor('off_hour')
    m.add_regressor('rain_amount')

    # Fit the model on the training data
    m.fit(train)

    # Make predictions for Test set
    test_forecast = m.predict(test)

    # Counts cannot be negative: clip the point forecast and its interval
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        test_forecast[column] = test_forecast[column].clip(lower=0)

    # Round forecast values
    test_forecast['yhat'] = test_forecast['yhat'].round()

    return test_forecast

def lstm_model(df, X_train, y_train, X_test, y_test, X_val, y_val):
    """Train a two-layer LSTM and return its test predictions in original units.

    The network is LSTM(64) -> Dropout(0.2) -> LSTM(64) -> Dropout(0.2) ->
    Dense(32) -> Dense(1), trained with Adam on mean squared error for up to
    100 epochs. The best model by validation loss is checkpointed to
    ``model/best_lstm.keras`` and early stopping (patience 10) restores the
    best weights.

    Args:
        df: Unused; kept for call-site compatibility. The previous body only
            read ``df['Date Time']`` into a variable that was never used.
        X_train: Training windows, indexed as ``(samples, timesteps, features)``.
        y_train: Training targets.
        X_test: Windows to predict.
        y_test: Unused; kept for call-site compatibility. The previous body
            computed error metrics from it and discarded them.
        X_val: Validation windows.
        y_val: Validation targets.

    Returns:
        numpy.ndarray: Predictions for ``X_test`` after inverse scaling with
        the scaler stored at ``model/scaler_entry.pkl``, clipped at zero and
        rounded to whole numbers.
    """
    model = Sequential()
    model.add(InputLayer((X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='linear'))
    model.add(Dense(1, activation='linear'))
    model.summary()

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Keep the best model (lowest validation loss) on disk
    os.makedirs('model', exist_ok=True)
    cp1 = ModelCheckpoint(filepath='model/best_lstm.keras', save_best_only=True, monitor='val_loss', mode='min')

    # Use early stopping to prevent overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Fit the model
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=[cp1, early_stopping])

    # Load the target scaler for the inverse transformation.
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code - only load scaler files this project wrote itself.
    scaler_entry = joblib.load('model/scaler_entry.pkl')

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Wrap the predictions in a frame with an 'Entry' column before handing
    # them to the scaler, then map them back to the original units.
    df_pred = pd.DataFrame(y_pred, columns=['Entry'])
    y_pred_inv = scaler_entry.inverse_transform(df_pred)

    # Round to the nearest whole number and ensure non-negative values
    return np.round(np.maximum(y_pred_inv, 0))



In [24]:
# Import dataset
df = pd.read_csv('data/00_MRT_2023_Prophet_weather2.csv', parse_dates=[0])

# Rename header to Prophet's requirements
# (the former bare `df.reset_index()` discarded its result and was removed)
df = df.rename(columns={'Datetime': 'ds', 'Entry': 'y'})

# Add additional regressors as columns in the dataframe
df['hour'] = pd.to_datetime(df['ds']).dt.hour
# 1 for hours 23:00-03:59, 0 otherwise
df['off_hour'] = ((df['hour'] >= 23) | (df['hour'] <= 3)).astype('int64')
# Boolean condition columns for the two conditional daily seasonalities
df['weekday'] = ~df['ds'].apply(is_weekend)
df['weekend'] = ~df['weekday']

# Split the dataset chronologically: first 80% for training, rest for testing
train_len = math.floor((df.shape[0]*80)/100)
train = df.iloc[:train_len]
test = df.iloc[train_len:]

test_forecast = prophet_model(train, test, df)
prophet_eval = evaluate_model(test, test_forecast)
# evaluate_model also returns the naive-forecast array as its last item;
# print only the five scalar metrics (mse, rmse, mae, r2, mase).
print(prophet_eval[:5])


22:22:54 - cmdstanpy - INFO - Chain [1] start processing
22:23:13 - cmdstanpy - INFO - Chain [1] done processing


(390194.7712328767, 624.6557221645189, 424.26027397260276, 0.8020142777903155, 0.24309350002268534)


In [25]:
# Load your time series data
df2 = pd.read_csv('data/2223TaftLSTM.csv')

# Set if holiday
ph_holidays = holidays.PH()
df2['is_holiday'] = df2['Date'].apply(lambda x: 1 if x in ph_holidays else 0)

# Combine 'Date' and 'Time' into a new column 'DateAndTime'
df2['DateAndTime'] = pd.to_datetime(df2['Date'] + ' ' + df2['Time'])

# Drop unneeded columns
df2.drop(['Date', 'Time', 'rain_amount', 'rain_desc'], axis=1, inplace=True)

# Define additional features
# 'DateAndTime' is already parsed, so it is reused as-is (the old second
# to_datetime call passed a format string that was never applied).
df2['Date Time'] = df2['DateAndTime']
df2['hour'] = df2['Date Time'].dt.hour
df2['day_of_week'] = df2['Date Time'].dt.dayofweek
df2['month'] = df2['Date Time'].dt.month
df2['year'] = df2['Date Time'].dt.year
df2.drop(['DateAndTime', 'Day', 'is_weekend'], axis=1, inplace=True)
df2['is_weekend'] = df2['day_of_week'].apply(lambda x: 1 if x in [5, 6] else 0)
model_features = ['hour', 'day_of_week', 'is_weekend', 'month', 'year', 'rain_class', 'Entry']
# Work on an explicit copy: assigning into a slice of df2 is what raised
# SettingWithCopyWarning.
df_model = df2[model_features].copy()

# The scalers are written to 'model/' before lstm_model creates it
os.makedirs('model', exist_ok=True)

# NOTE(review): both scalers below are fit on the full series, including
# rows that later land in the validation and test splits; consider fitting
# them on the training portion only.

# Normalize all features except 'Entry'
feature_columns = df_model.columns[:-1]
scaler = MinMaxScaler(feature_range=(0, 1))
df_model[feature_columns] = scaler.fit_transform(df_model[feature_columns])

# Save the scaler for the features
joblib.dump(scaler, 'model/scaler_features.pkl')

# Normalize the 'Entry' column separately
scaler_entry = MinMaxScaler(feature_range=(0, 1))
df_model['Entry'] = scaler_entry.fit_transform(df_model[['Entry']])

# Save the scaler for the 'Entry' column (lstm_model reloads it)
joblib.dump(scaler_entry, 'model/scaler_entry.pkl')

# Build sliding windows (inputs) and next-step 'Entry' values (labels)
X, y = df_to_X_y(df_model)

# Determine the split points: 80% train, 10% validation, 10% test
train_split_point = int(len(X) * 0.8)
valntest_split_point = int(len(X) * 0.9)

# Split the data chronologically
X_train, X_val, X_test = X[:train_split_point], X[train_split_point:valntest_split_point], X[valntest_split_point:]
y_train, y_val, y_test = y[:train_split_point], y[train_split_point:valntest_split_point], y[valntest_split_point:]

lstm_forecast = lstm_model(df2, X_train, y_train, X_test, y_test, X_val, y_val)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 6, 64)             18432     
                                                                 
 dropout (Dropout)           (None, 6, 64)             0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 53,569
Trainable params: 53,569
Non-traina

In [26]:
# Put the LSTM predictions into a frame with a single 'yhat' column so they
# can be handled like the Prophet forecast.
lstm_forecast = pd.DataFrame(data=lstm_forecast, columns=['yhat'])
lstm_forecast


Unnamed: 0,yhat
0,77.0
1,2518.0
2,4686.0
3,5591.0
4,4615.0
...,...
1455,1824.0
1456,1338.0
1457,0.0
1458,0.0


In [27]:
# Inspect the Prophet forecast frame returned by prophet_model for the test split.
test_forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,All Saints' Day,All Saints' Day_lower,All Saints' Day_upper,Black Saturday,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2023-08-08 04:00:00,1502.743646,0.000000,521.139022,1502.743646,1502.743646,0.0,0.0,0.0,0.0,...,0.083363,0.083363,0.083363,-0.038610,-0.038610,-0.038610,0.0,0.0,0.0,0.0
1,2023-08-08 05:00:00,1502.802962,2132.755109,3288.500148,1502.802962,1502.802962,0.0,0.0,0.0,0.0,...,0.079941,0.079941,0.079941,-0.038131,-0.038131,-0.038131,0.0,0.0,0.0,2736.0
2,2023-08-08 06:00:00,1502.862279,3836.493301,5038.833412,1502.862279,1502.862279,0.0,0.0,0.0,0.0,...,0.076329,0.076329,0.076329,-0.037649,-0.037649,-0.037649,0.0,0.0,0.0,4448.0
3,2023-08-08 07:00:00,1502.921595,4292.327379,5411.541460,1502.921595,1502.921595,0.0,0.0,0.0,0.0,...,0.072584,0.072584,0.072584,-0.037165,-0.037165,-0.037165,0.0,0.0,0.0,4818.0
4,2023-08-08 08:00:00,1502.980911,3722.821226,4867.574689,1502.980911,1502.980911,0.0,0.0,0.0,0.0,...,0.068760,0.068760,0.068760,-0.036677,-0.036677,-0.036677,0.0,0.0,0.0,4298.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2915,2023-12-31 19:00:00,1710.054302,0.000000,5367.662598,-805.312639,3968.782967,0.0,0.0,0.0,0.0,...,-0.080293,-0.080293,-0.080293,-0.010234,-0.010234,-0.010234,0.0,0.0,0.0,2322.0
2916,2023-12-31 20:00:00,1710.113618,0.000000,4348.829192,-806.318759,3970.030400,0.0,0.0,0.0,0.0,...,-0.071339,-0.071339,-0.071339,-0.009755,-0.009755,-0.009755,0.0,0.0,0.0,1778.0
2917,2023-12-31 21:00:00,1710.172935,0.000000,1823.142463,-807.324880,3971.277833,0.0,0.0,0.0,0.0,...,-0.061977,-0.061977,-0.061977,-0.009277,-0.009277,-0.009277,0.0,0.0,0.0,688.0
2918,2023-12-31 22:00:00,1710.232251,0.000000,610.786620,-808.500347,3972.525266,0.0,0.0,0.0,0.0,...,-0.052290,-0.052290,-0.052290,-0.008801,-0.008801,-0.008801,0.0,0.0,0.0,0.0


In [28]:
# Cut the Prophet test window in half by row position
num_rows = len(test_forecast)
midpoint = num_rows // 2

# First half of the actuals; second half of both the actuals and the Prophet
# forecast, re-indexed from 0 so the two frames line up row by row.
test_one = test.iloc[:midpoint]
prophet_forecast = test_forecast.iloc[midpoint:].reset_index(drop=True)
hybrid_actual = test.iloc[midpoint:].reset_index(drop=True)

print(prophet_forecast)

                      ds        trend   yhat_lower   yhat_upper  trend_lower  \
0    2023-10-20 04:00:00  1606.665897     0.000000   280.393401   773.117826   
1    2023-10-20 05:00:00  1606.725214  1161.095743  4021.882409   771.918777   
2    2023-10-20 06:00:00  1606.784530  2225.992696  6857.873161   770.719729   
3    2023-10-20 07:00:00  1606.843846  2306.446875  7281.489604   769.520681   
4    2023-10-20 08:00:00  1606.903163  2072.585097  6570.554590   768.321633   
...                  ...          ...          ...          ...          ...   
1455 2023-12-31 19:00:00  1710.054302     0.000000  5367.662598  -805.312639   
1456 2023-12-31 20:00:00  1710.113618     0.000000  4348.829192  -806.318759   
1457 2023-12-31 21:00:00  1710.172935     0.000000  1823.142463  -807.324880   
1458 2023-12-31 22:00:00  1710.232251     0.000000   610.786620  -808.500347   
1459 2023-12-31 23:00:00  1710.291567     0.000000   598.128914  -809.704723   

      trend_upper  All Saints' Day  All

In [33]:
# The columns below are combined by index label, so a length mismatch would
# silently leave NaN rows; fail early with a clear message instead.
assert len(lstm_forecast) == len(prophet_forecast) == len(hybrid_actual), (
    f"row counts differ: lstm={len(lstm_forecast)}, "
    f"prophet={len(prophet_forecast)}, actual={len(hybrid_actual)}"
)

# One frame holding timestamp, both model forecasts and the observed value
hybrid_forecast = prophet_forecast[['ds', 'yhat']].rename(columns={'yhat': 'prophet'})
hybrid_forecast['lstm'] = lstm_forecast['yhat']
hybrid_forecast['y'] = hybrid_actual['y']
print(hybrid_forecast)

                      ds  prophet    lstm     y
0    2023-10-20 04:00:00      0.0    77.0     0
1    2023-10-20 05:00:00   2664.0  2518.0  2894
2    2023-10-20 06:00:00   4507.0  4686.0  4339
3    2023-10-20 07:00:00   4916.0  5591.0  5548
4    2023-10-20 08:00:00   4372.0  4615.0  4502
...                  ...      ...     ...   ...
1455 2023-12-31 19:00:00   2322.0  1824.0  1681
1456 2023-12-31 20:00:00   1778.0  1338.0   297
1457 2023-12-31 21:00:00    688.0     0.0     0
1458 2023-12-31 22:00:00      0.0     0.0     0
1459 2023-12-31 23:00:00      0.0     7.0     0

[1460 rows x 4 columns]


In [46]:
# Evaluate Prophet on the second half of the test window
# (observed values come from hybrid_forecast['y'], predictions from prophet_forecast['yhat'])
mse, rmse, mae, r2, mase, naive_forecast = evaluate_model(hybrid_forecast, prophet_forecast)

print(
    f"MSE: {mse:.4f}",
    f"RMSE: {rmse:.4f}",
    f"MAE: {mae:.4f}",
    f"R²: {r2:.4f}",
    f"MASE: {mase:.4f}",
    sep="\n",
)

MSE: 441775.1041
RMSE: 664.6616
MAE: 461.5384
R²: 0.7594
MASE: 0.2822


In [50]:
# Show the seasonal-naive baseline values returned by evaluate_model
naive = pd.DataFrame(data=naive_forecast, columns=['yhat'])
naive

Unnamed: 0,yhat
0,0
1,2894
2,4339
3,5548
4,4502
...,...
1431,2845
1432,2942
1433,3332
1434,2819


In [35]:
# Evaluate LSTM
# evaluate_model returns six items (the last is the naive-forecast array);
# unpacking five names from it raised "too many values to unpack", so keep
# only the five scalar metrics.
mse, rmse, mae, r2, mase = evaluate_model(hybrid_forecast, lstm_forecast)[:5]

print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")
print(f"MASE: {mase:.4f}")

MSE: 103728.5473
RMSE: 322.0692
MAE: 195.3568
R²: 0.9435
MASE: 0.1194


In [38]:
# Define weights of the two models in the blended forecast
weight_prophet = 0.1
weight_lstm = 0.9

# Calculate the hybrid forecast as a weighted average of both models
hybrid_forecast['yhat'] = (
    weight_prophet * hybrid_forecast['prophet'] +
    weight_lstm * hybrid_forecast['lstm']
)

actual = hybrid_forecast['y']
forecast = hybrid_forecast['yhat']

# Evaluate hybrid forecast: the frame carries both 'y' and 'yhat', so it is
# passed as both arguments. evaluate_model returns six items (the last is
# the naive-forecast array); keep only the five scalar metrics so the
# unpacking no longer raises "too many values to unpack".
mse, rmse, mae, r2, mase = evaluate_model(hybrid_forecast, hybrid_forecast)[:5]

print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")
print(f"MASE: {mase:.4f}")

MSE: 103738.0617
RMSE: 322.0839
MAE: 196.1468
R²: 0.9435
MASE: 0.1199
