In [3]:
%load_ext autoreload
%autoreload 2

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import utils.helpers as helpers
import joblib
from prophet import Prophet

In [12]:
boroughs = ['Bronx','Brooklyn', 'EWR', 'Manhattan', 'Queens', 'Staten Island']

In [15]:
for b in boroughs:
    print(f"Prophet w Regressor: {b}")
    
    df = pd.read_parquet(f'../data/{b} - all txns.parquet.gz')
    df = df.reset_index()
    df['timestamp_hour'] = pd.to_datetime(df['timestamp_hour'])
    df = df.sort_values(by = 'timestamp_hour')
    df = df.rename(columns = {'num_txns_All': 'y', 'timestamp_hour': 'ds'})

    df = df[['ds', 'y', 
                   'rolling_avg_w1-w4_lags', 
                   #'is_long_weekend',
                   # 'is_rush_hour', 
                   #'is_business_hour', 
                   #'lag_168'
            ]]

    df_hist = df[df['ds'] < '2024-08-01']
    df_test = df[df['ds']>= '2024-08-01']
    
    df_hist = df_hist.dropna()

    best = joblib.load(f'../models/{b}_Prophet.joblib')
    
    model = Prophet(changepoint_prior_scale=best.changepoint_prior_scale,
                seasonality_prior_scale=best.seasonality_prior_scale,
                holidays_prior_scale=best.holidays_prior_scale,
                seasonality_mode=best.seasonality_mode,
                yearly_seasonality=best.yearly_seasonality,
                weekly_seasonality=best.weekly_seasonality,
                daily_seasonality=best.daily_seasonality)

    # Add exogenous variables as regressors
    model.add_regressor('rolling_avg_w1-w4_lags')

    # Add custom hourly seasonality (you can adjust the period if needed)
    model.add_seasonality(name='hourly', period=24, fourier_order = 8)

    # Fit the model
    model.fit(df_hist)

    forecast = model.predict(df_test)
    __ = helpers.performance_metrics(df_test['y'], forecast['yhat'])
    
    model_loc = f'../models/{b}_Prophet_wRegressor.joblib'
    forecast_loc = f'../data/forecasts/{b}_Aug2024_forecasts_wRegressor.csv'
    
    joblib.dump(model, model_loc)
    forecast.to_csv(forecast_loc)

Prophet w Regressor: Bronx


13:40:30 - cmdstanpy - INFO - Chain [1] start processing
13:42:43 - cmdstanpy - INFO - Chain [1] done processing


MAE: 179.57
MAPE: 5.82%
RMSE: 290.75
R2: 0.95
Prophet w Regressor: Brooklyn


13:42:50 - cmdstanpy - INFO - Chain [1] start processing
13:44:01 - cmdstanpy - INFO - Chain [1] done processing


MAE: 570.94
MAPE: 5.87%
RMSE: 900.38
R2: 0.95
Prophet w Regressor: EWR


13:44:08 - cmdstanpy - INFO - Chain [1] start processing
13:45:24 - cmdstanpy - INFO - Chain [1] done processing


MAE: 574.21
MAPE: 5.92%
RMSE: 901.67
R2: 0.95
Prophet w Regressor: Manhattan


13:45:30 - cmdstanpy - INFO - Chain [1] start processing
13:46:22 - cmdstanpy - INFO - Chain [1] done processing


MAE: 1595.60
MAPE: 7.02%
RMSE: 2466.12
R2: 0.94
Prophet w Regressor: Queens


13:46:28 - cmdstanpy - INFO - Chain [1] start processing
13:47:40 - cmdstanpy - INFO - Chain [1] done processing


MAE: 1761.98
MAPE: 6.20%
RMSE: 2701.91
R2: 0.95
Prophet w Regressor: Staten Island


13:47:46 - cmdstanpy - INFO - Chain [1] start processing
13:49:19 - cmdstanpy - INFO - Chain [1] done processing


MAE: 1762.42
MAPE: 6.12%
RMSE: 2710.22
R2: 0.95
