In [3]:
!pip3 install pmdarima matplotlib prophet

Defaulting to user installation because normal site-packages is not writeable
Collecting pmdarima
  Using cached pmdarima-2.0.4-cp311-cp311-macosx_11_0_arm64.whl.metadata (7.8 kB)
Using cached pmdarima-2.0.4-cp311-cp311-macosx_11_0_arm64.whl (628 kB)
Installing collected packages: pmdarima
Successfully installed pmdarima-2.0.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


In [9]:
import pandas as pd
from pmdarima import auto_arima
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error

path = '/Users/sprosad/Downloads/others/new_dea/onboardai/data/forecasting/forecasting.csv'

def auto_forecast(path, date_column, value_column, forecast_periods): 
    data = pd.read_csv(path, parse_dates=[date_column], index_col=date_column)
    data.index = pd.to_datetime(data.index)
    data[value_column].fillna(method='ffill', inplace=True)
    # Initialize SARIMA model
    sarima_model = auto_arima(data[value_column], 
                               seasonal=True, 
                               m=12,
                               trace=True, 
                               error_action='warn', 
                               suppress_warnings=True, 
                               stepwise=True)
    
    # SARIMA Forecasting
    # n_periods = 12  # Number of periods to forecast
    sarima_forecast, sarima_conf_int = sarima_model.predict(n_periods=forecast_periods, return_conf_int=True)
    
    # Create a date index for the SARIMA forecast
    sarima_forecast_index = sarima_forecast.index
    
    # Create a DataFrame for the forecast
    sarima_forecast_series = pd.Series(sarima_forecast, index=sarima_forecast_index)
    sarima_forecast_df = pd.DataFrame(sarima_forecast).reset_index().rename({"index":"date",0: "forecast"}, axis=1)
    sarima_conf_df = pd.DataFrame({
        'lower_bound': sarima_conf_int[:, 0],
        'upper_bound': sarima_conf_int[:, 1]
    })
    
    sarima_output_df = pd.concat([sarima_forecast_df,sarima_conf_df], axis=1)

    prophet_data = data.copy()
    prophet_data = prophet_data.reset_index().rename(columns={'date': 'ds', 'value': 'y'})
    prophet_model = Prophet()
    prophet_model.fit(prophet_data)
    
    # Forecasting with Prophet
    # future = prophet_model.make_future_dataframe(periods=n_periods, freq='M')
    future = pd.DataFrame(sarima_forecast.index, columns=['ds'])
    prophet_forecast = prophet_model.predict(future)

    prophet_forecast_df = prophet_forecast[['ds','yhat','yhat_lower','yhat_upper']].rename(columns={"ds":"date","yhat":"forecast","yhat_lower":"lower_bound","yhat_upper":"upper_bound"})

    # Assume you have actual values for validation (e.g., last n_periods in historical data)
    actual_values = data[value_column][-forecast_periods:]
    
    # SARIMA Performance
    sarima_mape = mean_absolute_percentage_error(actual_values, sarima_forecast)
    
    # Prophet Performance
    prophet_mape = mean_absolute_percentage_error(actual_values, prophet_forecast['yhat'][-forecast_periods:])

    if sarima_mape<=prophet_mape:
        return sarima_output_df
    else:
        return prophet_forecast_df

In [10]:
auto_forecast(path, 'date', 'value', 12)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[value_column].fillna(method='ffill', inplace=True)
  data[value_column].fillna(method='ffill', inplace=True)


Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,1,1)[12]             : AIC=526.837, Time=0.72 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=659.245, Time=0.02 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=560.558, Time=0.04 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=529.539, Time=0.09 sec
 ARIMA(2,1,2)(0,1,1)[12]             : AIC=525.356, Time=0.40 sec
 ARIMA(2,1,2)(0,1,0)[12]             : AIC=551.838, Time=0.08 sec
 ARIMA(2,1,2)(0,1,2)[12]             : AIC=526.365, Time=1.24 sec
 ARIMA(2,1,2)(1,1,0)[12]             : AIC=530.219, Time=0.33 sec
 ARIMA(2,1,2)(1,1,2)[12]             : AIC=inf, Time=2.04 sec
 ARIMA(1,1,2)(0,1,1)[12]             : AIC=527.568, Time=0.31 sec
 ARIMA(2,1,1)(0,1,1)[12]             : AIC=529.582, Time=0.23 sec
 ARIMA(3,1,2)(0,1,1)[12]             : AIC=520.313, Time=0.32 sec
 ARIMA(3,1,2)(0,1,0)[12]             : AIC=544.911, Time=0.10 sec
 ARIMA(3,1,2)(1,1,1)[12]             : AIC=521.966, Time=0.46 sec
 ARIMA(3,1,2)(0,1,2)[12]             

13:09:34 - cmdstanpy - INFO - Chain [1] start processing
13:09:34 - cmdstanpy - INFO - Chain [1] done processing


 ARIMA(4,1,1)(0,1,1)[12] intercept   : AIC=520.121, Time=0.60 sec

Best model:  ARIMA(4,1,1)(0,1,1)[12]          
Total fit time: 18.362 seconds


Unnamed: 0,date,forecast,lower_bound,upper_bound
0,2008-07-01,23.493592,22.234075,24.766088
1,2008-08-01,23.685948,22.367942,24.998877
2,2008-09-01,23.900691,22.619146,25.287085
3,2008-10-01,24.797127,23.578588,26.157157
4,2008-11-01,25.393568,24.04388,26.740961
5,2008-12-01,26.727173,25.407978,28.044394
6,2009-01-01,28.012234,26.714692,29.330874
7,2009-02-01,22.347943,21.013663,23.694761
8,2009-03-01,23.559579,22.320856,24.818734
9,2009-04-01,23.925055,22.544073,25.252587
