### Set up for Forecasting

In [1]:
import pandas as pd
# Read the raw forecasting dataset from the working directory and preview it.
df = pd.read_csv(filepath_or_buffer='forecasting_dataset.csv')
df.head(n=5)

  from pandas.core import (


Unnamed: 0,Date,GDP ($ Billions),Federal Interest rate,Unemployment Rate,Temperature (US Average),Consumer Price Index,S&P House price index
0,1/1/2010,14611.11,0.11,9.8,30.67,216.687,145.0
1,2/1/2010,14653.075,0.13,9.8,31.8,216.741,143.06
2,3/1/2010,14779.004,0.16,9.9,43.57,217.631,143.6
3,4/1/2010,14842.404,0.2,9.9,53.24,218.009,145.39
4,5/1/2010,14875.794,0.2,9.6,59.88,218.178,147.02


In [2]:
import plotly.io as pio
# Route Plotly figures through the 'iframe' renderer for this notebook session.
pio.renderers.default = 'iframe'

In [3]:
# Expose the GDP series under the generic column name `target`,
# which is the column the later cells select for forecasting.
df['target'] = df.loc[:, 'GDP ($ Billions)']
df.head(n=5)

Unnamed: 0,Date,GDP ($ Billions),Federal Interest rate,Unemployment Rate,Temperature (US Average),Consumer Price Index,S&P House price index,target
0,1/1/2010,14611.11,0.11,9.8,30.67,216.687,145.0,14611.11
1,2/1/2010,14653.075,0.13,9.8,31.8,216.741,143.06,14653.075
2,3/1/2010,14779.004,0.16,9.9,43.57,217.631,143.6,14779.004
3,4/1/2010,14842.404,0.2,9.9,53.24,218.009,145.39,14842.404
4,5/1/2010,14875.794,0.2,9.6,59.88,218.178,147.02,14875.794


In [4]:
# Keep only the timestamp and the value to forecast.
# `.copy()` makes df_preds an independent frame, so assigning columns to it
# later neither raises SettingWithCopyWarning nor risks touching `df`.
df_preds = df[['Date', 'target']].copy()
df_preds.head()

Unnamed: 0,Date,target
0,1/1/2010,14611.11
1,2/1/2010,14653.075
2,3/1/2010,14779.004
3,4/1/2010,14842.404
4,5/1/2010,14875.794


In [5]:
import pandas as pd
# Parse the Date strings and collapse to one row per date, indexed by Date.
# `.assign` builds a new frame rather than writing a column into the incoming
# one, which avoids the SettingWithCopyWarning when df_preds is a slice of df.
df_preds = (
    df_preds
    .assign(Date=pd.to_datetime(df_preds['Date']))
    .groupby('Date')
    .sum()
)
# Defensive: replace any missing target left after aggregation with 0.
df_preds['target'] = df_preds['target'].fillna(0)
df_preds.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,target
Date,Unnamed: 1_level_1
2010-01-01,14611.11
2010-02-01,14653.075
2010-03-01,14779.004
2010-04-01,14842.404
2010-05-01,14875.794


## PyCaret Time Series

Version: 3.0.0.dev1638407153
https://pypi.org/project/pycaret-ts-alpha/#history

Must have sklearn version 0.23.0

- https://pypi.org/project/pycaret-ts-alpha/
- https://pycaret.readthedocs.io/en/time_series/api/time_series.html
- https://github.com/pycaret/pycaret/issues/1588
- https://github.com/pycaret/pycaret/discussions/1942
- https://github.com/pycaret/pycaret/discussions/1757
- https://github.com/pycaret/pycaret/discussions/1795
- https://github.com/pycaret/pycaret/discussions/1791

#### Things to figure out with setup still
- Can we add outlier detection (e.g., for holidays)?
- Can we add regressors or multivariate inputs? (Currently not available.)
- Can we add multiple seasonalities (daily of 7, weekly of 52, monthly of 12), or can we plot tests of seasonality prior to setup? (Peds has weekly and monthly.)
- Can we set the train/test split as a date or as a percentage of days? Currently only a number of days is supported.
    - Could maybe code a function to get the number of days from a date.

In [6]:
# with functional API
from pycaret.time_series import *
# Initialise the PyCaret time-series experiment on the prepared series.
# NOTE(review): the recorded run of this cell failed with
# "module 'pandas' has no attribute 'Int64Index'". pandas 2.0 removed
# pd.Int64Index, so this pycaret build presumably needs pandas < 2.0 -- confirm.
setup(
    df_preds,
    fh=7,  # forecast horizon; originally 14
    fold=3,
    n_jobs=None,
    # seasonal_period=7,  # 7, 52
    # enforce_pi=True,  # not currently working
    # profile=True,  # Pandas Profile Report
    session_id=123,
)

AttributeError: module 'pandas' has no attribute 'Int64Index'

In [None]:
# check_stats()

In [None]:
# Plot the raw time series registered by setup().
plot_model(plot='ts', fig_kwargs=dict(big_data_threshold=1000))

In [None]:
# Show how the cross-validation folds split the series.
plot_model(plot='cv')

In [None]:
# Diagnostics plot for the series registered by setup().
plot_model(plot='diagnostics')

#### Things to figure out with setup still
- Can we add custom models into this section?
    - Would allow setting hyperparameters.

In [None]:
# Compare the available forecasters, sorted by RMSE, and keep the top one as `best`.
# Every model id under `exclude` is skipped; per the original note, these
# "don't have full prediction functionality yet".
best = compare_models(
    sort='RMSE',
    exclude=[
        'br_cds_dt', 'catboost_cds_dt',
        'en_cds_dt', 'ridge_cds_dt', 'lar_cds_dt',
        'lr_cds_dt', 'lasso_cds_dt', 'gbr_cds_dt',
        'rf_cds_dt', 'huber_cds_dt', 'xgboost_cds_dt',
        'et_cds_dt', 'lightgbm_cds_dt', 'omp_cds_dt',
        'knn_cds_dt', 'dt_cds_dt', 'llar_cds_dt', 'ada_cds_dt',
        'par_cds_dt',
        'prophet', 'exp_smooth', 'snaive',
    ],
    # include=['tbats'],
    turbo=False,
)

In [None]:
# Display the model object that compare_models() returned as `best`.
best

### Tune Model

In [None]:
# Tune the hyperparameters of `best` with tune_model()'s default settings
# (a random grid search, per the original note).
tuned_model = tune_model(best)
# Alternative when tuning should be skipped: use the untuned model as-is.
# tuned_model = best
print(tuned_model)

In [None]:
# Forecast plot: the tuned model's predictions beyond the known data.
plot_model(tuned_model, plot='forecast')

In [None]:
# In-sample plot for the tuned model.
plot_model(tuned_model, plot='insample')

In [None]:
# Residuals plot for the tuned model.
plot_model(tuned_model, plot='residuals')

In [None]:
# Forecast with the tuned model out to 14 days past the moment this cell runs,
# then shape the result into a table with Date, Version and interval columns.
import datetime

# Sample the clock once so the horizon and the Version stamp agree even if the
# cell happens to run across midnight.
run_time = datetime.datetime.now()

# Horizon = number of days from the hard-coded reference date (2022-01-14)
# through 14 days after the run, inclusive.
# NOTE(review): 2022-01-14 is presumably the last observed date of an earlier
# dataset -- confirm it still applies to the data loaded in this notebook.
# NOTE(review): `fh` is passed as a count of forecast steps; the dataset shown
# above has one row per month, so a day count may over-forecast -- confirm unit.
days_to_pred = run_time + datetime.timedelta(days=14)
days_since_allocation = ((days_to_pred - pd.to_datetime('2022-01-14')).days) + 1

# Generate predictions with interval bounds (alpha = 0.10, presumably a 90%
# interval), rounded to 0 decimals.
preds = predict_model(
    tuned_model,
    fh=days_since_allocation,
    return_pred_int=True,
    round=0,
    alpha=0.10,
)

# Move the forecast index into a regular `Date` column.
preds = preds.reset_index().rename(columns={"index": "Date"})

# Convert the period-valued dates straight to timestamps; the previous
# timestamp -> string -> datetime round trip produced the same values.
preds['Date'] = preds['Date'].dt.to_timestamp('s')

# Stamp every row with the run date as a 'YYYY-MM-DD' string.
preds['Version'] = run_time.strftime('%Y-%m-%d')

preds = preds.rename(columns={"y_pred": "Predicted_Usage",
                              "lower": "Lower_Usage_Confidence",
                              "upper": "Upper_Usage_Confidence"})

preds