In [152]:
import pandas as pd

In [153]:
# Load the shipped-items export.
# 'Item' is pinned to str because the item filter further down compares it
# against a string ID ('4114'). Left to type inference, numeric-looking IDs
# could be parsed as integers (or as a mix of ints and strs), and that string
# comparison would then silently miss rows.
df = pd.read_csv('Items Shipped.csv', dtype={'Item': str})
df.columns

Index(['Customer Name', 'Parent', 'Account Manager', 'Item',
       'Item Description', 'Product Group', 'Parent Product Group',
       'Netsuite Link', 'Sales Order', 'Customer PO', 'Tracking Numbers',
       'Invoice Status', 'Date Created', 'Date', 'Ship Date',
       'Import/Domestic', 'Location', 'Is Individual', 'Quantity', 'Amount',
       'Sell Price', 'Shipping State/Province', 'Online Flag', 'Internal ID',
       'Shipping Address'],
      dtype='object')

### Setting Up Data

In [154]:
# Parse the ship dates so shipments can be bucketed by calendar month.
df['Ship Date'] = pd.to_datetime(df['Ship Date'])

# Restrict to domestic shipments of the single item being forecast.
item = '4114'
is_target_item = df['Item'] == item
is_domestic = df['Import/Domestic'] == 'Domestic'
filtered_df = df.loc[is_target_item & is_domestic]

# Sum Quantity per month-end bucket, then expose the result as a one-column
# frame indexed by 'Ship Date'.
new_df = (
    filtered_df
    .groupby(pd.Grouper(key='Ship Date', freq="M"))['Quantity']
    .sum()
    .reset_index()
    .set_index('Ship Date')
)
new_df.head()

Unnamed: 0_level_0,Quantity
Ship Date,Unnamed: 1_level_1
2020-01-31,14981.0
2020-02-29,30057.0
2020-03-31,17657.0
2020-04-30,13723.0
2020-05-31,39238.0


In [155]:
# Sanity-check the monthly frame: index range, column dtype and non-null counts.
new_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 53 entries, 2020-01-31 to 2024-05-31
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Quantity  53 non-null     float64
dtypes: float64(1)
memory usage: 848.0 bytes


In [156]:
from pycaret.time_series import *

# Set up a univariate forecasting experiment on the monthly Quantity series.
# fh=3 reserves the last 3 periods as the test set / forecast horizon;
# coverage=0.90 is passed through as the prediction-interval coverage setting.
# session_id pins the experiment's random seed so that model comparison and
# tuning give the same results on every Restart & Run All; 8412 is the seed
# reported in the setup summary of the saved run.
exp = TSForecastingExperiment()
exp.setup(data = new_df, target='Quantity', fh=3, coverage=0.90, session_id=8412)

Unnamed: 0,Description,Value
0,session_id,8412
1,Target,Quantity
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(53, 1)"
5,Transformed data shape,"(53, 1)"
6,Transformed train set shape,"(50, 1)"
7,Transformed test set shape,"(3, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


<pycaret.time_series.forecasting.oop.TSForecastingExperiment at 0x284e016a4a0>

In [157]:
# Plot the experiment's data with plot type "ts"; no estimator is passed.
exp.plot_model(plot="ts")

In [158]:
# Visualize the train/test split (setup reported 50 training rows and 3 test rows).
exp.plot_model(plot="train_test_split")

In [159]:
# Visualize the cross-validation folds (setup reported an ExpandingWindowSplitter).
exp.plot_model(plot="cv")

In [160]:
# Cross-validate the available models and rank the results table by RMSE.
# NOTE(review): `best_model` is not referenced by any later cell shown here;
# the forecasting section tunes `arima` instead — confirm that is intended.
best_model = exp.compare_models(sort='RMSE')

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.7328,0.6526,35848.4948,41341.2529,0.4679,0.3039,-8.0255,0.0867
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.8084,0.6864,39348.5919,43299.2425,0.4601,0.3463,-13.6365,0.06
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.7597,0.6832,37512.3476,43452.2141,0.5159,0.3103,-5.9698,0.08
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.7584,0.6869,37226.5743,43595.743,0.4727,0.32,-7.129,0.1233
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8032,0.6932,39480.3483,43993.7048,0.5174,0.3324,-7.4685,0.1033
polytrend,Polynomial Trend Forecaster,0.8058,0.6943,39539.365,44021.5839,0.513,0.3337,-8.3518,0.8333
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8333,0.7212,41276.3295,45970.5086,0.5317,0.343,-4.6589,0.0767
croston,Croston,0.8513,0.739,41039.0209,46391.0372,0.4363,0.3614,-23.5485,0.0167
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.867,0.7645,43029.5393,48756.0032,0.5983,0.3352,-5.3464,0.1267
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.9245,0.7839,45310.4817,49688.8943,0.5637,0.3655,-11.5422,0.1233


In [161]:
# Difference plots with ACF and PACF panels for the orders in "order_list"
# (presumably first- and second-order differencing — confirm against PyCaret docs).
diff_order_kwargs = {"order_list": [1, 2], "acf": True, "pacf": True}
exp.plot_model(plot="diff", data_kwargs=diff_order_kwargs)

In [162]:
# Difference plots with ACF and PACF panels for the lag combination [1, 12]
# (presumably a lag-1 difference followed by a lag-12 seasonal difference — confirm).
diff_lag_kwargs = {"lags_list": [[1, 12]], "acf": True, "pacf": True}
exp.plot_model(plot="diff", data_kwargs=diff_lag_kwargs)

In [163]:
# Decomposition plot of the series, requesting the multiplicative type.
decomp_kwargs = {'type' : 'multiplicative'}
exp.plot_model(plot = 'decomp', data_kwargs = decomp_kwargs)

In [164]:
# Diagnostics plot for the experiment's data; no estimator is passed.
exp.plot_model(plot = 'diagnostics')

## ETS Model

In [165]:
# Fit the 'ets' model; create_model displays the per-fold CV metrics table.
ets_dt = exp.create_model('ets')

# Plot this model's forecast over a 3-period horizon.
ets_forecast_kwargs = {'fh' : 3}
exp.plot_model(estimator = ets_dt, plot = 'forecast', data_kwargs = ets_forecast_kwargs)

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-05,2.1306,1.6209,98057.6291,98477.1961,0.6058,0.8741,-198.5211
1,2023-08,3.1996,2.5841,160794.6803,166633.8243,2.596,0.9918,-17.8339
2,2023-11,1.3196,1.1339,66642.2772,72641.7045,0.4933,0.675,-8.4933
Mean,NaT,2.2166,1.7796,108498.1955,112584.2417,1.2317,0.847,-74.9494
SD,NaT,0.7699,0.6026,39140.1143,39647.4984,0.9658,0.1307,87.4615


In [166]:
# Display the fitted ETS estimator's repr to inspect its configuration.
ets_dt

In [167]:
# Run every available statistical test for the ETS model; the resulting table
# reports summary statistics and a Ljung-Box white-noise test on its residuals.
exp.check_stats(ets_dt, test = 'all')

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Summary,Statistics,Residual,Length,,50.0
1,Summary,Statistics,Residual,# Missing Values,,0.0
2,Summary,Statistics,Residual,Mean,,-5015.58649
3,Summary,Statistics,Residual,Median,,-10022.75855
4,Summary,Statistics,Residual,Standard Deviation,,39597.447652
5,Summary,Statistics,Residual,Variance,,1567957860.58753
6,Summary,Statistics,Residual,Kurtosis,,1.765764
7,Summary,Statistics,Residual,Skewness,,1.046468
8,Summary,Statistics,Residual,# Distinct Values,,50.0
9,White Noise,Ljung-Box,Residual,Test Statictic,"{'alpha': 0.05, 'K': 24}",40.519761


## ARIMA Model

In [168]:
# Fit the 'arima' model; create_model displays the per-fold CV metrics table.
arima = exp.create_model('arima')

# Plot this model's forecast over a 3-period horizon.
arima_forecast_kwargs = {'fh' : 3}
exp.plot_model(estimator = arima, plot = 'forecast', data_kwargs = arima_forecast_kwargs)

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-05,1.3215,1.0518,60819.4292,63900.7739,0.38,0.4879,-83.0098
1,2023-08,1.1757,0.9945,59085.7604,64131.0108,1.0259,0.6031,-1.7897
2,2023-11,0.594,0.5682,29998.508,36399.4575,0.2189,0.2621,-1.3836
Mean,NaT,1.0304,0.8715,49967.8992,54810.4141,0.5416,0.451,-28.7277
SD,NaT,0.3143,0.2157,14138.2187,13018.8515,0.3487,0.1416,38.3836


In [169]:
# Run every available statistical test for the ARIMA model; the resulting table
# reports summary statistics and a Ljung-Box white-noise test on its residuals.
exp.check_stats(arima, test = 'all')

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Summary,Statistics,Residual,Length,,50.0
1,Summary,Statistics,Residual,# Missing Values,,0.0
2,Summary,Statistics,Residual,Mean,,-938.44953
3,Summary,Statistics,Residual,Median,,-2005.84405
4,Summary,Statistics,Residual,Standard Deviation,,46934.077041
5,Summary,Statistics,Residual,Variance,,2202807587.649582
6,Summary,Statistics,Residual,Kurtosis,,3.039468
7,Summary,Statistics,Residual,Skewness,,-0.382761
8,Summary,Statistics,Residual,# Distinct Values,,50.0
9,White Noise,Ljung-Box,Residual,Test Statictic,"{'alpha': 0.05, 'K': 24}",34.408203


## Taking Best Model and Forecasting

In [170]:
# Hyperparameter-tune the ARIMA model created above; the run log reports
# 10 candidate settings evaluated on 3 folds each (30 fits).
# NOTE(review): an earlier comment labelled this "auto arima", but the call
# tunes the existing `arima` estimator rather than fitting an auto-ARIMA model.
tuned_model = exp.tune_model(arima)

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-05,0.1139,0.0911,5244.4117,5537.1399,0.0325,0.0325,0.3692
1,2023-08,1.3528,1.2852,67984.981,82873.433,1.2877,0.6199,-3.6585
2,2023-11,0.3222,0.3118,16272.7551,19978.9188,0.1302,0.1235,0.2819
Mean,NaT,0.5963,0.5627,29834.0493,36129.8306,0.4835,0.2586,-1.0025
SD,NaT,0.5416,0.5187,27349.909,33574.4399,0.5701,0.2581,1.8784


Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.0s finished


In [171]:
# Finalize the tuned model, then generate predictions from the finalized model.
# NOTE(review): finalize_model presumably refits on the full series (training
# data plus the 3-period hold-out). If so, these predictions extend past the
# last observed month rather than scoring the hold-out, and the name
# `pred_holdout` is misleading — confirm against the PyCaret docs.
final_model = exp.finalize_model(tuned_model)
pred_holdout = exp.predict_model(final_model)

In [172]:
# Plot the finalized model. No `plot` type is given, so PyCaret's default plot
# for a fitted estimator is used (presumably the forecast plot — confirm).
exp.plot_model(final_model)