In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('Items Shipped.csv')
df.columns

  df = pd.read_csv('Items Shipped.csv')


Index(['Customer Name', 'Parent', 'Account Manager', 'Item',
       'Item Description', 'Product Group', 'Parent Product Group',
       'Netsuite Link', 'Sales Order', 'Customer PO', 'Tracking Numbers',
       'Invoice Status', 'Date Created', 'Date', 'Ship Date',
       'Import/Domestic', 'Location', 'Is Individual', 'Quantity', 'Amount',
       'Sell Price', 'Shipping State/Province', 'Online Flag', 'Internal ID',
       'Shipping Address'],
      dtype='object')

### Setting Up Data

In [3]:
# converting ship date to pandas datetime
df['Ship Date'] = pd.to_datetime(df['Ship Date'])

# filtering to the conditioner item
item = '4123'
filtered_df = df[(df['Item'] == item) & (df['Import/Domestic'] == 'Domestic')]
# # grouping quantity to frequency of month
new_df = filtered_df.groupby(pd.Grouper(key='Ship Date', freq="M"))['Quantity'].sum().reset_index()
new_df.set_index('Ship Date', inplace=True)
new_df.head()

Unnamed: 0_level_0,Quantity
Ship Date,Unnamed: 1_level_1
2020-01-31,1790.0
2020-02-29,28846.0
2020-03-31,4420.0
2020-04-30,5347.0
2020-05-31,307.0


In [4]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 53 entries, 2020-01-31 to 2024-05-31
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Quantity  53 non-null     float64
dtypes: float64(1)
memory usage: 848.0 bytes


In [5]:
from pycaret.time_series import *

exp = TSForecastingExperiment()
exp.setup(data = new_df, target='Quantity', fh=3, coverage=0.90)

Unnamed: 0,Description,Value
0,session_id,2019
1,Target,Quantity
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(53, 1)"
5,Transformed data shape,"(53, 1)"
6,Transformed train set shape,"(50, 1)"
7,Transformed test set shape,"(3, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


<pycaret.time_series.forecasting.oop.TSForecastingExperiment at 0x15b089558d0>

In [6]:
exp.plot_model(plot="ts")

In [7]:
exp.plot_model(plot="train_test_split")

In [8]:
exp.plot_model(plot="cv")

In [9]:
best_model = exp.compare_models(sort='RMSE') #sort RMSE

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.4794,0.3683,7402.903,8089.0278,6.450803125057764e+17,1.163,-29.4889,0.0667
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.7203,0.5181,11014.3128,11291.5061,6.067145300258254e+18,1.2084,-70.4075,0.06
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.5852,0.5272,9051.6493,11585.3679,3.7418153337864736e+17,1.1817,-103.0586,0.0967
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.7382,0.62,11443.4743,13626.6086,3.7418153337866016e+17,1.147,-232.154,0.05
arima,ARIMA,0.9054,0.6465,13786.0535,14048.3673,9.391032299687156e+18,1.2528,-116.7394,0.9933
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.8629,0.6621,13080.2632,14351.2153,1.4574924588233253e+19,1.2272,-107.3794,0.1233
croston,Croston,0.9787,0.6882,14931.2985,14973.9548,8.442790918690144e+18,1.2903,-131.987,0.0167
grand_means,Grand Means Forecaster,0.9845,0.6965,14988.0641,15136.8689,9.420039867390915e+18,1.2962,-125.5853,0.9533
auto_arima,Auto ARIMA,0.9845,0.6965,14988.0641,15136.8689,9.420039867390915e+18,1.2962,-125.5853,1.0267
theta,Theta Forecaster,1.0366,0.7285,15800.4925,15840.5089,9.425796062163993e+18,1.2961,-151.6376,0.0333


In [10]:
exp.plot_model(plot="diff", 
               data_kwargs={"order_list": [1, 2], "acf": True, "pacf": True})

In [11]:
exp.plot_model(plot="diff", 
               data_kwargs={"lags_list": [[1, 12]], "acf": True, "pacf": True})

In [22]:
exp.plot_model(plot = 'decomp', 
               data_kwargs = {'type' : 'multiplicative'})

In [13]:
exp.plot_model(plot = 'diagnostics')

## ETS Model

In [14]:
ets_dt = exp.create_model('ets')

exp.plot_model(estimator = ets_dt, 
               plot = 'forecast', 
               data_kwargs = {'fh' : 3})

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-05,0.4561,0.3638,7024.8973,8004.393,0.388,0.3058,-0.0887
1,2023-08,1.6235,1.1494,25239.8884,25288.7086,22.5464,1.7458,-377.1439
2,2023-11,1.0372,0.7219,15376.9654,15397.7589,2.321625367284608e+19,1.812,-134.407
Mean,NaT,1.0389,0.745,15880.5837,16230.2868,7.738751224282027e+18,1.2879,-170.5465
SD,NaT,0.4766,0.3212,7444.761,7080.8059,1.0944246937211036e+19,0.6949,156.0389


In [15]:
ets_dt

In [16]:
exp.check_stats(ets_dt, test = 'all')

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Summary,Statistics,Residual,Length,,50.0
1,Summary,Statistics,Residual,# Missing Values,,0.0
2,Summary,Statistics,Residual,Mean,,-2740.63515
3,Summary,Statistics,Residual,Median,,-7077.5891
4,Summary,Statistics,Residual,Standard Deviation,,17422.368268
5,Summary,Statistics,Residual,Variance,,303538916.072186
6,Summary,Statistics,Residual,Kurtosis,,3.758112
7,Summary,Statistics,Residual,Skewness,,1.695233
8,Summary,Statistics,Residual,# Distinct Values,,50.0
9,White Noise,Ljung-Box,Residual,Test Statictic,"{'alpha': 0.05, 'K': 24}",52.007227


## ARIMA Model

In [17]:
arima = exp.create_model('arima')
exp.plot_model(estimator = arima, plot = 'forecast', 
               data_kwargs = {'fh' : 3})

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-05,0.4473,0.3349,6889.6646,7369.5457,0.2998,0.3024,0.0772
1,2023-08,1.1526,0.818,17919.5529,17996.387,16.2477,1.6616,-190.5025
2,2023-11,1.1162,0.7866,16548.9431,16779.1691,2.8173096899061465e+19,1.7944,-159.793
Mean,NaT,0.9054,0.6465,13786.0535,14048.3673,9.391032299687156e+18,1.2528,-116.7394
SD,NaT,0.3242,0.2207,4908.481,4748.712,1.328092524290137e+19,0.6742,83.5478


In [18]:
exp.check_stats(arima, test = 'all')

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Summary,Statistics,Residual,Length,,50.0
1,Summary,Statistics,Residual,# Missing Values,,0.0
2,Summary,Statistics,Residual,Mean,,-107.248702
3,Summary,Statistics,Residual,Median,,-5834.34825
4,Summary,Statistics,Residual,Standard Deviation,,16544.670933
5,Summary,Statistics,Residual,Variance,,273726136.280974
6,Summary,Statistics,Residual,Kurtosis,,2.953024
7,Summary,Statistics,Residual,Skewness,,1.66574
8,Summary,Statistics,Residual,# Distinct Values,,50.0
9,White Noise,Ljung-Box,Residual,Test Statictic,"{'alpha': 0.05, 'K': 24}",32.091016


## Taking Best Model and Forecasting

In [34]:
tuned_model = exp.tune_model(best_model) # auto arima

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-05,0.5557,0.581,8558.5291,12783.7893,0.5585,0.3305,-1.7768
1,2023-08,1.9959,1.4172,31030.3037,31180.6314,28.3117,1.7812,-573.8753
2,2023-11,0.5848,0.4071,8669.76,8684.1484,1.1973159084668316e+19,1.7411,-42.0706
Mean,NaT,1.0455,0.8018,16086.1976,17549.523,3.991053028222772e+18,1.2843,-205.9076
SD,NaT,0.6722,0.4409,10567.1763,9782.8796,5.644201320662856e+18,0.6746,260.7119


Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.1s finished


In [32]:
final_model = exp.finalize_model(tuned_model)
pred_holdout = exp.predict_model(final_model)

In [33]:
exp.plot_model(final_model)