# Vitamix Forecasting Models

# Global
    * Data Load and Preparation

In [25]:
# Packages
import snowflake.connector
import pandas as pd
import os
import numpy as np
import xlsxwriter
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

from tqdm import tqdm

# import the regression module from pycaret   
import pycaret.regression as pycr

# import the time series module from pycaret

import pycaret.time_series as pycts

# Copy and paste the two lines below into a new chunk, enter your credentials and run it to save them in the environment. Then delete the chunk.
%env snowflakeuser=<your_snowflake_username>
%env snowflakepass=<your_snowflake_password>

In [3]:
# Query Snowflake

def snowflake_to_pandas(connection_params, query):
    """Run ``query`` against Snowflake and return the result as a DataFrame.

    Parameters
    ----------
    connection_params : dict
        Keyword arguments forwarded to ``snowflake.connector.connect``.
    query : str
        SQL text passed to ``pd.read_sql_query``.

    Returns
    -------
    pandas.DataFrame or None
        The query result, or ``None`` when connecting, querying or closing
        fails. Failures are printed as ``Error: ...`` instead of being raised.
    """
    try:
        # Establish a connection to Snowflake
        conn = snowflake.connector.connect(**connection_params)
        try:
            # Execute the SQL query and fetch the results into a DataFrame
            return pd.read_sql_query(query, conn)
        finally:
            # Release the connection even when the query raises; previously
            # a failed query left the connection open.
            conn.close()
    except Exception as e:
        print(f"Error: {e}")
        return None

# Snowflake connection parameters; user and password are read from the
# environment variables `snowflakeuser` / `snowflakepass`
# NOTE(review): ACCOUNTADMIN is a very broad role for a read-only SELECT --
# consider a least-privilege role.
connection_params = dict(
    user=os.environ['snowflakeuser'],
    password=os.environ['snowflakepass'],
    account="zib52348.us-east-1",
    role="ACCOUNTADMIN",
    warehouse="REPORTING",
    database="ANALYTICS",
    schema="FORECASTING",
)

# SQL command: every row of the combined sales / forecast view
query = 'SELECT * FROM "ANALYTICS"."FORECASTING"."sales_fcast_combined_v"'

# Retrieve the data into a Pandas DataFrame (None means the call failed)
result_df = snowflake_to_pandas(connection_params, query)

if result_df is None:
    print("Failed to retrieve data from Snowflake.")
else:
    print(result_df.head())  # Display the first few rows of the DataFrame

   DEP_ENT       MONTH     NET_SALES  BUDGET_AMOUNT  FORECAST
0  210_310  2020-09-01  7.449778e+05      1330384.0       NaN
1  210_310  2020-10-01  5.257454e+05       936992.0       NaN
2  210_310  2020-11-01  1.998459e+06      1681287.0       NaN
3  210_310  2020-12-01  4.443028e+05       734573.0       NaN
4  210_310  2021-01-01  3.309476e+05            NaN       NaN


###############
#### NOTES ####
###############

*Change the date below to forecast all of 2023 (8/15/23)

In [36]:
### Data prep ###

# Convert the month field to datetimes (updates result_df itself)
result_df["MONTH"] = pd.to_datetime(result_df["MONTH"])

# Last month kept for training -- CHANGE FOR 2023.
# Previously pd.Timestamp(2023,6,1) was used: data runs through July '23 when
# these models are trained, and some months carry data we want to remove
# (* want to test without July though).
train_cutoff = pd.Timestamp(2022,12,1)

# Keep months up to the cutoff, select the fields of interest and order the
# rows by dep-ent, then month
df_all = (
    result_df
    .loc[result_df['MONTH'] <= train_cutoff, ["DEP_ENT", "MONTH", "NET_SALES"]]
    .sort_values(['DEP_ENT', 'MONTH'])
)

In [37]:
### Spot checks ###

separator = "-----"

# number of unique time series: 14 different department-entity combinations
n_series = df_all['DEP_ENT'].nunique()
print(n_series)
print(separator)

# months of data per dep-ent: 3 dep-ent do not have all months of data
months_per_series = df_all.groupby(['DEP_ENT']).size()
print(months_per_series)
print(separator)

# nulls per column: no nulls
nulls_per_column = df_all.isnull().sum()
print(nulls_per_column)

14
-----
DEP_ENT
160_155    84
170_155    84
200_155    84
200_310    84
210_155    84
210_165    84
210_310    84
220_155    84
220_310    78
240_155    84
250_155    84
250_165    50
250_310    84
260_155    47
dtype: int64
-----
DEP_ENT      0
MONTH        0
NET_SALES    0
dtype: int64


# Finished with global data work above
__________________________________________________________________________________________________________________________________________________________________________________________________

### 1
# Time Series Forecasting with PyCaret Regression

Pycaret 3.04 regression documentation: https://pycaret.readthedocs.io/en/stable/api/regression.html

In [6]:
### Data Prep for Regression ###

# Work on an independent copy. A plain `df_all_rg = df_all` only creates a
# second name for the same frame, so the feature columns added below would
# also appear on the shared df_all.
df_all_rg = df_all.copy()

# extract month and year from dates (vectorised datetime accessors)
df_all_rg['Month'] = df_all_rg['MONTH'].dt.month
df_all_rg['Year'] = df_all_rg['MONTH'].dt.year

# create a sequence of numbers: 1..N over the whole frame in its current row
# order, so each dep-ent occupies one consecutive range of values
df_all_rg['Series'] = np.arange(1, len(df_all_rg) + 1)

# Notes on below TS loop
* Manual changes:
  * periods to forecast ahead using the next_dates variable 

In [None]:
### Regression Loop ###

# Number of future months to forecast for every dep-ent (manual setting;
# change this based on how many periods ahead you want to predict)
rg_horizon = 6

# list of each dep-ent
all_ts_rg = df_all_rg['DEP_ENT'].unique()

# Per-series pieces are collected in lists and concatenated once after the
# loop: DataFrame.append is deprecated and re-copies the frame on every call.
rg_result_parts = []
rg_prediction_parts = []

for dep_ent in tqdm(all_ts_rg):

    # temp df for a dep-ent
    df_subset = df_all_rg[df_all_rg['DEP_ENT'] == dep_ent]

    # trim to the model features plus the target
    df_subset_trim = df_subset[['Series', 'Year', 'Month', 'NET_SALES']]

    # initialize setup
    # NOTE(review): setup() is called with its default split / fold settings;
    # consider whether a time-ordered split is more appropriate here.
    s = pycr.setup(data = df_subset_trim, target = 'NET_SALES', session_id = 123)

    # model training and selection
    best_model = pycr.compare_models()

    # capture the compare result grid; its first row is the best model.
    # .copy() so that adding a column does not write into a slice of the grid.
    best_row = pycr.pull().iloc[0:1].copy()
    best_row['DEP_ENT'] = str(dep_ent)
    rg_result_parts.append(best_row)

    # refit the selected model on the complete dataset before forecasting
    final_model = pycr.finalize_model(best_model)

    ### New data to predict ###

    # max date from original dataset
    max_timestamp = df_subset['MONTH'].max()

    # future month starts following the last observed month
    next_dates = [
        max_timestamp.replace(day=1) + pd.DateOffset(months=step)
        for step in range(1, rg_horizon + 1)
    ]
    new_dates_df = pd.DataFrame({'MONTH': pd.to_datetime(next_dates)})

    # extract month and year from dates
    new_dates_df['Month'] = new_dates_df['MONTH'].dt.month
    new_dates_df['Year'] = new_dates_df['MONTH'].dt.year

    # Continue the running sequence from the last training value. Restarting
    # at 1 would feed the model 'Series' values that do not follow on from
    # the ones it was trained on.
    new_dates_df['Series'] = df_subset['Series'].max() + np.arange(1, len(new_dates_df) + 1)

    # select cols
    new_dates_df = new_dates_df[['Series', 'Year', 'Month']]

    # predict on new data
    predictions = pycr.predict_model(final_model, data = new_dates_df)

    ### Combine predictions in to all_predictions dataframe ###

    # predictions are paired with the future months by position
    pred_comb = pd.DataFrame({
        'MONTH': next_dates,
        'PRED': predictions['prediction_label'].to_numpy(),
        'DEP_ENT': dep_ent,
    })
    rg_prediction_parts.append(pred_comb)

# master dataframes (empty frames when there was nothing to loop over)
all_results_rg = pd.concat(rg_result_parts) if rg_result_parts else pd.DataFrame()
all_predictions_rg = pd.concat(rg_prediction_parts) if rg_prediction_parts else pd.DataFrame()

In [12]:
### Write Regression to Excel ###

# Create a Pandas Excel writer using XlsxWriter. The `with` block saves and
# closes the workbook on exit, also when writing a sheet fails, and replaces
# the deprecated ExcelWriter.save() call.
with pd.ExcelWriter('regression.xlsx', engine='xlsxwriter') as rg_writer:
    # sheet 1
    all_results_rg.to_excel(rg_writer, sheet_name='results', index=False)
    # sheet 2
    all_predictions_rg.to_excel(rg_writer, sheet_name='predictions', index=False)

__________________________________________________________________________________________________________________________________________________________________________________________________

### 2
# Time Series Forecasting with PyCaret Time Series

Pycaret 3.04 time series documentation: https://pycaret.readthedocs.io/en/stable/api/time_series.html

In [38]:
### Data Prep for Time Series Analysis ###

# create time series dataset: series key, month and the sales target only
ts_columns = ["DEP_ENT", "MONTH", "NET_SALES"]
df_all_ts = df_all[ts_columns]

# Notes on below TS loop
* Manual changes:
  * periods to include in the dates dataset created using next_dates
  * forecast horizon (fh) in predict_model function

###############
#### NOTES ####
###############

*When changing the dataset to forecast all of 2023, we also need to change the dates and fh for the two notes above (8/15/23)

In [44]:
### Time Series Loop ###

# Forecast horizon in months -- CHANGE FOR 2023 (12 = all of 2023; 6 was used
# before). One value drives both the future-date labels and the `fh` passed
# to predict_model, so the two can no longer drift apart.
ts_horizon = 12

# list of each dep-ent
all_ts_ts = df_all_ts['DEP_ENT'].unique()

# Per-series pieces are collected in lists and concatenated once after the
# loop: DataFrame.append is deprecated and re-copies the frame on every call.
ts_result_parts = []
ts_prediction_parts = []

for dep_ent in tqdm(all_ts_ts):

    # temp df for a dep-ent
    df_subset = df_all_ts[df_all_ts['DEP_ENT'] == dep_ent]

    # trim
    df_subset_trim = df_subset[['MONTH', 'NET_SALES']]

    # create series indexed by month and set its frequency to month start
    df_subset_s = df_subset_trim.set_index('MONTH')['NET_SALES'].asfreq('MS')

    # initialize setup (setup's own default fh is used for model selection;
    # an 80/20 split via fh was tried earlier and is not used)
    s = pycts.setup(df_subset_s, numeric_imputation_target = "mean", session_id = 123)

    # model training and selection
    best_model = pycts.compare_models(sort = 'MAPE', exclude = ["naive", "grand_means", "croston", ])

    # plot trained model
    pycts.plot_model(best_model, plot = 'forecast')

    # capture the compare result grid; its first row is the best model.
    # .copy() so that adding a column does not write into a slice of the grid.
    best_row = pycts.pull().iloc[0:1].copy()
    best_row['DEP_ENT'] = str(dep_ent)
    ts_result_parts.append(best_row)

    ### Predict future periods ###

    # Refit the selected model on the complete series first. Without this,
    # predict_model forecasts from the end of the training split (the setup
    # summary shows one month held out as test), so the forecasts would be
    # out of step with the month labels built below.
    final_model = pycts.finalize_model(best_model)

    # max date from original dataset
    max_timestamp = df_subset_trim['MONTH'].max()

    # Future month starts following the last observed month. These are not
    # model inputs; they only label the predictions.
    next_dates = [
        max_timestamp.replace(day=1) + pd.DateOffset(months=step)
        for step in range(1, ts_horizon + 1)
    ]

    # predict in unseen future
    predictions = pycts.predict_model(final_model, fh=ts_horizon)

    # predictions are paired with the future months by position
    pred_comb = pd.DataFrame({
        'MONTH': next_dates,
        'PRED': predictions['y_pred'].to_numpy(),
        'DEP_ENT': dep_ent,
    })
    ts_prediction_parts.append(pred_comb)

# master dataframes (empty frames when there was nothing to loop over)
all_results_ts = pd.concat(ts_result_parts) if ts_result_parts else pd.DataFrame()
all_predictions_ts = pd.concat(ts_prediction_parts) if ts_prediction_parts else pd.DataFrame()

  0%|          | 0/14 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
polytrend,Polynomial Trend Forecaster,0.1212,0.0959,168497.6577,168497.6577,0.0621,0.0595,0.8167
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.1227,0.0971,170614.8954,170614.8954,0.063,0.0602,0.0767
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.1227,0.0971,170614.8954,170614.8954,0.063,0.0602,0.09
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.1227,0.0971,170614.8954,170614.8954,0.063,0.0602,0.0767
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.1227,0.0971,170614.8954,170614.8954,0.063,0.0602,0.08
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.1227,0.0971,170614.8954,170614.8954,0.063,0.0602,0.08
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.1227,0.0971,170614.8954,170614.8954,0.063,0.0602,0.04
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.1246,0.0986,173369.1781,173369.1781,0.0641,0.0611,0.0767
theta,Theta Forecaster,0.1288,0.1019,178883.353,178883.353,0.0655,0.063,0.5733
exp_smooth,Exponential Smoothing,0.1413,0.1118,196377.5076,196377.5076,0.0721,0.0689,0.6133


  7%|▋         | 1/14 [00:20<04:24, 20.38s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.6413,0.4929,512042.1158,512042.1158,0.1517,0.1449,0.0433
polytrend,Polynomial Trend Forecaster,0.695,0.5359,555807.6127,555807.6127,0.2098,0.1687,0.02
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.9046,0.6955,721701.3525,721701.3525,0.2205,0.2025,0.0667
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.021,0.7853,815740.9145,815740.9145,0.2577,0.2347,0.0767
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.0356,0.7965,825995.6924,825995.6924,0.2581,0.2307,0.04
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.0687,0.8228,853582.323,853582.323,0.2902,0.2447,0.0367
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.1163,0.8591,891252.193,891252.193,0.2947,0.2534,0.0367
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,1.1268,0.8674,899908.1855,899908.1855,0.302,0.2563,0.04
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.1268,0.8674,899908.1855,899908.1855,0.302,0.2563,0.0333
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.1268,0.8674,899908.1855,899908.1855,0.302,0.2563,0.0367


 14%|█▍        | 2/14 [00:30<02:53, 14.45s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
snaive,Seasonal Naive Forecaster,0.3939,0.2829,308442.01,308442.01,0.1552,0.138,0.0433
arima,ARIMA,0.5349,0.3844,419054.7369,419054.7369,0.1713,0.1625,0.0367
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.054,0.7579,826928.5857,826928.5857,0.2398,0.2702,0.0467
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.054,0.7579,826928.5858,826928.5858,0.2398,0.2702,0.0433
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.054,0.7579,826928.5857,826928.5857,0.2398,0.2702,0.0467
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.054,0.7579,826928.5858,826928.5858,0.2398,0.2702,0.04
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,1.054,0.7579,826928.5858,826928.5858,0.2398,0.2702,0.0433
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1592,0.8341,911679.8105,911679.8105,0.2485,0.3004,0.06
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.0917,0.7851,856826.0302,856826.0302,0.2589,0.2878,0.05
stlf,STLF,0.8958,0.6435,700590.7988,700590.7988,0.2646,0.3168,0.0333


 21%|██▏       | 3/14 [00:45<02:41, 14.65s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
stlf,STLF,0.669,0.469,93471.8975,93471.8975,0.2873,0.3675,0.03
arima,ARIMA,0.7899,0.55,109104.3894,109104.3894,0.314,0.2832,0.04
auto_arima,Auto ARIMA,0.7594,0.5321,106103.3138,106103.3138,0.4289,0.4172,0.98
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.8905,0.6219,123731.8909,123731.8909,0.4908,0.4715,0.05
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.8905,0.6219,123731.8909,123731.8909,0.4908,0.4715,0.04
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.8905,0.6219,123731.8909,123731.8909,0.4908,0.4715,0.0433
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.8905,0.6219,123731.8909,123731.8909,0.4908,0.4715,0.04
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.8905,0.6219,123731.8909,123731.8909,0.4908,0.4715,0.04
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.7856,0.5496,109562.16,109562.16,0.5335,0.4308,0.0667
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.82,0.5735,114314.4539,114314.4539,0.5638,0.4471,0.04


 29%|██▊       | 4/14 [00:57<02:16, 13.70s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.3559,0.273,1129380.1076,1129380.1076,0.2265,0.2039,0.0367
exp_smooth,Exponential Smoothing,0.7149,0.5495,2276535.2075,2276535.2075,0.3179,0.3377,0.06
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.4615,0.3532,1460949.6403,1460949.6403,0.3607,0.2831,0.0433
stlf,STLF,0.7726,0.5935,2458267.9348,2458267.9348,0.375,0.4631,0.03
polytrend,Polynomial Trend Forecaster,0.9089,0.6983,2893406.4722,2893406.4722,0.4239,0.4474,0.02
ets,ETS,0.7663,0.5881,2434343.5447,2434343.5447,0.4395,0.389,0.04
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.7129,0.5466,2261472.3598,2261472.3598,0.4603,0.3762,0.06
theta,Theta Forecaster,0.904,0.6941,2874595.4092,2874595.4092,0.4717,0.4486,0.0267
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.7189,0.5511,2280596.7031,2280596.7031,0.4739,0.3812,0.0733
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.86,0.6599,2732516.9363,2732516.9363,0.4919,0.4312,0.07


 36%|███▌      | 5/14 [01:11<02:03, 13.67s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
exp_smooth,Exponential Smoothing,0.8102,0.6307,312933.3513,312933.3513,2.0707,1.1612,0.0533
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,1.0101,0.7863,389903.3672,389903.3672,2.5751,1.3243,0.04
theta,Theta Forecaster,0.7899,0.6145,304324.0932,304324.0932,2.5853,0.9164,0.0267
polytrend,Polynomial Trend Forecaster,0.7556,0.5877,290972.7438,290972.7438,2.7337,0.8667,0.0233
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1082,0.8624,427317.2401,427317.2401,2.9488,1.3062,0.0533
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.9999,0.778,385518.4902,385518.4902,3.1325,1.1511,0.0833
ets,ETS,0.9503,0.7394,366506.946,366506.946,3.211,1.1392,0.0333
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.9326,0.7253,358991.1831,358991.1831,3.4018,1.0014,0.0767
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.7647,0.5948,294673.253,294673.253,3.4526,0.9047,0.04
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.0928,0.8502,421121.5212,421121.5212,3.578,1.1938,0.05


 43%|████▎     | 6/14 [01:24<01:47, 13.49s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
polytrend,Polynomial Trend Forecaster,2.4545,1.7765,1994779.3486,1994779.3486,2.7118,1.5165,0.0167
theta,Theta Forecaster,2.4618,1.782,2003459.5007,2003459.5007,3.3334,1.4411,0.03
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,3.0315,2.1932,2469713.1335,2469713.1335,3.8288,1.8434,0.0433
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,3.2959,2.3829,2688686.0765,2688686.0765,3.8568,1.9862,0.0533
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,3.1312,2.2654,2552749.0747,2552749.0747,4.3464,1.8451,0.0833
auto_arima,Auto ARIMA,2.6491,1.9184,2157991.975,2157991.975,4.4155,1.4417,1.5067
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,3.1562,2.2837,2573182.6888,2573182.6888,4.4892,1.8263,0.0467
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,3.1603,2.2861,2580176.3972,2580176.3972,4.8082,1.802,0.0433
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,3.1603,2.2861,2580176.3972,2580176.3972,4.8082,1.802,0.0433
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,3.1603,2.2861,2580176.3972,2580176.3972,4.8082,1.802,0.0433


 50%|█████     | 7/14 [01:38<01:36, 13.73s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
exp_smooth,Exponential Smoothing,0.2539,0.1776,570334.4787,570334.4787,3.3015,0.7612,0.03
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.9588,0.6681,2129798.4803,2129798.4803,3.389,1.4202,0.0367
theta,Theta Forecaster,0.2607,0.1823,585549.3438,585549.3438,3.4185,0.7603,0.0333
polytrend,Polynomial Trend Forecaster,0.2806,0.1963,630339.962,630339.962,3.7418,0.7603,0.0267
arima,ARIMA,0.3075,0.2151,690959.6388,690959.6388,4.1783,0.7554,0.0333
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.311,0.2175,698763.1222,698763.1222,4.2279,0.7584,0.0367
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.7821,0.5453,1741429.9871,1741429.9871,4.3033,1.3224,0.0467
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.3389,0.237,761298.7467,761298.7467,4.4772,0.7776,0.0367
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.3389,0.237,761298.7467,761298.7467,4.4772,0.7776,0.0367
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.3389,0.237,761298.7467,761298.7467,4.4772,0.7776,0.0367


 57%|█████▋    | 8/14 [01:47<01:13, 12.26s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(79, 1)"
5,Transformed data shape,"(79, 1)"
6,Transformed train set shape,"(78, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,1.3%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
stlf,STLF,0.1091,0.0773,22562.2528,22562.2528,0.8712,0.5838,0.0433
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.2845,0.2018,58964.0662,58964.0662,1.9238,1.2642,0.0533
arima,ARIMA,0.2542,0.1804,52865.012,52865.012,2.2402,0.8344,0.0367
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.2001,0.1419,41386.831,41386.831,2.5563,0.9964,0.0533
snaive,Seasonal Naive Forecaster,0.2749,0.195,57086.1356,57086.1356,3.2121,1.2295,0.0467
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.2898,0.2054,59906.639,59906.639,3.2221,1.2466,0.0567
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.2954,0.2094,61122.6904,61122.6904,3.4051,1.4904,0.0567
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.29,0.2056,59938.4322,59938.4322,3.4694,1.3564,0.0533
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.29,0.2056,59938.4322,59938.4322,3.4694,1.3564,0.0433
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.29,0.2056,59938.4322,59938.4322,3.4694,1.3564,0.0533


 64%|██████▍   | 9/14 [01:59<00:59, 11.95s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
stlf,STLF,0.7347,0.5923,664296.4395,664296.4395,0.1745,0.1482,0.04
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.7874,0.6347,711484.9014,711484.9014,0.1852,0.1533,0.0467
arima,ARIMA,0.7727,0.623,699036.8774,699036.8774,0.1866,0.1559,0.05
auto_arima,Auto ARIMA,0.8866,0.715,804688.4173,804688.4173,0.2041,0.1849,0.2567
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.8888,0.7166,805058.6426,805058.6426,0.2045,0.1808,0.0933
ets,ETS,0.9143,0.7372,828650.7657,828650.7657,0.2064,0.1845,0.0467
exp_smooth,Exponential Smoothing,0.9494,0.7655,860565.7887,860565.7887,0.2147,0.1915,0.0733
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.9192,0.7412,833407.2549,833407.2549,0.2154,0.1909,0.0667
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.9608,0.7748,871320.1083,871320.1083,0.2277,0.1996,0.0633
theta,Theta Forecaster,1.0089,0.8135,915062.5876,915062.5876,0.2285,0.2039,0.0333


 71%|███████▏  | 10/14 [02:11<00:47, 11.92s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
snaive,Seasonal Naive Forecaster,0.7969,0.6216,1621569.2667,1621569.2667,0.1319,0.151,0.0533
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.8391,0.655,1710306.5451,1710306.5451,0.135,0.1448,0.05
arima,ARIMA,1.1393,0.8893,2323688.6423,2323688.6423,0.1902,0.2117,0.05
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.2129,0.9468,2472136.1565,2472136.1565,0.1959,0.2167,0.0867
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.2542,0.9789,2555338.0871,2555338.0871,0.2008,0.2277,0.0533
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.2542,0.9789,2555338.0871,2555338.0871,0.2008,0.2277,0.0433
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.2542,0.9789,2555338.0871,2555338.0871,0.2008,0.2277,0.0433
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,1.2542,0.9789,2555338.0871,2555338.0871,0.2008,0.2277,0.0433
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.2542,0.9789,2555338.0871,2555338.0871,0.2008,0.2277,0.04
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.2486,0.9744,2543694.893,2543694.893,0.2009,0.2263,0.09


 79%|███████▊  | 11/14 [02:40<00:51, 17.26s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(51, 1)"
5,Transformed data shape,"(51, 1)"
6,Transformed train set shape,"(50, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,2.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
ets,ETS,0.5276,0.3061,14879.0178,14879.0178,10.4295,0.9493,0.0367
arima,ARIMA,0.506,0.2937,14263.5568,14263.5568,11.1874,0.8993,0.04
auto_arima,Auto ARIMA,0.5204,0.302,14672.1014,14672.1014,11.572,0.9116,0.0767
exp_smooth,Exponential Smoothing,0.5667,0.3288,15983.9354,15983.9354,12.4125,0.9495,0.0267
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.569,0.3302,16048.8245,16048.8245,13.4814,0.9271,0.0367
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.569,0.3302,16048.8245,16048.8245,13.4814,0.9271,0.0367
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.569,0.3302,16048.8245,16048.8245,13.4814,0.9271,0.0367
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.569,0.3302,16048.8245,16048.8245,13.4814,0.9271,0.0433
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.569,0.3302,16048.8245,16048.8245,13.4814,0.9271,0.0367
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.569,0.3302,16048.8245,16048.8245,13.4814,0.9271,0.04


 86%|████████▌ | 12/14 [02:49<00:29, 14.70s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(83, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,1.7381,1.3935,371088.1282,371088.1282,0.2334,0.2843,0.0467
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.6081,1.2896,343047.8552,343047.8552,0.2729,0.2979,0.0633
snaive,Seasonal Naive Forecaster,1.4484,1.161,309659.8816,309659.8816,0.3166,0.3066,0.0433
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.7577,1.4087,375922.2607,375922.2607,0.3176,0.3394,0.08
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.6323,1.3085,348853.4871,348853.4871,0.3265,0.3309,0.0433
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.7113,1.3714,366152.8232,366152.8232,0.3295,0.3409,0.09
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.6835,1.3495,359848.7241,359848.7241,0.3307,0.3389,0.09
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.7141,1.3739,366538.2197,366538.2197,0.3375,0.3459,0.0633
theta,Theta Forecaster,1.5932,1.2763,341361.0188,341361.0188,0.3482,0.338,0.0333
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.6946,1.3585,362120.4615,362120.4615,0.3557,0.352,0.0433


 93%|█████████▎| 13/14 [03:06<00:15, 15.40s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(47, 1)"
5,Transformed data shape,"(47, 1)"
6,Transformed train set shape,"(46, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
theta,Theta Forecaster,0.6756,0.4434,1321.139,1321.139,0.3553,0.3492,0.0367
ets,ETS,0.6845,0.4493,1338.6657,1338.6657,0.3638,0.3535,0.04
exp_smooth,Exponential Smoothing,0.6858,0.4502,1341.2059,1341.2059,0.3651,0.354,0.04
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.5907,0.3897,1157.7629,1157.7629,0.3828,0.3103,0.1
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.578,0.3815,1132.8402,1132.8402,0.3917,0.306,0.05
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.7165,0.4711,1401.7267,1401.7267,0.4258,0.3601,0.0467
arima,ARIMA,0.7305,0.4805,1429.3332,1429.3332,0.444,0.3645,0.0567
auto_arima,Auto ARIMA,0.7305,0.4805,1429.3332,1429.3332,0.444,0.3645,0.1433
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.6984,0.4605,1368.4801,1368.4801,0.4461,0.355,0.0533
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.7498,0.494,1468.657,1468.657,0.4589,0.3778,0.06


100%|██████████| 14/14 [03:16<00:00, 14.06s/it]


In [45]:
### Write Time Series to Excel ###

# Create a Pandas Excel writer using XlsxWriter. The `with` block saves and
# closes the workbook on exit, also when writing a sheet fails, and replaces
# the deprecated ExcelWriter.save() call.
with pd.ExcelWriter('time_series.xlsx', engine='xlsxwriter') as ts_writer:
    # sheet 1
    all_results_ts.to_excel(ts_writer, sheet_name='results', index=False)
    # sheet 2
    all_predictions_ts.to_excel(ts_writer, sheet_name='predictions', index=False)