# Vitamix Forecasting Models

# Global
    * Data Load and Preparation

In [22]:
# Packages
import snowflake.connector
import pandas as pd
import os
import numpy as np

from datetime import datetime, timedelta

from tqdm import tqdm

# import the regression module from pycaret   
import pycaret.regression as pycr

# import the time series module from pycaret

import pycaret.time_series as pycts

# Copy the two %env lines below into a new cell, enter your credentials, and run it to save them in the environment. Then delete that cell.
%env snowflakeuser=<your_snowflake_username>
%env snowflakepass=<your_snowflake_password>

In [2]:
# Query Snowflake

def snowflake_to_pandas(connection_params, query):
    """Run a SQL query against Snowflake and return the result as a DataFrame.

    Parameters
    ----------
    connection_params : dict
        Keyword arguments forwarded to ``snowflake.connector.connect``.
    query : str
        SQL text passed to ``pd.read_sql_query``.

    Returns
    -------
    pandas.DataFrame or None
        The query result, or ``None`` when connecting or querying fails.
        Failures are printed, not raised, so callers must check for ``None``.
    """
    conn = None
    try:
        # Establish a connection to Snowflake
        conn = snowflake.connector.connect(**connection_params)

        # Execute the SQL query and fetch the results into a DataFrame
        return pd.read_sql_query(query, conn)
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Release the connection on every path, including a failed query;
        # closing only after a successful read leaks the connection on error.
        if conn is not None:
            try:
                conn.close()
            except Exception as e:
                # A failed close must not discard an already-fetched result.
                print(f"Error closing connection: {e}")

# Connection settings: user and password are read from the environment,
# everything else is fixed for this notebook
connection_params = dict(
    user=os.environ['snowflakeuser'],
    password=os.environ['snowflakepass'],
    account="zib52348.us-east-1",
    role="ACCOUNTADMIN",
    warehouse="REPORTING",
    database="ANALYTICS",
    schema="FORECASTING",
)

# Select every column and row of the forecasting view
query = 'SELECT * FROM "ANALYTICS"."FORECASTING"."sales_fcast_combined_v"'

# Load the view into a pandas DataFrame (None means the load failed)
result_df = snowflake_to_pandas(connection_params, query)

if result_df is None:
    print("Failed to retrieve data from Snowflake.")
else:
    print(result_df.head())  # Preview the first few rows of the DataFrame

   DEP_ENT       MONTH     NET_SALES  BUDGET_AMOUNT  FORECAST
0  210_310  2020-09-01  7.449778e+05      1330384.0       NaN
1  210_310  2020-10-01  5.257454e+05       936992.0       NaN
2  210_310  2020-11-01  1.998459e+06      1681287.0       NaN
3  210_310  2020-12-01  4.443028e+05       734573.0       NaN
4  210_310  2021-01-01  3.309476e+05            NaN       NaN


In [3]:
### Data prep ###

# Parse the month field into datetimes (this updates result_df itself)
result_df["MONTH"] = pd.to_datetime(result_df["MONTH"])

# Data ran through July '23 when these models were trained. The cutoff keeps months
# up to and including June 2023, which drops July and any stray later months
# (* still want to test without July).
# Only the fields of interest are kept, ordered by dep-ent and then by month.
df_all = (
    result_df
    .loc[result_df["MONTH"] <= pd.Timestamp(2023, 6, 1), ["DEP_ENT", "MONTH", "NET_SALES"]]
    .sort_values(["DEP_ENT", "MONTH"])
)

In [4]:
### Spot checks ###

# Number of distinct time series: 14 department-entity combinations
print(df_all['DEP_ENT'].nunique(), "-----", sep="\n")

# Months available per dep-ent: 3 dep-ents do not have all months of data
print(df_all.groupby('DEP_ENT').size(), "-----", sep="\n")

# Null count per column: no nulls
print(df_all.isna().sum())

14
-----
DEP_ENT
160_155    90
170_155    90
200_155    90
200_310    90
210_155    90
210_165    90
210_310    90
220_155    90
220_310    84
240_155    90
250_155    90
250_165    56
250_310    90
260_155    53
dtype: int64
-----
DEP_ENT      0
MONTH        0
NET_SALES    0
dtype: int64


# Finished with global data work above
__________________________________________________________________________________________________________________________________________________________________________________________________

### 1
# Time Series Forecasting with PyCaret Regression

PyCaret 3.0.4 regression documentation: https://pycaret.readthedocs.io/en/stable/api/regression.html

In [5]:
### Data Prep for Regression ###

# Work on a copy. Plain assignment (`df_all_rg = df_all`) only aliases the frame,
# so the feature columns added below would silently appear in df_all as well.
df_all_rg = df_all.copy()

# extract month and year from dates (vectorised datetime accessors)
df_all_rg['Month'] = df_all_rg['MONTH'].dt.month
df_all_rg['Year'] = df_all_rg['MONTH'].dt.year

# create a sequence of numbers running over every row of the frame, in its current order
df_all_rg['Series'] = np.arange(1, len(df_all_rg) + 1)

# Notes on the regression loop below
* Manual changes:
  * the number of periods to forecast ahead, which determines the length of `next_dates`

In [None]:
### Regression Loop ###

# Number of future months to predict for each dep-ent.
# Change this single value to forecast further ahead.
n_periods_rg = 6

# Per-series pieces are collected in lists and concatenated once after the loop.
# DataFrame.append is deprecated (removed in pandas 2.0) and growing a frame
# inside a loop copies it on every iteration.
results_rg_parts = []
predictions_rg_parts = []

# list of each dep-ent
all_ts_rg = df_all_rg['DEP_ENT'].unique()

for dep_ent in tqdm(all_ts_rg):

    # temp df for a dep-ent
    df_subset = df_all_rg[df_all_rg['DEP_ENT'] == dep_ent]

    # trim to the model features plus the target
    df_subset_trim = df_subset[['Series', 'Year', 'Month', 'NET_SALES']]

    # initialize setup
    s = pycr.setup(data = df_subset_trim, target = 'NET_SALES', session_id = 123)

    # model training and selection
    best_model = pycr.compare_models()

    # first row of the comparison grid is the best model; copy the slice so the
    # DEP_ENT tag is written to an independent frame rather than a view
    p = pycr.pull().iloc[0:1].copy()
    p['DEP_ENT'] = str(dep_ent)
    results_rg_parts.append(p)

    ### New data to predict ###

    # max date from original dataset
    max_timestamp = df_subset['MONTH'].max()

    # first-of-month dates for the next n_periods_rg months
    next_dates = [
        max_timestamp.replace(day=1) + pd.DateOffset(months=offset)
        for offset in range(1, n_periods_rg + 1)
    ]
    new_dates_df = pd.DataFrame({'MONTH': pd.to_datetime(next_dates)})

    # extract month and year from dates
    new_dates_df['Month'] = new_dates_df['MONTH'].dt.month
    new_dates_df['Year'] = new_dates_df['MONTH'].dt.year

    # Continue the sequence from this dep-ent's last training value. Restarting at 1
    # would score the model on Series values from the start of the history instead
    # of on the periods that follow the training data.
    last_series = int(df_subset['Series'].max())
    new_dates_df['Series'] = np.arange(last_series + 1, last_series + 1 + len(new_dates_df))

    # select cols
    new_dates_df = new_dates_df[['Series', 'Year', 'Month']]

    # refit the selected model on all rows (train + hold-out) before forecasting,
    # then predict on the new data
    final_model = pycr.finalize_model(best_model)
    predictions = pycr.predict_model(final_model, data = new_dates_df)

    ### Combine predictions in to all_predictions dataframe ###

    # .to_numpy() pairs predictions with dates by position, independent of the index
    pred_comb = pd.DataFrame({
        'MONTH': next_dates,
        'PRED': predictions['prediction_label'].to_numpy(),
        'DEP_ENT': dep_ent,
    })

    predictions_rg_parts.append(pred_comb)

# master dataframes (empty frames when there was nothing to model)
all_results_rg = pd.concat(results_rg_parts) if results_rg_parts else pd.DataFrame()
all_predictions_rg = pd.concat(predictions_rg_parts) if predictions_rg_parts else pd.DataFrame()

__________________________________________________________________________________________________________________________________________________________________________________________________

### 2
# Time Series Forecasting with PyCaret Time Series

PyCaret 3.0.4 time series documentation: https://pycaret.readthedocs.io/en/stable/api/time_series.html

In [24]:
### Data Prep for Time Series Analysis ###

# Time series dataset: dep-ent key, month and the sales target, taken from df_all
df_all_ts = df_all[["DEP_ENT", "MONTH", "NET_SALES"]]

# Notes on below TS loop
* Manual changes:
  * periods to include in the dates dataset created using next_dates
  * forecast horizon (fh) in predict_model function

In [43]:
### Time Series Loop ###

# Number of future months to forecast for each dep-ent. The same value drives both
# the future date labels and the forecast horizon, so the two cannot drift apart.
n_periods_ts = 6

# Per-series pieces are collected in lists and concatenated once after the loop.
# DataFrame.append is deprecated (removed in pandas 2.0) and growing a frame
# inside a loop copies it on every iteration.
results_ts_parts = []
predictions_ts_parts = []

# list of each dep-ent
all_ts_ts = df_all_ts['DEP_ENT'].unique()

for dep_ent in tqdm(all_ts_ts):

    # temp df for a dep-ent
    df_subset = df_all_ts[df_all_ts['DEP_ENT'] == dep_ent]

    # trim
    df_subset_trim = df_subset[['MONTH', 'NET_SALES']]

    # create series
    df_subset_s = df_subset_trim.set_index('MONTH')['NET_SALES']

    # set frequency of series to month-start
    df_subset_s = df_subset_s.asfreq('MS')

    # test and train sizes, based on the number of observed rows
    n_obs = len(df_subset_trim.index)
    ts_train = round(n_obs * .8) # 80% train
    ts_test = n_obs - ts_train # 20% test

    # initialize setup (the hold-out horizon is the test size)
    s = pycts.setup(df_subset_s, numeric_imputation_target = "mean", fh = ts_test, session_id = 123)

    # model training and selection
    # - optional: exclude = ["naive", "grand_means"]
    best_model = pycts.compare_models()

    # first row of the comparison grid is the best model; copy the slice so the
    # DEP_ENT tag is written to an independent frame rather than a view
    p = pycts.pull().iloc[0:1].copy()
    p['DEP_ENT'] = str(dep_ent)
    results_ts_parts.append(p)

    ### Predict future periods ###

    # max date from original dataset
    max_timestamp = df_subset_trim['MONTH'].max()

    # first-of-month dates for the next n_periods_ts months; used only to label the forecasts
    next_dates = [
        max_timestamp.replace(day=1) + pd.DateOffset(months=offset)
        for offset in range(1, n_periods_ts + 1)
    ]

    # best_model is fitted on the training split only, so forecasting from it would
    # return the first hold-out months, not the months after max_timestamp.
    # finalize_model refits on the full series so the horizon starts after the last observation.
    final_model = pycts.finalize_model(best_model)

    # predict in unseen future
    predictions = pycts.predict_model(final_model, fh=n_periods_ts)

    # .to_numpy() pairs forecasts with dates by position, independent of the index
    pred_comb = pd.DataFrame({
        'MONTH': next_dates,
        'PRED': predictions['y_pred'].to_numpy(),
        'DEP_ENT': dep_ent,
    })

    predictions_ts_parts.append(pred_comb)

# master dataframes (empty frames when there was nothing to model)
all_results_ts = pd.concat(results_ts_parts) if results_ts_parts else pd.DataFrame()
all_predictions_ts = pd.concat(predictions_ts_parts) if predictions_ts_parts else pd.DataFrame()

  0%|          | 0/14 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
grand_means,Grand Means Forecaster,0.7977,0.8547,1053265.9363,1403731.8462,0.7008,0.4233,-0.5374,0.0267
arima,ARIMA,0.8037,0.8586,1061451.186,1410308.2641,0.7087,0.426,-0.564,0.0333
croston,Croston,0.8728,0.925,1151466.3385,1519451.3422,0.7828,0.454,-0.9183,0.02
auto_arima,Auto ARIMA,1.0115,0.9801,1354243.2415,1611787.8779,0.8862,0.4952,-0.9171,0.1
theta,Theta Forecaster,1.18,1.1134,1571089.533,1832350.3154,1.1042,0.5611,-2.164,0.0233
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.3122,1.1909,1760022.2466,1963314.9082,1.2125,0.5963,-2.4527,0.1467
polytrend,Polynomial Trend Forecaster,1.3209,1.2308,1769582.2458,2027109.1468,1.1868,0.6019,-2.5287,0.02
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.3221,1.2317,1771161.5497,2028565.4072,1.1879,0.6023,-2.5371,0.09
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.3263,1.234,1777295.205,2032650.4381,1.1956,0.6026,-2.5751,0.1
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.327,1.2347,1778429.6522,2033861.9546,1.1974,0.6026,-2.5881,0.09


  7%|▋         | 1/14 [00:12<02:39, 12.24s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.9312,1.0491,737308.1653,996583.3509,0.2596,0.2097,-0.31,0.0933
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.933,1.0292,735319.1425,974875.8152,0.2577,0.2109,-0.2061,0.2333
croston,Croston,0.9352,1.0301,731127.802,969403.482,0.2396,0.2143,-0.1312,0.02
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.9369,1.0623,741273.1709,1008614.4614,0.2575,0.2101,-0.3377,0.1
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.9369,1.0623,741273.1709,1008614.4614,0.2575,0.2101,-0.3377,0.0933
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.9369,1.0623,741273.1709,1008614.4614,0.2575,0.2101,-0.3377,0.1
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.9369,1.0623,741273.1709,1008614.4614,0.2575,0.2101,-0.3377,0.0967
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.9369,1.0623,741273.1709,1008614.4614,0.2575,0.2101,-0.3377,0.1067
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.9453,1.0688,747652.2884,1015364.294,0.2654,0.2118,-0.3691,0.1067
grand_means,Grand Means Forecaster,0.9788,1.0161,764344.9872,954732.6748,0.2505,0.2245,-0.0954,0.0233


 14%|█▍        | 2/14 [00:26<02:42, 13.51s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
polytrend,Polynomial Trend Forecaster,1.4476,1.7843,781474.0883,1223134.1037,0.2273,0.2654,-0.2846,0.0233
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.4987,1.7814,788681.9811,1182924.0261,0.248,0.3118,-0.2119,0.1733
croston,Croston,1.4991,1.6396,817267.86,1125951.1509,0.2871,0.288,-0.0955,0.03
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.52,1.8261,803157.5763,1221701.0106,0.2496,0.3186,-0.2887,0.25
snaive,Seasonal Naive Forecaster,1.5242,1.5974,837899.8572,1132948.2511,0.2839,0.3037,-0.1849,0.0333
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.5483,1.7988,818473.7089,1201594.0124,0.2608,0.3271,-0.2409,0.3433
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.5919,1.8769,842639.1688,1264850.2839,0.2597,0.3381,-0.3685,0.38
stlf,STLF,1.6038,1.7392,854411.5502,1166993.6512,0.3238,0.3998,-0.1519,0.0267
grand_means,Grand Means Forecaster,1.6682,1.7418,916254.0694,1222055.1282,0.3118,0.3245,-0.3263,0.02
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.683,1.8817,892516.4542,1252690.4335,0.3052,0.3756,-0.3299,0.19


 21%|██▏       | 3/14 [00:45<02:56, 16.08s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,4.4587,5.4291,131439.3254,193782.6463,0.3915,0.4533,-0.1145,0.1633
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,4.5285,5.4642,138260.8338,198599.8208,0.4043,0.469,-0.1569,0.3133
polytrend,Polynomial Trend Forecaster,4.5497,5.4189,147277.8888,202943.0276,0.4534,0.4841,-0.223,0.0167
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,4.5581,5.4379,143119.0031,195944.3938,0.4272,0.4786,-0.1408,0.25
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,4.5772,5.4693,147114.0494,205637.2499,0.4364,0.4834,-0.2414,0.3633
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,4.6487,5.4587,155290.0501,204953.8535,0.4858,0.5012,-0.2397,0.3567
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,4.6509,5.4792,153289.4226,208097.3215,0.4768,0.4983,-0.2962,0.1667
croston,Croston,4.7212,5.7602,141043.3668,221985.9964,0.3556,0.4967,-0.4168,0.0267
stlf,STLF,4.7327,5.4327,160671.5271,207827.4508,0.5268,0.5228,-0.3008,0.03
snaive,Seasonal Naive Forecaster,4.7362,5.5076,146963.3695,196579.8868,0.4227,0.5927,-0.1668,0.04


 29%|██▊       | 4/14 [01:03<02:48, 16.82s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
croston,Croston,0.9425,0.7996,2707278.2967,3269980.6229,2.8923,0.4871,-0.1316,0.0167
stlf,STLF,0.9496,0.8474,2698550.7478,3424341.7108,1.1575,0.6856,-0.3832,0.03
polytrend,Polynomial Trend Forecaster,0.9759,0.8665,2790492.0261,3524931.4295,2.4417,0.5119,-0.3898,0.02
grand_means,Grand Means Forecaster,0.9945,0.8117,2869376.3769,3335455.3024,3.3229,0.5008,-0.1438,0.0267
naive,Naive Forecaster,1.0179,0.9602,2939039.8287,3977987.0612,1.7482,0.5734,-0.6369,0.0267
snaive,Seasonal Naive Forecaster,1.0295,0.8968,2930109.1522,3641950.6035,1.1727,0.5368,-0.5363,0.0367
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1085,0.9391,3147599.4636,3816522.9704,1.3602,0.7713,-0.6179,0.2467
arima,ARIMA,1.117,0.9893,3184073.7402,4018921.9652,0.652,0.8646,-0.8664,0.04
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.1313,0.9329,3208337.6866,3774804.6814,1.7457,0.8254,-0.6532,0.3167
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1368,0.9426,3215416.0864,3795184.0016,2.3612,0.8807,-0.6943,0.1633


 36%|███▌      | 5/14 [01:27<02:53, 19.30s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
grand_means,Grand Means Forecaster,0.8086,0.8461,340449.6958,455827.8008,1.24,0.6255,-0.1842,0.02
croston,Croston,0.8193,0.8455,345043.6208,455681.7374,1.2463,0.6302,-0.1622,0.0233
auto_arima,Auto ARIMA,0.8424,0.8716,354447.8115,466581.0408,1.2578,0.6556,-0.2475,0.26
theta,Theta Forecaster,0.8753,0.8534,368680.0127,460685.474,1.3925,0.6548,-0.1865,0.0267
snaive,Seasonal Naive Forecaster,0.8778,0.8897,369532.1249,477226.5829,1.1889,0.681,-0.2643,0.0433
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.9399,0.9365,395405.0049,502781.7611,1.4457,0.6794,-0.6238,0.1433
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.953,0.9414,401061.9601,506993.5477,1.4106,0.7088,-0.6468,0.1333
arima,ARIMA,0.9696,0.9447,408287.624,507813.6172,1.4157,0.7051,-0.388,0.0367
polytrend,Polynomial Trend Forecaster,0.9699,0.9088,408471.4234,490421.1254,1.6093,0.6923,-0.3588,0.02
naive,Naive Forecaster,0.9718,0.98,409603.0478,529470.7086,1.3267,0.7103,-0.4949,0.0267


 43%|████▎     | 6/14 [01:42<02:22, 17.76s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
croston,Croston,1.3912,1.3751,924171.9384,1163084.4323,3.4242,0.7072,-0.1894,0.0167
snaive,Seasonal Naive Forecaster,1.4493,1.5711,935015.6527,1307572.2354,4.0164,0.7481,-0.5243,0.0333
grand_means,Grand Means Forecaster,1.5378,1.4351,1038650.9544,1222207.9214,3.8547,0.76,-0.3172,0.0233
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.5845,1.5907,957212.6517,1261357.3351,3.9917,0.7856,-0.5122,0.16
theta,Theta Forecaster,1.6047,1.576,1000883.7723,1286731.5958,3.7804,0.7129,-0.4832,0.0233
naive,Naive Forecaster,1.6427,1.5319,1035140.7458,1253563.2242,3.6197,0.7592,-0.4107,0.0333
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.6775,1.6619,1044733.174,1352050.6536,4.1871,0.8365,-0.6555,0.3467
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.6942,1.6621,1028792.6347,1322736.0871,4.0868,0.8458,-0.6569,0.1733
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.7178,1.6829,1061905.9883,1360819.8195,4.0298,0.8486,-0.6941,0.3433
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.7252,1.695,1083249.0036,1386322.1836,3.9636,0.8885,-0.7302,0.31


 50%|█████     | 7/14 [02:03<02:13, 19.05s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
exp_smooth,Exponential Smoothing,0.8221,0.8802,1532262.352,2257967.2191,4.9763,1.3099,-0.0598,0.0267
arima,ARIMA,0.8764,0.8562,1630209.3911,2197687.3336,6.842,1.3001,-0.0026,0.04
grand_means,Grand Means Forecaster,0.8774,0.8574,1633026.144,2202745.9083,6.8555,1.3019,-0.0084,0.0233
auto_arima,Auto ARIMA,0.8782,0.8536,1634725.4288,2190271.6418,7.1263,1.2911,0.0048,0.1167
theta,Theta Forecaster,0.8922,0.8564,1661053.9666,2200221.8538,7.2245,1.3056,-0.0063,0.0233
naive,Naive Forecaster,0.893,0.9627,1657083.2015,2465088.347,4.5893,1.5481,-0.3039,0.03
croston,Croston,0.898,0.8545,1669338.9999,2196259.5028,7.3592,1.3014,-0.0023,0.02
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8996,0.8551,1676250.1442,2193870.4263,7.468,1.3073,0.0007,0.15
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.9043,0.8572,1686532.9138,2200598.3189,7.5484,1.3094,-0.0064,0.0933
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.9053,0.8566,1687567.076,2198028.3422,7.5684,1.3082,-0.0035,0.1033


 57%|█████▋    | 8/14 [02:16<01:42, 17.02s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(85, 1)"
5,Transformed data shape,"(85, 1)"
6,Transformed train set shape,"(68, 1)"
7,Transformed test set shape,"(17, 1)"
8,Rows with missing values,1.2%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
theta,Theta Forecaster,0.7593,0.658,203932.038,239083.425,17.5496,1.2278,-0.034,0.03
polytrend,Polynomial Trend Forecaster,0.764,0.7101,206032.0042,261008.8061,12.7071,1.3648,-0.2468,0.02
grand_means,Grand Means Forecaster,0.7753,0.6535,208500.637,237018.2701,21.1734,1.1745,-0.0183,0.0233
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.7766,0.7283,217790.4439,274824.0527,14.7614,1.3306,-0.5425,0.27
croston,Croston,0.7966,0.6675,215013.0021,242671.3411,22.932,1.1665,-0.0704,0.02
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.8265,0.7796,233360.0416,294971.7216,8.1706,1.4051,-0.7795,0.13
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.827,0.7435,232755.4843,283070.5657,11.4646,1.4491,-0.7114,0.2433
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.836,0.8083,235234.4345,304891.5003,16.1299,1.3479,-0.8734,0.1133
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.8758,0.7779,249754.7132,298435.1684,13.6113,1.3333,-0.9758,0.0967
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.8774,0.7775,250184.8638,298318.6898,13.7571,1.3348,-0.9765,0.0933


 64%|██████▍   | 9/14 [02:30<01:20, 16.07s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
grand_means,Grand Means Forecaster,1.2368,1.309,920082.2451,1143575.2413,0.2201,0.2007,-0.116,0.0233
arima,ARIMA,1.2931,1.3378,961908.8034,1168650.5619,0.2256,0.2103,-0.1662,0.0433
auto_arima,Auto ARIMA,1.2931,1.3378,961908.8034,1168650.5619,0.2256,0.2103,-0.1662,0.0867
croston,Croston,1.3085,1.4133,974675.1369,1237215.7674,0.2212,0.2133,-0.3274,0.0167
polytrend,Polynomial Trend Forecaster,1.4581,1.5577,1082297.8903,1360852.9445,0.2585,0.2333,-0.5768,0.02
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.4975,1.5856,1111507.1988,1385033.8662,0.2643,0.2396,-0.6309,0.1033
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.507,1.5892,1118335.856,1387789.2165,0.2665,0.2407,-0.6349,0.0967
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.5258,1.6011,1132105.8987,1397937.5245,0.2696,0.2435,-0.6581,0.0967
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.5258,1.6011,1132105.8987,1397937.5245,0.2696,0.2435,-0.6581,0.0933
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.5258,1.6011,1132105.8987,1397937.5245,0.2696,0.2435,-0.6581,0.1


 71%|███████▏  | 10/14 [02:43<01:00, 15.04s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
stlf,STLF,1.3642,1.2086,2171171.4693,2758954.8394,0.4559,0.372,0.0673,0.03
snaive,Seasonal Naive Forecaster,1.3819,1.2327,2249459.8569,2836506.1491,0.5226,0.3762,-0.0532,0.0367
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.4081,1.278,2221810.9794,2902284.339,0.4453,0.3631,-0.031,0.3467
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.4119,1.2385,2251577.3122,2831217.4238,0.5514,0.3666,-0.0024,0.1767
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.4202,1.2661,2256051.0484,2882531.0763,0.4683,0.3693,-0.0254,0.1567
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.4395,1.2813,2291098.1834,2918275.247,0.4843,0.3739,-0.0422,0.3433
arima,ARIMA,1.4486,1.2808,2328867.707,2934536.1048,0.4603,0.3952,-0.0885,0.0333
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.4575,1.3042,2311330.3258,2964932.7316,0.4602,0.3731,-0.0822,0.25
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.4624,1.3636,2313506.8903,3102624.1068,0.4678,0.376,-0.1603,0.3167
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.4671,1.3131,2315709.4559,2981673.1044,0.4398,0.3786,-0.0794,0.1567


 79%|███████▊  | 11/14 [03:01<00:48, 16.13s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(57, 1)"
5,Transformed data shape,"(57, 1)"
6,Transformed train set shape,"(46, 1)"
7,Transformed test set shape,"(11, 1)"
8,Rows with missing values,1.8%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,1.4134,1.243,26679.4414,35796.2666,7.0594,1.0379,-0.1367,0.0433
grand_means,Grand Means Forecaster,1.4285,1.2299,26772.0156,35220.0468,6.9329,1.0579,-0.0596,0.0233
auto_arima,Auto ARIMA,1.4285,1.2299,26772.0156,35220.0468,6.9329,1.0579,-0.0596,0.0967
theta,Theta Forecaster,1.4402,1.292,26774.5523,36557.5179,7.911,1.0362,-0.242,0.0267
croston,Croston,1.5887,1.347,29464.6918,38095.1962,11.8642,1.0113,-0.6571,0.0233
naive,Naive Forecaster,1.6526,1.4382,31832.6745,41779.3983,17.8214,0.9438,-0.5651,0.0367
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.7869,1.5138,34240.3071,44155.7903,13.0114,1.3598,-1.3309,0.1967
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.7941,1.5058,33609.2749,42768.011,11.9112,1.3577,-1.8177,0.15
polytrend,Polynomial Trend Forecaster,1.7963,1.5109,33259.7545,42833.0598,6.9379,1.4597,-1.7843,0.02
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.834,1.5366,33966.5592,43693.1313,5.4084,1.5394,-1.7905,0.0733


 86%|████████▌ | 12/14 [03:12<00:29, 14.59s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(90, 1)"
5,Transformed data shape,"(90, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(18, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.152,1.2578,217230.5206,280220.8537,27.4182,0.5796,0.1917,0.26
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.1663,1.3033,220167.3744,290744.132,31.2027,0.5532,0.1155,0.3167
snaive,Seasonal Naive Forecaster,1.1985,1.2402,225701.7823,277107.6545,22.33,0.605,0.2107,0.0367
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.2718,1.357,239878.2232,303201.4166,29.5957,0.6273,0.0456,0.37
grand_means,Grand Means Forecaster,1.2868,1.5113,244710.6215,341262.4117,31.3057,0.5591,-0.2241,0.0233
arima,ARIMA,1.3121,1.3212,246952.4698,295379.777,22.4059,0.6668,0.1125,0.04
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.3237,1.3771,248377.149,305749.0899,26.5473,0.6498,0.0338,0.1833
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.3337,1.3994,250614.9861,310898.0155,29.5308,0.634,-0.0108,0.3433
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.3718,1.4318,257707.9642,318600.0111,36.357,0.6389,-0.0639,0.1733
stlf,STLF,1.3756,1.3508,260639.7985,304162.2741,27.2601,0.6941,0.0046,0.03


 93%|█████████▎| 13/14 [03:34<00:16, 16.79s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(53, 1)"
5,Transformed data shape,"(53, 1)"
6,Transformed train set shape,"(42, 1)"
7,Transformed test set shape,"(11, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
auto_arima,Auto ARIMA,1.3929,1.5466,2028.6996,2750.1185,0.4794,0.4173,-0.3022,0.0767
arima,ARIMA,1.393,1.5452,2029.576,2748.5019,0.4812,0.4176,-0.3076,0.04
grand_means,Grand Means Forecaster,1.3957,1.5475,2034.4576,2752.8021,0.4805,0.4185,-0.3049,0.0233
naive,Naive Forecaster,1.3968,1.529,2065.3003,2754.2822,0.5376,0.4235,-0.4375,0.0333
exp_smooth,Exponential Smoothing,1.4467,1.5711,2099.0318,2785.0159,0.4583,0.4389,-0.3085,0.0267
theta,Theta Forecaster,1.448,1.5847,2112.0139,2818.624,0.4825,0.4358,-0.3289,0.0267
croston,Croston,1.4544,1.5587,2131.1461,2784.165,0.4828,0.441,-0.2806,0.0167
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.5908,1.6825,2409.8932,3104.5106,0.6196,0.482,-0.7383,0.0767
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.6036,1.6307,2449.5356,3028.6899,0.6169,0.489,-0.6282,0.16
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.6348,1.7064,2490.149,3174.2738,0.6397,0.4946,-0.8044,0.0733


100%|██████████| 14/14 [03:45<00:00, 16.13s/it]
