use newpycaret env

In [5]:
# Packages
import snowflake.connector
import pandas as pd
import os
import numpy as np
import xlsxwriter
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.impute import SimpleImputer
from tqdm import tqdm

# import the regression module from pycaret   
#import pycaret.regression as pycr

# import the time seris module from pycaret
import pycaret.time_series as pycts

2. Load Data
_______________________________________________________________

### To Do: copy and paste in to a new chunk, enter credentials and run to save in environment. Then delete chunk
%env snowflakeuser=<your_snowflake_username> <br>
%env snowflakepass=<your_snowflake_password>

In [3]:
# Query Snowflake

# Snowflake connection parameters
connection_params = {
    "user": os.environ['snowflakeuser'],
    "password": os.environ['snowflakepass'],
    "account": "zib52348.us-east-1",
    "role": "ACCOUNTADMIN",
    "warehouse": "REPORTING",
    "database": "ANALYTICS",
    "schema": "FORECASTING",
}

# Establish a connection to Snowflake
conn = snowflake.connector.connect(**connection_params)

with open('net_sales_query.sql', 'r') as query:
    # connection == the connection to your database, in your case prob_db
    df_in = pd.read_sql_query(query.read(),conn)

# Close the connection
conn.close()

In [4]:
### 1. Data prep ###

df = df_in

# convert month field to date
df["MONTH"] = pd.to_datetime(df["MONTH"])

# ***** ADJUSTABLE ***** #
# current params - want to get forecasts for 2023 (Jan - Dec) so holding out data 
df = df[df['MONTH'] <= pd.Timestamp(2022,12,1)]

# Some random months will have data that we want to remove (* Want to test without July though)
df = df[["DEP_ENT", "MONTH", "NET_SALES"]] # select fields of interest
df = df.sort_values(['DEP_ENT', 'MONTH']) # reorder dataframe


### 2. Spot checks ###

# check data types
print(df.dtypes)
print("-----")

# check the unique time_series. 14 different department-entity combinations
print(df['DEP_ENT'].nunique())
print("-----")

# check how many months for each dep-ent. 3 dep-ent do not have all months of data
print(df.groupby(['DEP_ENT']).size())
print("-----")

# check for nulls. No nulls
print(df.isnull().sum())

DEP_ENT              object
MONTH        datetime64[ns]
NET_SALES           float64
dtype: object
-----
14
-----
DEP_ENT
160_155    84
170_155    84
200_155    84
200_310    84
210_155    84
210_165    84
210_310    84
220_155    84
220_310    78
240_155    84
250_155    84
250_165    50
250_310    84
260_155    47
dtype: int64
-----
DEP_ENT      0
MONTH        0
NET_SALES    0
dtype: int64


# Multiple Series with Pycaret
Pycaret 3.04 time series documentation: https://pycaret.readthedocs.io/en/stable/api/time_series.html

* could do with regression as well: https://pycaret.readthedocs.io/en/stable/api/regression.html

In [8]:
### Additional data prep ###

# create series
df_s = df.set_index(['DEP_ENT','MONTH'])['NET_SALES']
# convert back to dataframe
df_d = df_s.to_frame()
# impute
mean_imputer = SimpleImputer(strategy='mean')
df_d['NET_SALES'] = mean_imputer.fit_transform(df_d['NET_SALES'].values.reshape(-1,1))
#reset index
df_d.reset_index(inplace=True)

In [13]:
### Time Series Loop ###

# Create empty dataframes
all_results_ts = pd.DataFrame()
all_predictions_ts = pd.DataFrame()

# list of each dep-ent
all_ts_ts = df_d['DEP_ENT'].unique()

for i in tqdm(all_ts_ts):
    
    # temp df for a dep-ent
    df_subset = df_d[df_d['DEP_ENT'] == i]
    
    # trim
    df_subset_trim = df_subset[['MONTH', 'NET_SALES']] 
    
    # create series
    df_subset_s = df_subset_trim.set_index('MONTH')['NET_SALES']
    
    # set frequency of series
    df_subset_s = df_subset_s.asfreq('MS') 

    # test and train
    ts_train = round(len(df_subset_trim.index) * .8) # 80% train
    ts_test = len(df_subset_trim.index) - round(len(df_subset_trim.index) * .8) # 20% test
    
    # initialize setup
    #s = pycts.setup(df_subset_s, numeric_imputation_target = "mean", fh = ts_test, session_id = 123)
    s = pycts.setup(df_subset_s, fh=12, numeric_imputation_target = "mean", session_id = 123)

    # model training and selection
    best_model = pycts.compare_models(sort = 'MAPE', exclude = ["naive", "grand_means", "croston", ])
    #best_model = pycts.compare_models(sort = 'MAPE', include = ["arima", "auto_arima", "ets", ])
    
    # plot trained model
    pycts.plot_model(best_model, plot = 'forecast')
    
    # capture the compare result grid and store best model in list
    p = pycts.pull().iloc[0:1]
    p['DEP_ENT'] = str(i)
    
    all_results_ts = all_results_ts.append(p)
    
    ### Predict future periods ###
    
    # max date from original dataset
    max_timestamp = df_subset_trim['MONTH'].max()
    
    # create dataframe for future dates, in this case the rest of 2023. This is not for predicting but to join back up with predictions
    #next_dates = [max_timestamp.replace(day=1) + pd.DateOffset(months=i) for i in range(1, 7)] # Need to change range based on how many periods ahead you want to predict
    
    next_dates = [max_timestamp.replace(day=1) + pd.DateOffset(months=i) for i in range(1, 13)] # *** CHANGE IF WANT TO FORECAST DIFFERENT THAN 12 ***
    
    new_dates_df = pd.DataFrame({'MONTH': next_dates})
    new_dates_df["MONTH"] = pd.to_datetime(new_dates_df["MONTH"]) 
    
    # predict in unseen future
    #predictions = pycts.predict_model(best_model, fh=6) # Need to change range fh for how many periods ahead you want to predict
    
    predictions = pycts.predict_model(best_model, fh=12) # *** CHANGE IF WANT TO FORECAST DIFFERENT THAN 12 ***
    
    pred_temp = predictions.reset_index()
    
    pred_comb = pd.concat([pd.DataFrame({'MONTH': next_dates}), pred_temp['y_pred']], axis = 1)
    pred_comb['DEP_ENT'] = i # add dep
    pred_comb.rename(columns = {'y_pred':'PRED'}, inplace = True)
    
    # append to master dataframe
    all_predictions_ts = all_predictions_ts.append(pred_comb)

  0%|          | 0/14 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
auto_arima,Auto ARIMA,0.8269,0.8019,1050726.2749,1301393.0242,0.8359,0.4476,-0.6113,0.88
arima,ARIMA,0.8361,0.8097,1062904.8315,1314337.4607,0.8466,0.4515,-0.6535,0.8467
polytrend,Polynomial Trend Forecaster,1.0438,0.9633,1339894.2434,1569133.3983,1.1036,0.5241,-1.7843,1.03
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.0805,1.0483,1385230.2809,1710670.3708,1.1046,0.522,-2.3625,0.09
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.0441,0.9635,1340480.5478,1569563.7466,1.1047,0.524,-1.7929,0.0767
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.1839,1.0985,1524289.2788,1798450.85,1.1068,0.6066,-2.7834,0.19
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.0487,0.9648,1346755.5098,1571943.4278,1.1147,0.5253,-1.821,0.1267
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.053,0.9681,1352377.9911,1577265.2655,1.1189,0.5269,-1.8437,0.1333
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.053,0.9681,1352377.9911,1577265.2655,1.1189,0.5269,-1.8437,0.1367
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.053,0.9681,1352377.9911,1577265.2655,1.1189,0.5269,-1.8437,0.14


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,0.4405,0.4535,620399.5242,816569.1645,0.2003,0.2146,-0.2699


  7%|▋         | 1/14 [00:28<06:08, 28.38s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
auto_arima,Auto ARIMA,1.425,1.363,897557.9381,1086251.9565,0.3276,0.2677,-2.1373,0.2967
theta,Theta Forecaster,1.5971,1.5436,1005427.2993,1228365.606,0.3557,0.3024,-2.406,0.0333
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.6292,1.5551,1029211.814,1245073.7152,0.3681,0.3025,-3.7996,0.5133
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.6557,1.567,1041451.7377,1246053.8027,0.3774,0.3081,-2.6678,0.2
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.6168,1.5341,1014828.7998,1218464.3773,0.3802,0.2926,-2.9101,0.08
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,1.6291,1.5405,1022786.5114,1223420.0892,0.3809,0.296,-2.8689,0.0867
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,1.6291,1.5405,1022786.5114,1223420.0892,0.3809,0.296,-2.8689,0.0833
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,1.6291,1.5405,1022786.5114,1223420.0892,0.3809,0.296,-2.8689,0.0833
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,1.6291,1.5405,1022786.5114,1223420.0892,0.3809,0.296,-2.8689,0.08
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,1.6291,1.5405,1022786.5114,1223420.0892,0.3809,0.296,-2.8689,0.08


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,1.4581,1.3985,1033690.3146,1306252.1809,0.2351,0.2523,-0.1992


 14%|█▍        | 2/14 [00:44<04:16, 21.34s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
ets,ETS,1.7039,1.7516,775296.3733,1003066.1056,0.2363,0.2766,0.1209,0.06
theta,Theta Forecaster,1.9184,1.9038,902748.3539,1176315.5894,0.2652,0.2889,-0.5369,0.03
polytrend,Polynomial Trend Forecaster,2.0622,2.2202,932636.6831,1266844.5048,0.2681,0.3026,-0.3958,0.0233
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,2.227,2.1228,1001041.7135,1222064.0515,0.2989,0.3798,-0.3169,0.12
snaive,Seasonal Naive Forecaster,2.0631,1.8998,994582.1911,1190315.2471,0.3025,0.3293,-0.7098,0.0467
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,2.1734,2.04,990194.9687,1178863.8396,0.3058,0.3783,-0.2268,0.2267
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,2.1984,2.1185,998141.771,1222535.431,0.3089,0.3977,-0.3198,0.13
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,2.2517,2.1396,1014426.35,1227948.4722,0.3094,0.3927,-0.3191,0.1467
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,2.2555,2.1218,1017054.6039,1216517.1591,0.3094,0.3858,-0.2945,0.1233
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,2.2805,2.1272,1038202.3838,1227156.5254,0.3188,0.39,-0.3329,0.25


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,ETS,0.6203,0.5515,484153.7721,614095.3267,0.178,0.1829,0.3482


 21%|██▏       | 3/14 [01:01<03:30, 19.12s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
snaive,Seasonal Naive Forecaster,1.2744,1.0399,143273.4681,187177.869,0.3404,0.3971,-0.1389,0.0467
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.233,1.1068,138126.5565,199746.3656,0.3531,0.3081,-0.2013,0.14
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.3043,1.0674,145781.5195,192633.3658,0.3951,0.3315,-0.0974,0.2233
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.3075,1.0874,146514.589,196649.5083,0.3988,0.3303,-0.1776,0.2667
polytrend,Polynomial Trend Forecaster,1.3121,1.0722,146680.2601,193420.0843,0.4087,0.3377,-0.114,0.0233
stlf,STLF,1.2908,1.066,146090.122,194336.9492,0.4115,0.337,-0.2857,0.0333
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.4021,1.1554,157605.5978,208900.8217,0.4202,0.3536,-0.3982,0.28
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,1.3858,1.2541,156623.0997,228786.7228,0.4204,0.339,-0.9382,0.12
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.4333,1.1799,161858.163,214293.5227,0.4363,0.3489,-0.5586,0.13
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.4886,1.144,166488.0184,206998.6676,0.4742,0.3861,-0.2774,0.12


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Seasonal Naive Forecaster,1.4455,1.2146,190035.3499,234810.7016,0.8006,0.5005,-6.2908


 29%|██▊       | 4/14 [01:17<02:59, 17.96s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,1.2522,1.1589,4043154.4272,5078639.2853,2.0841,0.7726,-3.0554,0.1067
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.9888,0.9405,3182473.1275,4108322.5237,2.2821,0.6217,-1.3629,0.1033
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.9826,0.9279,3186360.9365,4089805.979,2.6299,0.6202,-2.1924,0.1133
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.045,0.9989,3363387.6713,4363862.5067,2.6313,0.6598,-1.6733,0.25
exp_smooth,Exponential Smoothing,0.939,0.9207,3018712.0812,4017163.556,2.7981,0.5668,-1.1845,0.0633
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.9068,0.8875,2919351.2222,3894490.0024,2.8278,0.5666,-1.4481,0.2467
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1298,1.0773,3636539.5869,4725226.1198,2.8762,0.6874,-2.5497,0.5
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.9442,0.8884,3047927.2073,3894392.9412,3.18,0.5969,-1.3813,0.0967
polytrend,Polynomial Trend Forecaster,0.7733,0.7243,2478329.7128,3152049.9015,3.2891,0.4578,-0.248,0.0267
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.9274,0.8765,2992740.4277,3850088.6835,3.3316,0.5741,-1.4855,0.26


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,DecisionTreeRegressor,1.1454,0.9912,3533207.1196,4055081.5266,3.4236,0.8374,-0.0679


 36%|███▌      | 5/14 [01:37<02:48, 18.71s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
exp_smooth,Exponential Smoothing,0.8401,0.8162,354312.8147,451863.8861,1.0555,0.739,-1.0524,0.0633
theta,Theta Forecaster,0.781,0.7393,329520.0477,410786.7195,1.2959,0.6352,-0.458,0.03
ets,ETS,0.9,0.8895,379795.4153,493746.8344,1.303,0.6981,-1.1867,0.0433
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.864,0.8536,364269.9958,472746.6021,1.3989,0.6487,-1.2691,0.1133
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.9541,0.9097,402128.1129,503172.0999,1.4071,0.7236,-1.6718,0.25
polytrend,Polynomial Trend Forecaster,0.8279,0.7749,349218.5489,430259.5189,1.4076,0.6573,-0.6417,0.0267
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.9499,0.9048,400547.0723,501382.2824,1.4099,0.7073,-1.5627,0.2467
auto_arima,Auto ARIMA,0.7729,0.7399,325586.7817,408782.8273,1.4118,0.6417,-0.7269,0.2933
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.8847,0.8477,372960.14,469303.9807,1.4239,0.6711,-1.2694,0.0967
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.8847,0.8477,372960.14,469303.9807,1.4239,0.6711,-1.2694,0.1


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Exponential Smoothing,0.69,0.6139,271958.2815,313408.332,1.3012,0.8401,-0.6539


 43%|████▎     | 6/14 [01:52<02:19, 17.49s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,0.9002,0.9969,717661.2116,1015809.1676,1.3294,0.8416,-0.6255,0.0433
stlf,STLF,0.8619,0.9456,685324.1178,957861.0615,1.3894,0.7694,-0.5524,0.04
exp_smooth,Exponential Smoothing,0.9591,1.0301,766819.0106,1061851.3095,1.4,0.899,-0.3123,0.0633
auto_arima,Auto ARIMA,0.958,1.0441,764900.0127,1067115.1078,1.4957,0.8169,-0.6753,0.77
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.1895,1.3404,951638.8921,1376624.8733,1.6155,0.9054,-0.8913,0.13
ets,ETS,0.8946,0.9453,712034.1862,959322.9747,1.6803,0.7381,-0.4258,0.0433
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.0477,1.1677,838412.2976,1202147.0701,1.6921,0.828,-0.4519,0.12
snaive,Seasonal Naive Forecaster,0.9347,0.9985,745043.5492,1015750.8391,1.6985,0.782,-0.5279,0.0433
polytrend,Polynomial Trend Forecaster,1.0084,1.0908,810046.6719,1130949.3834,1.7197,0.7031,-0.1466,0.03
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.9944,1.1384,796640.013,1172713.9374,1.7223,0.7999,-0.5267,0.1533


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,ARIMA,1.6542,1.6766,1271362.9952,1744487.1021,13.502,1.2106,-1.042


 50%|█████     | 7/14 [02:10<02:03, 17.64s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.9512,0.8889,1930690.4976,2620319.7921,5.5604,1.5041,-0.41,0.2067
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8498,0.7879,1731786.4192,2350362.1153,6.5668,1.4159,-0.2047,0.0967
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.8863,0.801,1816372.5226,2402518.3695,7.1417,1.3581,-0.304,0.2233
arima,ARIMA,0.8501,0.7463,1730037.4555,2210640.9764,7.9116,1.3364,-0.0277,0.04
exp_smooth,Exponential Smoothing,0.8563,0.7494,1729444.8849,2221812.0488,8.2626,1.3326,-0.0403,0.03
auto_arima,Auto ARIMA,0.8764,0.7404,1787965.77,2191850.9808,8.3784,1.3569,-0.009,0.15
theta,Theta Forecaster,0.8771,0.7552,1787262.2748,2241060.7289,8.4655,1.3502,-0.0651,0.03
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8904,0.7498,1815260.5565,2224977.576,8.9112,1.3498,-0.0536,0.2267
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.9047,0.7587,1847988.6031,2254276.7679,9.1786,1.3537,-0.0892,0.09
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.9046,0.7575,1847684.256,2250476.0161,9.1955,1.3521,-0.0861,0.0867


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,ExtraTreesRegressor,0.7687,0.7569,1695014.0925,2402983.1974,7.8813,1.3057,-0.5116


 57%|█████▋    | 8/14 [02:24<01:39, 16.61s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(79, 1)"
5,Transformed data shape,"(79, 1)"
6,Transformed train set shape,"(67, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,1.3%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.7419,0.7163,189684.7339,245707.7065,5.1809,1.1752,-0.2612,0.1067
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,0.5577,0.5466,139880.5638,185025.3059,6.103,1.1289,0.3015,0.09
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.7245,0.6552,181545.468,221301.1755,6.8483,1.3164,0.0637,0.1867
ets,ETS,0.8877,0.9239,230160.0023,323558.6003,7.4333,1.2975,-1.7234,0.04
snaive,Seasonal Naive Forecaster,0.9026,0.8543,234216.9534,297029.5889,8.1322,1.3619,-1.1104,0.0433
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.6869,0.667,174603.3096,227452.8384,8.3264,1.2858,-0.1091,0.2233
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.5888,0.546,147795.8042,184594.0451,9.5485,1.074,0.3221,0.09
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.5888,0.546,147795.8042,184594.0451,9.5485,1.074,0.3221,0.0867
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.5888,0.546,147795.8042,184594.0451,9.5485,1.074,0.3221,0.0933
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.5888,0.546,147795.8042,184594.0451,9.5485,1.074,0.3221,0.0867


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,AdaBoostRegressor,0.6669,0.5549,150195.607,171441.7275,8.372,1.4111,-5.083


 64%|██████▍   | 9/14 [02:39<01:19, 15.87s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.9112,0.8867,809698.4753,983227.8222,0.1791,0.1807,-0.4486,0.1133
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8966,0.8928,795259.3474,988690.3758,0.1849,0.1763,-0.4547,0.2267
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.0921,1.0729,972260.007,1190295.8347,0.2047,0.2173,-1.3205,0.1733
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,1.1187,1.057,992649.951,1171061.973,0.2243,0.2193,-1.0584,0.26
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.2258,1.128,1088380.0795,1249005.335,0.2431,0.2448,-1.4515,0.2433
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.2816,1.1684,1136511.3765,1294575.5264,0.2516,0.2526,-1.5087,0.0933
polytrend,Polynomial Trend Forecaster,1.2169,1.1597,1078221.9872,1283767.1609,0.2531,0.2396,-1.4649,0.02
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,1.2645,1.193,1121164.4567,1320918.299,0.2587,0.2497,-1.5928,0.0833
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.2526,1.175,1109879.7185,1300609.8643,0.2598,0.2467,-1.5259,0.0933
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,1.2762,1.2022,1131670.6305,1331206.2427,0.2606,0.2523,-1.6351,0.0867


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,AdaBoostRegressor,1.1999,1.1204,1080258.9827,1264250.3066,0.2367,0.2093,-0.0734


 71%|███████▏  | 10/14 [02:54<01:02, 15.59s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.9948,0.925,1808125.1641,2225854.7723,0.2687,0.2919,0.1356,0.1233
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.9956,0.9617,1808816.6841,2312795.0083,0.2774,0.2995,0.1053,0.2767
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,1.0065,0.9678,1828259.3469,2327079.1269,0.281,0.2893,0.104,0.2367
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.0304,0.935,1871390.2511,2250251.2136,0.2863,0.2993,0.0752,0.2667
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,1.0257,0.9587,1864512.9254,2306664.4579,0.2872,0.305,0.0716,0.1467
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,1.0537,0.9905,1913077.7486,2382286.7801,0.3009,0.3147,0.0303,0.2833
stlf,STLF,1.1147,1.0414,2010115.5901,2498804.2591,0.3075,0.346,0.0128,0.0367
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,1.1056,1.0708,2007027.4526,2574663.1451,0.3081,0.3376,-0.1041,0.1467
arima,ARIMA,1.1602,1.1269,2097606.8529,2706819.3628,0.3107,0.3562,-0.2099,0.0433
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,1.1056,1.0458,1998103.202,2511082.7213,0.3163,0.3411,-0.0161,0.1467


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,BayesianRidge,0.8932,0.7509,1783304.1642,1970141.9954,0.2388,0.2271,0.5686


 79%|███████▊  | 11/14 [03:15<00:51, 17.24s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(51, 1)"
5,Transformed data shape,"(51, 1)"
6,Transformed train set shape,"(39, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,2.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,1.1436,0.9963,25300.2059,35286.1421,12.7462,1.0303,-0.4589,0.0567
auto_arima,Auto ARIMA,1.3374,1.1292,33639.1662,43251.5044,32.5163,1.1309,-4.4522,0.1333
ets,ETS,2.5892,2.0475,74994.8117,88684.4887,39.5121,1.6286,-74.9515,0.03
theta,Theta Forecaster,2.2375,1.8513,70575.2809,85585.3144,41.6883,1.5771,-80.6318,0.0267
exp_smooth,Exponential Smoothing,2.9568,2.3508,90653.2732,106603.781,52.9016,1.6354,-134.8732,0.04
polytrend,Polynomial Trend Forecaster,3.3663,2.728,115249.1398,135593.7487,77.4335,1.733,-287.9826,0.0333
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,3.5282,2.8059,119657.4236,139178.2257,82.4026,1.8751,-300.0693,0.2033
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,3.4713,2.7875,119204.1257,139553.5162,82.5079,1.7925,-298.1482,1.23
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,3.5305,2.8004,119713.7643,138858.7629,84.5627,1.8685,-298.3896,0.0967
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,3.5621,2.8082,120427.4955,139161.063,84.8162,1.8523,-298.4164,0.0833


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,ARIMA,0.7586,0.4874,21324.0073,24661.5117,18.0433,1.1922,-0.0368


 86%|████████▌ | 12/14 [03:52<00:46, 23.34s/it]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,NET_SALES
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(84, 1)"
5,Transformed data shape,"(84, 1)"
6,Transformed train set shape,"(72, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
snaive,Seasonal Naive Forecaster,1.1404,1.1202,235253.2089,285035.5118,33.2292,0.6875,0.2163,0.0467
arima,ARIMA,1.1772,1.1622,242439.9851,295122.0235,33.2697,0.7555,0.182,0.0433
auto_arima,Auto ARIMA,0.9066,0.9055,186977.2438,230448.9618,34.03,0.576,0.486,0.98
theta,Theta Forecaster,0.8978,1.0049,184175.3197,253030.9731,35.2769,0.5134,0.4281,0.03
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,1.0471,1.0723,215985.6677,272497.7411,36.1851,0.6328,0.2933,0.1133
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.9397,1.0145,193522.4645,256898.6564,39.9866,0.5253,0.386,0.1467
omp_cds_dt,Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending,0.9501,1.0233,195396.4318,259125.8505,40.0933,0.5374,0.3697,0.1167
stlf,STLF,1.2569,1.216,257577.3311,307365.0046,40.4817,0.7976,0.1341,0.0367
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.9005,0.9938,185361.9319,251785.5191,40.7671,0.5213,0.4155,0.2533
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.9449,1.0071,194806.1775,255258.7798,40.9852,0.5415,0.3966,0.1267


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Seasonal Naive Forecaster,0.8471,0.9593,188432.1611,263875.3922,0.3794,0.3583,0.6769


 93%|█████████▎| 13/14 [04:11<00:19, 19.33s/it]


ValueError: Not Enough Data Points, set a lower number of folds or fh

* Error above - 26_155 didnt have enough data
Merge data

In [18]:
# join the model info (just name for now)
model_names = all_results_ts[['Model', 'DEP_ENT']]
merged = pd.merge(all_predictions_ts, model_names, on =['DEP_ENT'])

In [19]:
#initial sales pull
sales = df_in
# select cols
sales = sales[["DEP_ENT", "MONTH", "NET_SALES"]]
# only include sales data for the full months we have
first_of_month = datetime.today().replace(day=1).date()
sales = sales[sales['MONTH'] < pd.to_datetime(first_of_month)]

# combine prediction data and original sales data
merged2 = pd.merge(merged, sales, how = 'left', on =['DEP_ENT', 'MONTH'])

In [21]:
### Budget ###

# Establish a connection to Snowflake
conn = snowflake.connector.connect(**connection_params)

with open('budget_query.sql', 'r') as query:
    # connection == the connection to your database, in your case prob_db
    budg = pd.read_sql_query(query.read(),conn)

# Close the connection
conn.close()

# convert month to datetime
budg["MONTH"] = pd.to_datetime(budg["MONTH"])
# select cols
budg = budg[["MONTH", "BUDGET_AMOUNT", "DEP_ENT"]]

In [22]:
# combine prediction/sales data with budget data
merged3 = pd.merge(merged2, budg, how = 'left', on =['DEP_ENT', 'MONTH'])

In [23]:
# write csv
merged3.to_csv('pycaret_results.csv')