In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
import pickle
import warnings

from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA
import warnings
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import plot_cross_validation_metric

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


# 0 Importing Data

In [2]:
# Load your dataset
merge_df_scaled = pd.read_csv('../raw_data/merge_df_resize.csv')
merge_df_scaled['date'] = pd.to_datetime(merge_df_scaled['date'])
merge_df_scaled.set_index('date', inplace=True)

merge_df_scaled
# 382600 rows × 64 columns

Unnamed: 0_level_0,id,item_id,dept_id,state_id,sales,wday,month,year,event_name_2,snap_CA,...,event_name_1_StPatricksDay,event_name_1_SuperBowl,event_name_1_Thanksgiving,event_name_1_ValentinesDay,event_name_1_VeteransDay,event_name_1_missing,wday_sin,wday_cos,month_sin,month_cos
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-29,FOODS_2_197_CA_1_validation,FOODS_2_197,FOODS_2,CA,38,1,1,0.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.781832,0.623490,0.500000,0.866025
2011-01-29,FOODS_3_080_CA_1_validation,FOODS_3_080,FOODS_3,CA,33,1,1,0.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.781832,0.623490,0.500000,0.866025
2011-01-29,FOODS_3_090_CA_1_validation,FOODS_3_090,FOODS_3,CA,107,1,1,0.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.781832,0.623490,0.500000,0.866025
2011-01-29,FOODS_3_120_CA_1_validation,FOODS_3_120,FOODS_3,CA,0,1,1,0.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.781832,0.623490,0.500000,0.866025
2011-01-29,FOODS_3_252_CA_1_validation,FOODS_3_252,FOODS_3,CA,19,1,1,0.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.781832,0.623490,0.500000,0.866025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-04-24,FOODS_3_555_CA_1_validation,FOODS_3_555,FOODS_3,CA,24,2,4,1.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.974928,-0.222521,0.866025,-0.500000
2016-04-24,FOODS_3_586_CA_1_validation,FOODS_3_586,FOODS_3,CA,54,2,4,1.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.974928,-0.222521,0.866025,-0.500000
2016-04-24,FOODS_3_587_CA_1_validation,FOODS_3_587,FOODS_3,CA,26,2,4,1.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.974928,-0.222521,0.866025,-0.500000
2016-04-24,FOODS_3_714_CA_1_validation,FOODS_3_714,FOODS_3,CA,27,2,4,1.0,missing,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.974928,-0.222521,0.866025,-0.500000


In [3]:
def feature_extraction(merge_df_scaled):
    # Ignore all warnings within this function
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
    
        #lagged features
        for i in range(1, 8):
            merge_df_scaled[f'sales_lag_{i}'] = merge_df_scaled['sales'].shift(i)
    
        #lagged features per years
        for i in range(1, 4):
            merge_df_scaled[f'sales_lag_{i}years'] = merge_df_scaled['sales'].shift(i * 365)
    
            #rolling sum
        merge_df_scaled['rolling_sum_7'] = merge_df_scaled['sales'].rolling(window=7).sum()
        merge_df_scaled['rolling_sum_30'] = merge_df_scaled['sales'].rolling(window=30).sum()
        merge_df_scaled['rolling_sum_60'] = merge_df_scaled['sales'].rolling(window=60).sum()
        merge_df_scaled['rolling_sum_90'] = merge_df_scaled['sales'].rolling(window=90).sum()
        merge_df_scaled['rolling_sum_120'] = merge_df_scaled['sales'].rolling(window=120).sum()
    
        #rolling average
        merge_df_scaled['rolling_mean_7'] = merge_df_scaled['sales'].rolling(window=7).mean()
        merge_df_scaled['rolling_mean_30'] = merge_df_scaled['sales'].rolling(window=30).mean()
        merge_df_scaled['rolling_mean_60'] = merge_df_scaled['sales'].rolling(window=60).mean()
        merge_df_scaled['rolling_mean_90'] = merge_df_scaled['sales'].rolling(window=90).mean()
        merge_df_scaled['rolling_mean_120'] = merge_df_scaled['sales'].rolling(window=120).mean()
    
        #rolling stdv
        merge_df_scaled['rolling_stdv_7'] = merge_df_scaled['sales'].rolling(window=7).std()
        merge_df_scaled['rolling_stdv_30'] = merge_df_scaled['sales'].rolling(window=30).std()
        merge_df_scaled['rolling_stdv_60'] = merge_df_scaled['sales'].rolling(window=60).std()
        merge_df_scaled['rolling_stdv_90'] = merge_df_scaled['sales'].rolling(window=90).std()
        merge_df_scaled['rolling_stdv_120'] = merge_df_scaled['sales'].rolling(window=120).std()
    
        merge_df_scaled.fillna(0,inplace=True)
        

    return merge_df_scaled


In [4]:
def feature_extraction_transfer_test(train_df,test_df):

    # Ignore all warnings within this function
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
           
    
        #lagged features per years
        for i in range(1, 4):
            test_df[f'sales_lag_{i}years'] = train_df[f'sales_lag_{i}years'].iloc[-1]
    
            #rolling sum
        test_df['rolling_sum_7'] = train_df['rolling_sum_7'].iloc[-1]
        test_df['rolling_sum_30'] = train_df['rolling_sum_30'].iloc[-1]
        test_df['rolling_sum_60'] = train_df['rolling_sum_60'].iloc[-1]
        test_df['rolling_sum_90'] = train_df['rolling_sum_90'].iloc[-1]
        test_df['rolling_sum_120'] = train_df['rolling_sum_120'].iloc[-1]
        
        # Rolling average
        test_df['rolling_mean_7'] = train_df['rolling_mean_7'].iloc[-1]
        test_df['rolling_mean_30'] = train_df['rolling_mean_30'].iloc[-1]
        test_df['rolling_mean_60'] = train_df['rolling_mean_60'].iloc[-1]
        test_df['rolling_mean_90'] = train_df['rolling_mean_90'].iloc[-1]
        test_df['rolling_mean_120'] = train_df['rolling_mean_120'].iloc[-1]
        
        # Rolling standard deviation
        test_df['rolling_stdv_7'] = train_df['rolling_stdv_7'].iloc[-1]
        test_df['rolling_stdv_30'] = train_df['rolling_stdv_30'].iloc[-1]
        test_df['rolling_stdv_60'] = train_df['rolling_stdv_60'].iloc[-1]
        test_df['rolling_stdv_90'] = train_df['rolling_stdv_90'].iloc[-1]
        test_df['rolling_stdv_120'] = train_df['rolling_stdv_120'].iloc[-1]
    
        # Identify the last available date in the training data
        last_date_train = train_df.index[-1]
    
        # Fill in lagged features for the first few rows where future knowledge is available
        #for i in range(1, 8):
        #    # Identify the lagged date for the current lag
        #    lagged_date = last_date_train - pd.Timedelta(days=i)
            
            # Fill in the lagged sales values for corresponding lagged days from the training data
        #    test_df[f'sales_lag_{i}'] = test_df.index.map(lambda x: train_df.loc[x - pd.Timedelta(days=i), 'sales'] if x <= last_date_train else train_df[f'sales_lag_{i}'].iloc[-1])


        # Fill in lagged features for the first few rows where future knowledge is available
        for i in range(1, 8):
            test_df[f'sales_lag_{i}'] = np.nan  # Initialize with NaN
            
            # Iterate over each row in the test DataFrame
            for idx, row in test_df.iterrows():
                lagged_date = idx - pd.Timedelta(days=i)  # Calculate the lagged date
                
                # Check if the lagged date is within the training data range
                if lagged_date in train_df.index:
                    test_df.at[idx, f'sales_lag_{i}'] = train_df.loc[lagged_date, 'sales']
                else:
                    test_df.at[idx, f'sales_lag_{i}'] = train_df['sales'].iloc[-1]
    
    return test_df

In [5]:
random_df = merge_df_scaled

In [6]:
merge_df_scaled.drop(columns=["item_id","dept_id","state_id","event_name_2"],inplace=True)

In [7]:
merge_df_scaled.fillna(0,inplace=True)

In [None]:
merge_df_scaled["id"].unique()

# 1. Defining Model Functions

In [13]:
def perform_prophet(product_data):

    product_data.reset_index(inplace=True,names="date")
    
    prophet_product_df = product_data[["id","date","sales"]]
    prophet_product_df.columns = ["id","ds","y"]
    prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
    
    data_train = prophet_product_df.iloc[:-28]
    data_test = prophet_product_df.iloc[-28:]
    X_train = data_train["ds"]
    y_train = data_train["y"]
    X_test = data_test["ds"]
    y_test = data_test["y"]
    
    fbp = Prophet()

    model = fbp.fit(data_train)
    
    predict_placeholder = fbp.make_future_dataframe(28,freq="D")
    
    # Predict on the test data
    y_pred = fbp.predict(predict_placeholder[-28:])
    

    # Calculate and return the error metric for the current fold
    mae = mean_absolute_error(y_test, y_pred["yhat"])
    
    return model, mae

In [14]:
def perform_auto_arima(product_data):
    data_train = product_data.iloc[:-28]
    data_test = product_data.iloc[-28:]
    y_train = data_train["sales"]
    y_test = data_test["sales"]

    # Fit ARIMA model on the training data using auto_arima to find the best (p, d, q)
    model = auto_arima(y_train, start_p=0, start_q=0, max_p=5, max_q=5, d=1,
                       seasonal=True, trace=False, error_action='ignore', 
                       suppress_warnings=True, stepwise=True)
    
    # Predict on the test data
    predictions = model.predict(n_periods=len(y_test))

    # Calculate and return the error metric for the current fold
    mae = mean_absolute_error(y_test, predictions)
    
    return model, mae

In [15]:
def objective_optuna(trial, y_train, y_test):
    
    trend = trial.suggest_categorical('trend', ['add'])
    seasonal = trial.suggest_categorical('seasonal', [None, 'add'])
    seasonal_periods = trial.suggest_categorical('seasonal_periods', [None, 4, 7, 12])
    
    product_results = []

    # Fit Holt-Winters model on the training data
    model = ExponentialSmoothing(y_train, trend=trend, seasonal=seasonal, seasonal_periods=seasonal_periods,freq='D')
    fitted_model = model.fit(optimized=True)

    # Predict on the test data
    predictions = fitted_model.forecast(steps=len(y_test))

    # Calculate and store the error metric
    mae = mean_absolute_error(y_test, predictions)
    product_results.append(mae)

    # Average MAE for this product
    average_mae = np.mean(product_results)
    return average_mae

In [16]:
def perform_exp_smoothing(product_data):
    data_train = product_data.iloc[:-28]
    data_test = product_data.iloc[-28:]
    y_train = data_train["sales"]
    y_test = data_test["sales"]
    # Create a study object
    study = optuna.create_study(direction='minimize')
    
    print(f"Optimizing hyperparameters for product: {id}")
    
    
    # Run the optimization process for the current product
    study.optimize(lambda trial: objective_optuna(trial, y_train, y_test), n_trials=10, n_jobs=-1)

    # Get the best hyperparameters and the corresponding best MAE
    best_params = study.best_params
    best_mae = study.best_value

    # Create the best model with the obtained hyperparameters
    best_model = ExponentialSmoothing(y_train, **best_params).fit()
    
    return best_model, best_mae

In [133]:
import lightgbm as lgb

def perform_lightgbm(product_data):
    
    data_train_val = product_data.iloc[:-28]
    data_train_val = feature_extraction(data_train_val)
    data_test = product_data.iloc[-28:]
    data_test = feature_extraction_transfer_test(data_train_val,data_test)

    data_train = data_train_val.iloc[:-112]
    data_val = data_train_val.iloc[-112:]
    
    X_train = data_train.drop(columns="sales")
    y_train = data_train["sales"]
    X_val = data_val.drop(columns="sales")
    y_val = data_val["sales"]
    X_test = data_test.drop(columns="sales")
    y_test = data_test["sales"]

    
    
    
    # Define LightGBM parameters
    params = {
        "n_estimators": 1000,
        'objective': 'regression',
        'metric': 'rmse',
        "boosting_type": "gbdt",
        "max_depth": -1,
        'learning_rate': 0.01,
        'feature_fraction': 0.4,
        "lambda_l1": 1,
        "lambda_l2": 1,
        "seed": 46,
    }

    model = lgb.LGBMRegressor(**params)
    
    # Create dataset for LightGBM
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_val, y_val, reference=lgb_train)
    
    # Train the model
    num_round = 1000

    bst = lgb.train(params, lgb_train, num_round, valid_sets=lgb_eval, callbacks=[lgb.early_stopping(stopping_rounds=50)])
     
    # Make predictions for the next 28 days
    predictions = bst.predict(X_test, num_iteration=bst.best_iteration)

    # Calculate and return the error metric for the current fold
    mae = mean_absolute_error(y_test, predictions)
    
    return bst, mae

# Example usage:
# sales_forecast = forecast_sales(product_data)
# print(sales_forecast)


# 2.Running all models in a loop to find for each product with lowest score

In [19]:
models_list = ["LightGBM"]

In [None]:
from pmdarima import auto_arima

# Dictionary to store MAE results for each unique time-series identified by id
product_results = {}
average_mae = []

# Iterate over each unique product series identified by id
for id in merge_df_scaled['id'].unique()[:10]:
    print(f"Analyzing product: {id}")
    product_data = merge_df_scaled[merge_df_scaled['id'] == id].drop(columns="id")
    product_data_with_id = merge_df_scaled[merge_df_scaled['id'] == id]

    # Results list for the current product time-series
    results = {}
    best_score = 999.99
    best_model_name = ""



    #Looping all models
    for model_name in models_list:

        if model_name == "ARIMA":
            #TODO: Add 5-fold split here for another loop (or inside the model function?) and then take the average score per model as their mae score
            
            # Fit ARIMA model on the training data using auto_arima to find the best (p, d, q)
            model, mae = perform_auto_arima(product_data)
            results[model_name] = {"mae": mae, "model": model}
            if mae < best_score:
                best_score = mae
                best_model = model
                best_model_name = model_name

        elif model_name == "ExponentialSmoothing":

            # To be built
            model, mae = perform_exp_smoothing(product_data)
            results[model_name] = {"mae": mae, "model": model}
            if mae < best_score:
                best_score = mae
                best_model = model
                best_model_name = model_name

        elif model_name == "Prophet":

            model, mae = perform_prophet(product_data_with_id)
            results[model_name] = {"mae": mae, "model": model}
            if mae < best_score:
                best_score = mae
                best_model = model
                best_model_name = model_name


        elif model_name == "LightGBM":

            model, mae = perform_lightgbm(product_data)
            results[model_name] = {"mae": mae, "model": model}
            if mae < best_score:
                best_score = mae
                best_model = model
                best_model_name = model_name


    #Printing results for this product
    print(results)
    print(f"Model results for {id}")
    print(f"Best model: {best_model_name}")
    print(f"Best score: {best_score}")

    average_mae.append(best_score)

    # Store the average MAE for the current product time-series
    product_results[id] = {"best_score": best_score, "best_model": best_model_name, "model": best_model}

    #Store the best model in a pkl file
    filename = f'../models/{id}_model.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(best_model, f)

# Create a DataFrame to store the results
results_df_arima = pd.DataFrame(product_results.items(), columns=['id', 'MAE'])

# Set the 'id' column as the index
results_df_arima.set_index('id', inplace=True)

average_mae = np.mean(average_mae)

print(f"Total average MAE: {average_mae}")


Analyzing product: FOODS_2_197_CA_1_validation


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:26:41 - cmdstanpy - INFO - Chain [1] start processing
15:26:41 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:26:41,975] A new study created in memory with name: no-name-afbaf00f-6b93-4474-af2e-243c66442434


Optimizing hyperparameters for product: FOODS_2_197_CA_1_validation


[I 2024-05-10 15:26:43,606] Trial 1 finished with value: 9.388827437788668 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 1 with value: 9.388827437788668.
[I 2024-05-10 15:26:43,681] Trial 4 finished with value: 9.783006525295232 and parameters: {'trend': 'add', 'seasonal': 'add', 'seasonal_periods': 4}. Best is trial 1 with value: 9.388827437788668.
[I 2024-05-10 15:26:43,702] Trial 2 finished with value: 9.388827437788668 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 1 with value: 9.388827437788668.
[I 2024-05-10 15:26:43,751] Trial 3 finished with value: 9.388827437788668 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 1 with value: 9.388827437788668.
[I 2024-05-10 15:26:43,810] Trial 9 finished with value: 9.388827437788668 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 1 with value: 9.388827437788668.
[I 2024-05-

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004308 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4460
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 21.664975
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[998]	valid_0's rmse: 4.79687
      with_intercept=False)}, 'Prophet': {'mae': 11.367010469983766, 'model': <prophet.forecaster.Prophet object at 0x7fa432ce5420>}, 'ExponentialSmoothing': {'mae': 9.388827437788668, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa432c71030>}, 'LightGBM': {'mae': 17.73258393721617, 'model': <lightgbm.basic.Booster object at 0x7fa432f2d060>}}
Model results for FOODS_2_197_CA_1_validation
Best model: ARIMA
Best score: 8.726633436944372
Analyzing product: FOODS_3_080_CA_1_vali

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:27:01 - cmdstanpy - INFO - Chain [1] start processing
15:27:01 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:27:01,720] A new study created in memory with name: no-name-6d07663b-88b6-40c6-8da0-d778b67f7a7e


Optimizing hyperparameters for product: FOODS_3_080_CA_1_validation


[I 2024-05-10 15:27:03,685] Trial 3 finished with value: 5.603251743760496 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 3 with value: 5.603251743760496.
[I 2024-05-10 15:27:03,723] Trial 7 finished with value: 5.603251743760496 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 12}. Best is trial 3 with value: 5.603251743760496.
[I 2024-05-10 15:27:03,770] Trial 1 finished with value: 5.603251743760496 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 3 with value: 5.603251743760496.
[I 2024-05-10 15:27:03,778] Trial 9 finished with value: 5.603251743760496 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 3 with value: 5.603251743760496.
[I 2024-05-10 15:27:03,807] Trial 5 finished with value: 5.603251743760496 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 3 with value: 5.603251743760496.
[I 2024-

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003783 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4072
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 20.687535
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 5.37814
      with_intercept=False)}, 'Prophet': {'mae': 4.776500194398421, 'model': <prophet.forecaster.Prophet object at 0x7fa432f060b0>}, 'ExponentialSmoothing': {'mae': 4.851012052269259, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa4323bbaf0>}, 'LightGBM': {'mae': 8.694071175621318, 'model': <lightgbm.basic.Booster object at 0x7fa432c72770>}}
Model results for FOODS_3_080_CA_1_validation
Best model: Prophet
Best

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:27:56 - cmdstanpy - INFO - Chain [1] start processing
15:27:56 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:27:56,504] A new study created in memory with name: no-name-2076b8fd-c138-43b6-9476-dd959ed522ad


Optimizing hyperparameters for product: FOODS_3_090_CA_1_validation


[I 2024-05-10 15:27:56,781] Trial 0 finished with value: 37.241681706408414 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 0 with value: 37.241681706408414.
[I 2024-05-10 15:27:57,128] Trial 1 finished with value: 37.241681706408414 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 0 with value: 37.241681706408414.
[I 2024-05-10 15:27:57,217] Trial 3 finished with value: 37.241681706408414 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 0 with value: 37.241681706408414.
[I 2024-05-10 15:27:57,281] Trial 4 finished with value: 37.241681706408414 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 12}. Best is trial 0 with value: 37.241681706408414.
[I 2024-05-10 15:27:57,522] Trial 9 finished with value: 37.241681706408414 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 0 with value: 37.241681706408414.
[

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000933 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5426
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 68.591089
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[347]	valid_0's rmse: 13.4825
      with_intercept=False)}, 'Prophet': {'mae': 16.664135902250575, 'model': <prophet.forecaster.Prophet object at 0x7fa438716cb0>}, 'ExponentialSmoothing': {'mae': 37.241681706408414, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa432341330>}, 'LightGBM': {'mae': 19.488855121143693, 'model': <lightgbm.basic.Booster object at 0x7fa432c71240>}}
Model results for FOODS_3_090_CA_1_validation
Best model: Prophet
Best score: 16.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:28:44 - cmdstanpy - INFO - Chain [1] start processing
15:28:44 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:28:44,735] A new study created in memory with name: no-name-1ae6cf0a-1981-4423-a64a-7f8429643b4b


Optimizing hyperparameters for product: FOODS_3_120_CA_1_validation


[I 2024-05-10 15:28:45,368] Trial 1 finished with value: 27.01694491780205 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 1 with value: 27.01694491780205.
[I 2024-05-10 15:28:45,421] Trial 3 finished with value: 27.01694491780205 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 1 with value: 27.01694491780205.
[I 2024-05-10 15:28:45,494] Trial 5 finished with value: 27.01694491780205 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 1 with value: 27.01694491780205.
[I 2024-05-10 15:28:45,557] Trial 4 finished with value: 27.01694491780205 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 1 with value: 27.01694491780205.
[I 2024-05-10 15:28:45,651] Trial 2 finished with value: 27.01694491780205 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 12}. Best is trial 1 with value: 27.01694491780205.
[I 2024-05-

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000873 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4981
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 32.275240
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 18.9941
      with_intercept=False)}, 'Prophet': {'mae': 21.07083458938636, 'model': <prophet.forecaster.Prophet object at 0x7fa433074550>}, 'ExponentialSmoothing': {'mae': 25.157870794592952, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa4330a3010>}, 'LightGBM': {'mae': 32.09723713760365, 'model': <lightgbm.basic.Booster object at 0x7fa433106320>}}
Model results for FOODS_3_120_CA_1_validation
Best model: Prophet
Best score: 21.07083458938636
Analyzing product: FOODS_3_252_CA_1_v

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:29:45 - cmdstanpy - INFO - Chain [1] start processing
15:29:45 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:29:45,617] A new study created in memory with name: no-name-d2aee288-e043-4e48-b3b9-3c5781c563df


Optimizing hyperparameters for product: FOODS_3_252_CA_1_validation


[I 2024-05-10 15:29:46,657] Trial 2 finished with value: 10.92925276421992 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 2 with value: 10.92925276421992.
[I 2024-05-10 15:29:46,996] Trial 5 finished with value: 10.92925276421992 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 2 with value: 10.92925276421992.
[I 2024-05-10 15:29:47,021] Trial 4 finished with value: 10.92925276421992 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 2 with value: 10.92925276421992.
[I 2024-05-10 15:29:47,116] Trial 6 finished with value: 10.92925276421992 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 2 with value: 10.92925276421992.
[I 2024-05-10 15:29:47,121] Trial 8 finished with value: 10.92925276421992 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 12}. Best is trial 2 with value: 10.92925276421992.
[I 2024-05-

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000565 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4793
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 39.639594
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 6.55657
      with_intercept=False)}, 'Prophet': {'mae': 6.5637324544171625, 'model': <prophet.forecaster.Prophet object at 0x7fa438643fd0>}, 'ExponentialSmoothing': {'mae': 6.591632596564805, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa432c70f10>}, 'LightGBM': {'mae': 17.689703570355366, 'model': <lightgbm.basic.Booster object at 0x7fa4323b8970>}}
Model results for FOODS_3_252_CA_1_validation
Best model: Prophet
Best score: 6.5637324544171625
Analyzing product: FOODS_3_555_CA_1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:30:52 - cmdstanpy - INFO - Chain [1] start processing
15:30:52 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:30:52,525] A new study created in memory with name: no-name-1d26988b-2f56-41e6-85f9-8649ed6a92e5


Optimizing hyperparameters for product: FOODS_3_555_CA_1_validation


[I 2024-05-10 15:30:54,697] Trial 0 finished with value: 5.214282383016028 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 12}. Best is trial 0 with value: 5.214282383016028.
[I 2024-05-10 15:30:54,840] Trial 4 finished with value: 5.214282383016028 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 0 with value: 5.214282383016028.
[I 2024-05-10 15:30:54,877] Trial 5 finished with value: 5.214282383016028 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 0 with value: 5.214282383016028.
[I 2024-05-10 15:30:54,891] Trial 6 finished with value: 5.214282383016028 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 0 with value: 5.214282383016028.
[I 2024-05-10 15:30:54,933] Trial 9 finished with value: 5.214282383016028 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 0 with value: 5.214282383016028.
[I 20

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001519 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4065
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 20.000564
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 3.96733
      with_intercept=False)}, 'Prophet': {'mae': 4.261908361923384, 'model': <prophet.forecaster.Prophet object at 0x7fa4330a0e80>}, 'ExponentialSmoothing': {'mae': 3.234239988928919, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa4330a2aa0>}, 'LightGBM': {'mae': 11.110303566214885, 'model': <lightgbm.basic.Booster object at 0x7fa432c72200>}}
Model results for FOODS_3_555_CA_1_validation
Best model: Exponential

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:31:40 - cmdstanpy - INFO - Chain [1] start processing
15:31:41 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:31:41,058] A new study created in memory with name: no-name-a6aafdf3-c790-4bb7-af64-1fcc778370ed


Optimizing hyperparameters for product: FOODS_3_586_CA_1_validation


[I 2024-05-10 15:31:42,597] Trial 0 finished with value: 10.693124076299275 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 0 with value: 10.693124076299275.
[I 2024-05-10 15:31:42,834] Trial 2 finished with value: 10.693124076299275 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 4}. Best is trial 0 with value: 10.693124076299275.
[I 2024-05-10 15:31:43,190] Trial 6 finished with value: 10.693124076299275 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 0 with value: 10.693124076299275.
[I 2024-05-10 15:31:43,672] Trial 7 finished with value: 10.693124076299275 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 0 with value: 10.693124076299275.
[I 2024-05-10 15:31:43,886] Trial 5 finished with value: 10.693124076299275 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 7}. Best is trial 0 with value: 10.693124076299275.
[I

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012548 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4653
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 46.627750
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[676]	valid_0's rmse: 8.59475
      with_intercept=False)}, 'Prophet': {'mae': 6.241775733242483, 'model': <prophet.forecaster.Prophet object at 0x7fa438173430>}, 'ExponentialSmoothing': {'mae': 6.407874992421177, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa432342bf0>}, 'LightGBM': {'mae': 9.255567869387699, 'model': <lightgbm.basic.Booster object at 0x7fa4323b9540>}}
Model results for FOODS_3_586_CA_1_validation
Best model: Prophet
Best score: 6.2417

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_product_df['ds'] = pd.to_datetime(prophet_product_df['ds'])
15:32:52 - cmdstanpy - INFO - Chain [1] start processing
15:32:52 - cmdstanpy - INFO - Chain [1] done processing
[I 2024-05-10 15:32:52,681] A new study created in memory with name: no-name-3cd29add-05a1-46c3-981e-87956a114c18


Optimizing hyperparameters for product: FOODS_3_587_CA_1_validation


[I 2024-05-10 15:32:54,681] Trial 7 finished with value: 17.481420998759102 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 7 with value: 17.481420998759102.
[I 2024-05-10 15:32:54,726] Trial 3 finished with value: 17.481420998759102 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 7 with value: 17.481420998759102.
[I 2024-05-10 15:32:54,838] Trial 5 finished with value: 17.481420998759102 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 7 with value: 17.481420998759102.
[I 2024-05-10 15:32:54,870] Trial 9 finished with value: 17.481420998759102 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 7 with value: 17.481420998759102.
[I 2024-05-10 15:32:54,889] Trial 8 finished with value: 17.481420998759102 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': None}. Best is trial 7 with value: 17.4814209

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002223 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4710
[LightGBM] [Info] Number of data points in the train set: 1773, number of used features: 41
[LightGBM] [Info] Start training from score 27.139876
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1000]	valid_0's rmse: 8.68011
      with_intercept=False)}, 'Prophet': {'mae': 10.36622612532744, 'model': <prophet.forecaster.Prophet object at 0x7fa432f38940>}, 'ExponentialSmoothing': {'mae': 17.311694281927434, 'model': <statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper object at 0x7fa432425b40>}, 'LightGBM': {'mae': 8.650968999189798, 'model': <lightgbm.basic.Booster object at 0x7fa43229e4d0>}}
Model results for FOODS_3_587_CA_1_validation
Best model: LightGBM
Be