In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

import time
import math
import datetime
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import plot_cross_validation_metric


In [12]:
merge_df_scaled = pd.read_csv("../raw_data/cleaned_merge_df_top10.csv")

In [13]:
prophet_df = merge_df_scaled[["id","date","sales"]]

In [15]:
prophet_df.columns = ["id","ds","y"]

In [16]:
prophet_df

Unnamed: 0,id,ds,y
0,FOODS_2_197_CA_1_validation,2011-01-29,38
1,FOODS_3_080_CA_1_validation,2011-01-29,33
2,FOODS_3_090_CA_1_validation,2011-01-29,107
3,FOODS_3_120_CA_1_validation,2011-01-29,0
4,FOODS_3_252_CA_1_validation,2011-01-29,19
...,...,...,...
19125,FOODS_3_555_CA_1_validation,2016-04-24,24
19126,FOODS_3_586_CA_1_validation,2016-04-24,54
19127,FOODS_3_587_CA_1_validation,2016-04-24,26
19128,FOODS_3_714_CA_1_validation,2016-04-24,27


In [17]:
prophet_df['ds'] = pd.to_datetime(prophet_df['ds'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prophet_df['ds'] = pd.to_datetime(prophet_df['ds'])


In [18]:
from sklearn.metrics import mean_absolute_error
import warnings


# Auto_ARIMA with MAE

In [29]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
import os
import pickle

# Dictionary to store MAE results for each unique time-series identified by id
product_results = {}
models_list = {}

# Define a function to perform Auto ARIMA
def perform_prophet(product_data):
    data_train = product_data.iloc[:-28]
    data_test = product_data.iloc[-28:]
    X_train = data_train["ds"]
    y_train = data_train["y"]
    X_test = data_test["ds"]
    y_test = data_test["y"]
    
    fbp = Prophet()

    model = fbp.fit(data_train)
    
    predict_placeholder = fbp.make_future_dataframe(28,freq="D")
    
    # Predict on the test data
    y_pred = fbp.predict(predict_placeholder[-28:])
    

    # Calculate and return the error metric for the current fold
    mae = mean_absolute_error(y_test, y_pred["yhat"])
    
    return model, mae

# Iterate over each unique product series identified by id
for id in merge_df_scaled['id'].unique():
    print(f"Analyzing product: {id}")
    product_data = prophet_df[prophet_df['id'] == id]

    model, mae = perform_prophet(product_data)

    # Store the model in the dictionary with id as key
    models_list[id] = model

    # Store the average MAE for the current product time-series
    product_results[id] = mae
    print(f'Mean Absolute Error for {id}: {mae}')
    filename = f'../models/{id}_model.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(model, f)

# Optionally, you could print or analyze the average MAE across all products
average_mae_across_products = np.mean(list(product_results.values()))
print(f'Average Mean Absolute Error across all products: {average_mae_across_products}')


18:16:39 - cmdstanpy - INFO - Chain [1] start processing


Analyzing product: FOODS_2_197_CA_1_validation


18:16:39 - cmdstanpy - INFO - Chain [1] done processing
18:16:39 - cmdstanpy - INFO - Chain [1] start processing
18:16:39 - cmdstanpy - INFO - Chain [1] done processing


Mean Absolute Error for FOODS_2_197_CA_1_validation: 11.367010469983766
Analyzing product: FOODS_3_080_CA_1_validation


18:16:39 - cmdstanpy - INFO - Chain [1] start processing


Mean Absolute Error for FOODS_3_080_CA_1_validation: 4.776500194398421
Analyzing product: FOODS_3_090_CA_1_validation


18:16:39 - cmdstanpy - INFO - Chain [1] done processing
18:16:39 - cmdstanpy - INFO - Chain [1] start processing


Mean Absolute Error for FOODS_3_090_CA_1_validation: 16.664135902250575
Analyzing product: FOODS_3_120_CA_1_validation


18:16:40 - cmdstanpy - INFO - Chain [1] done processing
18:16:40 - cmdstanpy - INFO - Chain [1] start processing
18:16:40 - cmdstanpy - INFO - Chain [1] done processing


Mean Absolute Error for FOODS_3_120_CA_1_validation: 21.07083458938636
Analyzing product: FOODS_3_252_CA_1_validation


18:16:40 - cmdstanpy - INFO - Chain [1] start processing


Mean Absolute Error for FOODS_3_252_CA_1_validation: 6.5637324544171625
Analyzing product: FOODS_3_555_CA_1_validation


18:16:40 - cmdstanpy - INFO - Chain [1] done processing


Mean Absolute Error for FOODS_3_555_CA_1_validation: 4.261908361923384
Analyzing product: FOODS_3_586_CA_1_validation


18:16:40 - cmdstanpy - INFO - Chain [1] start processing
18:16:41 - cmdstanpy - INFO - Chain [1] done processing
18:16:41 - cmdstanpy - INFO - Chain [1] start processing


Mean Absolute Error for FOODS_3_586_CA_1_validation: 6.241775733242483
Analyzing product: FOODS_3_587_CA_1_validation


18:16:41 - cmdstanpy - INFO - Chain [1] done processing
18:16:41 - cmdstanpy - INFO - Chain [1] start processing
18:16:41 - cmdstanpy - INFO - Chain [1] done processing


Mean Absolute Error for FOODS_3_587_CA_1_validation: 10.36622612532744
Analyzing product: FOODS_3_714_CA_1_validation


18:16:41 - cmdstanpy - INFO - Chain [1] start processing


Mean Absolute Error for FOODS_3_714_CA_1_validation: 5.523726278892037
Analyzing product: FOODS_3_808_CA_1_validation


18:16:41 - cmdstanpy - INFO - Chain [1] done processing


Mean Absolute Error for FOODS_3_808_CA_1_validation: 10.665453491107941
Average Mean Absolute Error across all products: 9.750130360092957


In [30]:
average_mae_across_products

9.750130360092957