In [1]:
from prophet import Prophet
from prophet.serialize import model_to_json
import pandas as pd
import json
from tqdm import tqdm
from datetime import date
from llaves_more_then_q95 import llaves_more_then_q95
import numpy as np
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')
import logging

# Set the logging level for cmdstanpy to WARNING
logging.getLogger('cmdstanpy').setLevel(logging.WARNING)


In [2]:
master_sales_by_week = pd.read_parquet('./datasets/master_sales_by_week_curated.parquet')

In [3]:
# add year column
master_sales_by_week['year'] = master_sales_by_week['date'].apply(lambda x: x.strftime('%Y'))

In [4]:
# get product_ids without any sale
total_sales_by_product_id = master_sales_by_week.groupby(['product_id'], as_index=False).agg({'quantity':'sum', })
products_without_sales = total_sales_by_product_id[total_sales_by_product_id['quantity'] == 0]
product_ids_without_sales = products_without_sales['product_id'].unique()
print(f'Total products_ids without sales: {len(product_ids_without_sales)}')
# remove products_id without any sale from the dataset
master_sales_by_week = master_sales_by_week[~(master_sales_by_week['product_id'].isin(product_ids_without_sales))]

Total products_ids without sales: 266


In [5]:
master_sales_by_week.head(1)

Unnamed: 0,combination,date,cod_fami,quantity,store_id,price_taxes_excluded,product_id,description_fami,description,event,discount,year
0,201AA3,2018-01-08,201,1,1010100,7554.62,229254.1001.EST,ACCESORIOS BEBE,PEZONERA GBC5103 EN SILICONA,NO EVENT,0,2018


In [6]:
for year in master_sales_by_week['year'].unique():
    n_unique_keys = master_sales_by_week[master_sales_by_week['date'].between(f'{year}-01-01',f'{year}-12-31')]['combination'].nunique()
    print(f"Año:{year}\n\t# Llaves Unicas: {n_unique_keys}")

Año:2018
	# Llaves Unicas: 6552
Año:2019
	# Llaves Unicas: 6517
Año:2020
	# Llaves Unicas: 5822
Año:2021
	# Llaves Unicas: 5552
Año:2022
	# Llaves Unicas: 5665
Año:2023
	# Llaves Unicas: 5539
Año:2024
	# Llaves Unicas: 4384


In [7]:
unique_keys_in_2022 = master_sales_by_week[master_sales_by_week['date'].between('2022-01-01','2022-12-31')]['combination'].unique()
unique_keys_in_2023 = master_sales_by_week[master_sales_by_week['date'].between('2023-01-01','2023-12-31')]['combination'].unique()

keys_to_forecast = list(set(unique_keys_in_2022) & set(unique_keys_in_2023)) # we going to forecast all the keys in 2022, and 2023 for testing


#### Create dataset for train (2018-2022) and test(2023)

In [8]:
master_sales_by_week_train = master_sales_by_week[master_sales_by_week['date'].between('2018-01-01','2022-12-31')]
master_sales_by_week_test = master_sales_by_week[master_sales_by_week['date'].between('2023-01-01','2023-12-31')]

In [9]:
weeks_of_information_by_combination = []
for i in tqdm(keys_to_forecast):
    df_temp = master_sales_by_week_train[master_sales_by_week_train['combination'] == i]
    dict_temp = {
        'combination' : i,
        'n_weeks' : df_temp['date'].nunique()
    }
    weeks_of_information_by_combination.append(dict_temp)
weeks_of_information_by_combination_df = pd.DataFrame(weeks_of_information_by_combination)

  0%|          | 0/4871 [00:00<?, ?it/s]

100%|██████████| 4871/4871 [01:11<00:00, 68.23it/s]


In [10]:
keys_with_less_than_12_weeks_of_info = weeks_of_information_by_combination_df[weeks_of_information_by_combination_df['n_weeks'] <= 12]['combination'].unique()

# keep just the keys present in 2022 and 2023
master_sales_by_week_train = master_sales_by_week_train[master_sales_by_week_train['combination'].isin(keys_to_forecast)]
master_sales_by_week_test = master_sales_by_week_test[master_sales_by_week_test['combination'].isin(keys_to_forecast)]
# drop all the combination withot enoff information in train and test 
master_sales_by_week_train = master_sales_by_week_train[~(master_sales_by_week_train['combination'].isin(keys_with_less_than_12_weeks_of_info))]
master_sales_by_week_test = master_sales_by_week_test[~(master_sales_by_week_test['combination'].isin(keys_with_less_than_12_weeks_of_info))]
print(f'#Keys with less than 12 weeks of info: {len(keys_with_less_than_12_weeks_of_info)}')

#Keys with less than 12 weeks of info: 477


In [14]:
master_sales_by_week_train.head()

Unnamed: 0,combination,date,cod_fami,quantity,store_id,price_taxes_excluded,product_id,description_fami,description,event,discount,year
0,201AA3,2018-01-08,201,1,1010100,7554.62,229254.1001.EST,ACCESORIOS BEBE,PEZONERA GBC5103 EN SILICONA,NO EVENT,0,2018
1,201AA3,2018-01-15,201,1,1010100,12596.64,206906.1001.EST,ACCESORIOS BEBE,RECOLECTOR IMP2476 INFANT X2,NO EVENT,0,2018
2,201AA3,2018-01-22,201,4,1010100,10075.63,233106.1001.EST,ACCESORIOS BEBE,RECOLECTOR IMP2476 INFANT X2,NO EVENT,0,2018
3,201AA3,2018-01-29,201,1,1010100,10075.63,206895.1001.EST,ACCESORIOS BEBE,ASPIRADOR NASAL BSL015 INFANT,NO EVENT,0,2018
4,201AA3,2018-02-05,201,6,1010100,10075.63,233106.1001.EST,ACCESORIOS BEBE,VASO GBC5047 ENTRENADOR ANTIGT,TIJERETAZO I,30,2018


In [98]:
final_prediction_2023 = pd.DataFrame()
dataframe_ytrue_ytest = pd.DataFrame()
rmse_por_llave = pd.DataFrame()
for combination in tqdm(master_sales_by_week_train['combination'].unique()):
    sales_combination = master_sales_by_week_train[master_sales_by_week_train['combination'] == combination]
    sales_combination = sales_combination[['date','combination','quantity','cod_fami']]

    first_date = sales_combination['date'].min() # Get the first date in the dataframe
    last_date = sales_combination['date'].max() # Get the last date in the dataframe

    df_dates = pd.DataFrame({'date': pd.date_range(start=f'{first_date.year}-01-08', end='2022-12-31', freq='W-MON')})

    sales_combination = df_dates.merge(sales_combination, on='date', how='left') # Merge the dataframes
    sales_combination['quantity'] = sales_combination['quantity'].fillna(0) # Replace NaN values with 0
    sales_combination = sales_combination[~(sales_combination['combination'].isnull())]

    # Create the prophet dataframe
    prophet_dataframe = pd.DataFrame()
    prophet_dataframe['ds'] = sales_combination['date']
    prophet_dataframe['y'] = sales_combination['quantity']
    prophet_dataframe['y'] = prophet_dataframe['y'].astype(int)

    n_unique_years = prophet_dataframe['ds'].dt.year.nunique() # Count the number of different years in the dataframe

    # Train Model
    model = Prophet(weekly_seasonality = 13)
    model.fit(prophet_dataframe)

    # -----------
    # Test Model
    # -----------
    # Calculate how many weeks are missing from the last date in the dataframe to the last week of 2023
    weeks = (date(2023, 12, 31) - prophet_dataframe['ds'].max().date()).days // 7

    # Create a dataframe with the dates from the last date in the dataframe to the last week of 2023
    sales_by_week_test = master_sales_by_week_test.copy()
    df_combination_test = sales_by_week_test[sales_by_week_test['combination'] == combination][['date','quantity']] # get the y_true values of the combination selected

    # future Dataframe
    future_2023 = model.make_future_dataframe(periods=weeks, freq='W-MON')
    future_2023 = future_2023[future_2023['ds'] >= '2023-01-01']
    future_2023 = future_2023.merge(df_combination_test, right_on='date',left_on= 'ds', how = 'left')
    del future_2023['date']
    familia = combination[:3]
    future_2023['familia'] = familia
    future_2023['quantity'].fillna(0,inplace=True)

    # Get Predictions
    forecast = model.predict(future_2023) # Make the predictions
    forecast_2023 = forecast[['ds', 'yhat']] # Get the predictions for 2023
    forecast_2023.columns = ['date', 'demand_yhat'] # Rename columns
    forecast_2023['llave'] = combination # Add the combination column
    # Add to the final predictions dataframe
    final_prediction_2023 = pd.concat([final_prediction_2023, forecast_2023])
    final_prediction_2023['demand_yhat'] = final_prediction_2023['demand_yhat'].apply(lambda x: 0 if x < 0 else x)
    final_prediction_2023['demand_yhat'] = final_prediction_2023['demand_yhat'].apply(lambda x: np.ceil(x))

    # Generate dataframe with y_true and y_hat
    df_combination_test.columns = ['date','y_true']
    df_ytrue_yhat = final_prediction_2023.merge(df_combination_test,on= 'date', how = 'left')
    df_ytrue_yhat['y_true'].fillna(0, inplace=True)
    dataframe_ytrue_ytest = pd.concat([dataframe_ytrue_ytest, df_ytrue_yhat])
    RMSE = np.sqrt(mean_squared_error(df_ytrue_yhat['y_true'], df_ytrue_yhat['demand_yhat']))
    rmse_temp = {
        'llave' : combination,
        'rsme' : RMSE
    }
    df_rmse = pd.DataFrame([rmse_temp])
    rmse_por_llave = pd.concat([rmse_por_llave,df_rmse])

  0%|          | 0/4394 [00:00<?, ?it/s]

 91%|█████████ | 3994/4394 [10:54:12<14:00:22, 126.06s/it]python(25292) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
 91%|█████████ | 3995/4394 [10:56:29<14:21:27, 129.54s/it]python(25486) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
 91%|█████████ | 3996/4394 [10:58:48<14:37:11, 132.24s/it]python(25754) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
 91%|█████████ | 3997/4394 [11:01:13<14:59:48, 135.99s/it]python(26041) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
 91%|█████████ | 3998/4394 [11:03:53<15:46:46, 143.45s/it]python(26307) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
 91%|█████████ | 3999/4394 [11:06:45<16:38:59, 151.75s/it]python(26771) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
 91%|█████████ | 4000/4394 [11:09:34<17:11:21, 157.06s/it]python

In [None]:
final_prediction_2023.to_parquet('./new_experiments/seasonality_52/final_prediction_2023.parquet', index = False)
dataframe_ytrue_ytest.to_parquet('./new_experiments/seasonality_52/dataframe_ytrue_ytest.parquet', index = False)
rmse_por_llave.to_parquet('./new_experiments/seasonality_52/rmse_por_llave.parquet', index = False)

In [83]:
rmse_por_llave

Unnamed: 0,llave,rsme
0,201AA3,2.961289


In [None]:
final_prediction_2023 = pd.DataFrame()
rmse_por_llave = pd.DataFrame()
test_model = True

for combination in tqdm(llaves_con_informacion_historica_suficiente_para_forecast):

    # Get the dataframe for the combination
    df_combination = df_grouped_by_week[df_grouped_by_week['combination'] == combination]
    df_combination = df_combination[['date_week','combination','quantity']]
    
    if combination in llaves_more_then_q95 and len(df_combination[df_combination['date_week'] >= '2022-01-01']) > 0: # si la llave esta en las llaves q95 y tiene data en el 2022, se entrena con data 2022, de lo contrario, se entra con todo lo disponible
        df_combination = df_combination[df_combination['date_week'] >= '2022-01-01']
    # df_combination_test = df_grouped_by_week_test[df_grouped_by_week_test['combination'] == combination][['date_week','quantity','campaign','discount_for_event']] # get the y_true values of the combination selected

    familia = str(combination[:3])
    df_discount_and_campaings = archivo_campanias[archivo_campanias['familia'] == familia]

    # df_discount_and_campaings = df_grouped_by_week[df_grouped_by_week['date_week'] >= '2024-01-01']
    # df_discount_and_campaings = df_discount_and_campaings[['combination','date_week','discount_for_event','campaign']]
    # Complete the missing weeks with 0
    first_date = df_combination['date_week'].min() # Get the first date in the dataframe
    last_date = df_combination['date_week'].max() # Get the last date in the dataframe
    
    if test_model == True and first_date.year <= 2022:
        # Create a dataframe with all the weeks between the year of the first date and the year of the last date
        df_dates = pd.DataFrame({'date_week': pd.date_range(start=f'{first_date.year}-01-08', end=f'2022-12-31', freq='W-MON')})    

        df_combination = df_dates.merge(df_combination, on='date_week', how='left') # Merge the dataframes
        df_combination['quantity'] = df_combination['quantity'].fillna(0) # Replace NaN values with 0
        df_combination = df_combination[~(df_combination['combination'].isnull())]
        df_combination = df_combination.merge(df_discount_and_campaings, on='date_week', how='left') # Merge the dataframes

        # Create the prophet dataframe
        prophet_dataframe = pd.DataFrame()
        prophet_dataframe['ds'] = df_combination['date_week']
        prophet_dataframe['y'] = df_combination['quantity']
        prophet_dataframe['campaign'] = df_combination['campaign']
        prophet_dataframe['discount_for_event'] = df_combination['discount_for_event']
        prophet_dataframe['y'] = prophet_dataframe['y'].astype(int)

        n_unique_years = prophet_dataframe['ds'].dt.year.nunique() # Count the number of different years in the dataframe

        # Create the model
        if n_unique_years == 1:
            model = Prophet(weekly_seasonality = 13)
            model.add_regressor('campaign')
            model.add_regressor('discount_for_event')

        else:
            model = Prophet(weekly_seasonality = 52)
            model.add_regressor('campaign')
            model.add_regressor('discount_for_event')
            
            
        try:
            model.fit(prophet_dataframe)
            with open(f'./serialized_models/{str(combination)}.json', 'w') as file:
                file.write(model_to_json(model))  # Save model
            
            
            if test_model:
                # Calculate how many weeks are missing from the last date in the dataframe to the last week of 2023
                weeks = (date(2023, 12, 31) - prophet_dataframe['ds'].max().date()).days // 7

                # Create a dataframe with the dates from the last date in the dataframe to the last week of 2023
                df_grouped_by_week_test = df_grouped_by_week[df_grouped_by_week['date_week'].between('2023-01-01','2023-12-31')]
                df_combination_test = df_grouped_by_week_test[df_grouped_by_week_test['combination'] == combination][['date_week','quantity']] # get the y_true values of the combination selected
                df_combination_test = df_combination_test.merge(df_discount_and_campaings, on='date_week', how='left') # Merge the dataframes

                future_2023 = model.make_future_dataframe(periods=weeks, freq='W-MON')
                future_2023 = future_2023[future_2023['ds'] >= '2023-01-01']
                future_2023 = future_2023.merge(df_combination_test, right_on='date_week',left_on= 'ds', how = 'left')
                del future_2023['date_week']
                future_2023['familia'] = familia
                future_2023['campaign'].fillna(0,inplace=True)
                future_2023['quantity'].fillna(0,inplace=True)
                future_2023['discount_for_event'].fillna(0,inplace=True)
                
                
                forecast = model.predict(future_2023) # Make the predictions
                forecast_2023 = forecast[['ds', 'yhat']] # Get the predictions for 2023
                forecast_2023.columns = ['date', 'demand_yhat'] # Rename columns
                forecast_2023['llave'] = combination # Add the combination column
                # Add to the final predictions dataframe
                final_prediction_2023 = pd.concat([final_prediction_2023, forecast_2023])
                final_prediction_2023['demand_yhat'] = final_prediction_2023['demand_yhat'].apply(lambda x: 0 if x < 0 else x)
                final_prediction_2023['demand_yhat'] = final_prediction_2023['demand_yhat'].apply(lambda x: np.ceil(x))

                X_test = df_grouped_by_week[df_grouped_by_week['combination'] == combination][['date_week','quantity']]
                X_test.columns = ['date','y_true']
                df_ytrue_yhat = final_prediction_2023.merge(X_test,on= 'date', how = 'left')
                df_ytrue_yhat['y_true'].fillna(0, inplace=True)

                RMSE = np.sqrt(mean_squared_error(df_ytrue_yhat['y_true'], df_ytrue_yhat['demand_yhat']))
                dict_temp = {
                    'llave' : combination,
                    'rsme' : RMSE
                }
                df_rmse = pd.DataFrame([dict_temp])
                rmse_por_llave = pd.concat([rmse_por_llave,df_rmse])
        except Exception as e:
            print(f'problems with this key: {combination}\n{e}')



        
    else:
        print(combination)

### Train with all data available

In [None]:
for combination in tqdm(df_grouped_by_week['combination'].unique()):

    # Get the dataframe for the combination
    df_combination = df_grouped_by_week[df_grouped_by_week['combination'] == combination]
    df_combination = df_combination[['date_week','combination','quantity']]
    
    if combination in llaves_more_then_q95 and len(df_combination[df_combination['date_week'] >= '2022-01-01']) > 0: # si la llave esta en las llaves q95 y tiene data en el 2023, se entrena con data 2023, de lo contrario, se entra con todo lo disponible
        df_combination = df_combination[df_combination['date_week'] >= '2022-01-01']
    # df_combination_test = df_grouped_by_week_test[df_grouped_by_week_test['combination'] == combination][['date_week','quantity','campaign','discount_for_event']] # get the y_true values of the combination selected

    familia = str(combination[:3])
    df_discount_and_campaings = archivo_campanias[archivo_campanias['familia'] == familia]

    # df_discount_and_campaings = df_grouped_by_week[df_grouped_by_week['date_week'] >= '2024-01-01']
    # df_discount_and_campaings = df_discount_and_campaings[['combination','date_week','discount_for_event','campaign']]
    # Complete the missing weeks with 0
    first_date = df_combination['date_week'].min() # Get the first date in the dataframe
    last_date = df_combination['date_week'].max() # Get the last date in the dataframe
    

    # Create a dataframe with all the weeks between the year of the first date and the year of the last date
    df_dates = pd.DataFrame({'date_week': pd.date_range(start=f'{first_date.year}-01-08', end=f'2023-12-31', freq='W-MON')})    

    df_combination = df_dates.merge(df_combination, on='date_week', how='left') # Merge the dataframes
    df_combination['quantity'] = df_combination['quantity'].fillna(0) # Replace NaN values with 0
    df_combination = df_combination[~(df_combination['combination'].isnull())]
    df_combination = df_combination.merge(df_discount_and_campaings, on='date_week', how='left') # Merge the dataframes

    # Create the prophet dataframe
    prophet_dataframe = pd.DataFrame()
    prophet_dataframe['ds'] = df_combination['date_week']
    prophet_dataframe['y'] = df_combination['quantity']
    prophet_dataframe['campaign'] = df_combination['campaign']
    prophet_dataframe['discount_for_event'] = df_combination['discount_for_event']
    prophet_dataframe['y'] = prophet_dataframe['y'].astype(int)

    n_unique_years = prophet_dataframe['ds'].dt.year.nunique() # Count the number of different years in the dataframe

    # Create the model
    if n_unique_years == 1:
        model = Prophet(weekly_seasonality = 13)
        model.add_regressor('campaign')
        model.add_regressor('discount_for_event')

    else:
        model = Prophet(weekly_seasonality = 52)
        model.add_regressor('campaign')
        model.add_regressor('discount_for_event')
        
        
    try:
        model.fit(prophet_dataframe)
        with open(f'./serialized_models/{str(combination)}.json', 'w') as file:
            file.write(model_to_json(model))  # Save model
    
    except Exception as e:
        print(f'problems with this key: {combination}\n{e}')




In [147]:
from glob import glob
len(glob('./serialized_models/*'))

7240

In [None]:
# final_prediction_2024 = pd.DataFrame()
# final_prediction_2024_with_test_data = pd.DataFrame()
# rmse_por_llave_resultados = []
# # Get the list of combinations
# for combination in tqdm(llaves_con_informacion_historica_suficiente_para_forecast):
#     # Get the dataframe for the combination
#     df_combination = df_grouped_by_week[df_grouped_by_week['combination'] == combination]
#     if combination in llaves_more_then_q95 and len(df_combination[df_combination['date_week'] >= '2022-01-01']) > 0: # si la llave esta en las llaves q95 y tiene data en el 2022, se entrena con data 2022, de lo contrario, se entra con todo lo disponible
#         df_combination = df_combination[df_combination['date_week'] >= '2022-01-01']
#     # df_combination_test = df_grouped_by_week_test[df_grouped_by_week_test['combination'] == combination][['date_week','quantity','campaign','discount_for_event']] # get the y_true values of the combination selected
#     df_discount_and_campaings = df_grouped_by_week[df_grouped_by_week['date_week'] >= '2024-01-01']
#     df_discount_and_campaings = df_discount_and_campaings[['combination','date_week','discount_for_event','campaign']]
#     # Complete the missing weeks with 0
#     first_date = df_combination['date_week'].min() # Get the first date in the dataframe
#     last_date = df_combination['date_week'].max() # Get the last date in the dataframe
#     # Create a dataframe with all the weeks between the year of the first date and the year of the last date
#     df_dates = pd.DataFrame({'date_week': pd.date_range(start=f'{first_date.year}-01-08', end=f'{last_date.year}-12-31', freq='W-MON')})    
    
#     df_combination = df_dates.merge(df_combination, on='date_week', how='left') # Merge the dataframes
#     df_combination['quantity'] = df_combination['quantity'].fillna(0) # Replace NaN values with 0
#     df_combination['campaign'] = df_combination['campaign'].fillna(0) # Replace NaN values with 0
#     df_combination['discount_for_event'] = df_combination['discount_for_event'].fillna(0) # Replace NaN values with 0

#     df_combination = df_combination[df_combination['date_week'] < '2023-12-31'] # Drop registers from 2024

#     # Create the prophet dataframe
#     prophet_dataframe = pd.DataFrame()
#     prophet_dataframe['ds'] = df_combination['date_week']
#     prophet_dataframe['y'] = df_combination['quantity']
#     prophet_dataframe['campaign'] = df_combination['campaign']
#     prophet_dataframe['discount_for_event'] = df_combination['discount_for_event']
#     prophet_dataframe['y'] = prophet_dataframe['y'].astype(int)

#     n_unique_years = prophet_dataframe['ds'].dt.year.nunique() # Count the number of different years in the dataframe

#     # Create the model
#     if n_unique_years == 1:
#         model = Prophet(weekly_seasonality = 13)
#         model.add_regressor('campaign')
#         model.add_regressor('discount_for_event')

#     else:
#         model = Prophet(weekly_seasonality = 52)
#         model.add_regressor('campaign')
#         model.add_regressor('discount_for_event')
        
        
#     model.fit(prophet_dataframe)

#     # Calculate how many weeks are missing from the last date in the dataframe to the last week of 2023
#     weeks = (date(2024, 12, 31) - prophet_dataframe['ds'].max().date()).days // 7
#     # Create a dataframe with the dates from the last date in the dataframe to the last week of 2023
#     future_2024 = model.make_future_dataframe(periods=weeks, freq='W-MON')
#     future_2024 = future_2024[future_2024['ds'] >= '2024-01-01']
#     future_2024 = future_2024.merge(df_combination_test, right_on='date_week',left_on= 'ds', how = 'left')
#     del future_2024['date_week']
#     future_2024[['quantity','campaign','discount_for_event']] = future_2024[['quantity','campaign','discount_for_event']].fillna(0)



#     forecast = model.predict(future_2024) # Make the predictions
#     forecast_2024 = forecast[['ds', 'yhat']] # Get the predictions for 2023
#     forecast_2024.columns = ['date', 'demand_yhat'] # Rename columns
#     forecast_2024['llave'] = combination # Add the combination column
#     # Add to the final predictions dataframe
#     final_prediction_2024 = pd.concat([final_prediction_2024, forecast_2024])
#     final_prediction_2024['demand_yhat'] = final_prediction_2024['demand_yhat'].apply(lambda x: 0 if x < 0 else x)
#     # # -----------------------------------------------------------------------------------
#     # # Do the same to generate de prophet_df, but with test data(2023) to compare results
#     # # -----------------------------------------------------------------------------------
#     # # Complete the missing weeks with 0
#     # first_date_test = df_combination_test['date_week'].min() # Get the first date in the dataframe
#     # last_date_test = df_combination_test['date_week'].max() # Get the last date in the dataframe
#     # # Create a dataframe with all the weeks between the year of the first date and the year of the last date
#     # df_dates_test = pd.DataFrame({'date_week': pd.date_range(start=f'{first_date_test.year}-01-08', end=f'{last_date_test.year}-12-31', freq='W-MON')})    
#     # df_combination_test = df_dates_test.merge(df_combination_test, on='date_week', how='left') # Merge the dataframes
#     # df_combination_test['quantity'] = df_combination_test['quantity'].fillna(0) # Replace NaN values with 0
#     # df_combination_test = df_combination_test[df_combination_test['date_week'] < '2024-01-01'] # Drop registers from 2024
    
#     # # Create the prophet TEST dataframe
#     # dataframe_test = pd.DataFrame()
#     # dataframe_test['ds'] = df_combination_test['date_week']
#     # dataframe_test['y_true'] = df_combination_test['quantity']

#     # # Merge the forecast_2024 with the y_true values
#     # prediction = forecast[['ds', 'yhat']].merge(dataframe_test, on='ds', how='left')
#     # prediction = prediction[prediction['ds'] >= '2023-01-01']
#     # prediction['y_true'] = prediction['y_true'].fillna(0) # Fill NaN values with 0
#     # prediction['y_true'] = prediction['y_true'].astype(int)
#     # prediction['yhat'] = prediction['yhat'].apply(lambda x: 0 if x < 0 else x) # Replace negative values with 0
    
    
#     # # Calculate the rsme for test_set
#     # df_rsme = prediction.copy()
#     # rsme = np.sqrt(mean_squared_error(df_rsme['y_true'], df_rsme['yhat']))
    
#     # # generate new column error with the abs(error)
#     # prediction['error'] = prediction['y_true'] - prediction['yhat']
#     # prediction['error'] = prediction['error'].apply(lambda x: abs(x))
#     # prediction['llave'] = combination
#     # final_prediction_2024_with_test_data = pd.concat([final_prediction_2024_with_test_data,prediction])
#     # rmse_por_llave = {
#     #     'llave': combination,
#     #     'rmse' : rsme
#     # }
#     # rmse_por_llave_resultados.append(rmse_por_llave)
#     with open(f'.././serialized_models/{str(combination)}.json', 'w') as file:
#         file.write(model_to_json(model))  # Save model

