In [1]:
from datetime import datetime
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder

import xgboost as xgb

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import skforecast
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster
import shap


  from .autonotebook import tqdm as notebook_tqdm


## Cargo Dataset principal

In [2]:
df = pd.read_csv('../../data/Top100_prediction_con.csv')

In [3]:
# Lista para guardar resultados
resultados = []

# Copia del DataFrame original
df_original = df.copy()

# Seleccionar los primeros 5 productos únicos
productos = list(df_original['item'].unique())

for producto in productos:
    # Filtrar el DataFrame para el producto actual
    df_filtrado = df_original[(df_original['store_code'] == 'NYC_3') & (df_original['item'] == producto)]
    
    # Verificar si hay suficientes datos
    if len(df_filtrado) < 30 + 14:  # Necesitas al menos 44 registros
        print(f"Advertencia: No hay suficientes datos para el producto {producto}. Saltando...")
        continue
    
    # Preparar los datos
    ts_predict = df_filtrado[['date', 'sales', 'weekday', 'event']]
    ts_predict = pd.get_dummies(data=ts_predict, columns=['weekday'], dtype=int)
    ts_predict['date'] = pd.to_datetime(ts_predict['date'])
    ts_predict.sort_values('date', ascending=True, inplace=True)
    ts_predict.set_index('date', inplace=True)
    ts_predict = ts_predict.asfreq('D')

    # Separar en entrenamiento y prueba
    y, exog = ts_predict['sales'], ts_predict.drop(columns=['sales'])
    y_train, y_test = y[:-30], y[-30:]
    exog_train, exog_test = exog[:-30], exog[-30:]

    # Inicializar el modelo Forecaster
    forecaster = ForecasterAutoreg(
        regressor=RandomForestRegressor(max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=100),
        lags=7
    )
    
    # Ajustar el modelo
    forecaster.fit(y=y_train, exog=exog_train)

    start_date = '2016-04-25' 
    end_date ='2016-05-30'
    # Crear un rango de fechas
    date_range = pd.date_range(start=start_date, end=end_date)
    # Crear el dataframe
    df1 = pd.DataFrame(date_range, columns=['date'])
    df1['date'] = pd.to_datetime(df1['date'])
    df1['weekday'] = df1['date'].dt.day_name()
    df1['event'] = 0
    df1 = pd.get_dummies(data=df1, columns=['weekday'], dtype=int)
    df1.sort_values('date', inplace=True, ascending=True)
    df1.set_index('date', inplace=True)
    df1 = df1.asfreq('D')
    exog_test = pd.concat([exog_test, df1], ignore_index=False)
    
    # Predecir
    predicciones = forecaster.predict(steps=60, exog=exog_test)
    
    # Almacenar resultados
    resultados.append({
        'producto': producto,
        'prediccion': predicciones.values,
        'test': y_test.values
    })
    
    #print(f"Predicciones generadas para el producto: {producto}")




In [4]:
tiendas_inventario = pd.DataFrame(resultados)
tiendas_inventario['test_suma'] = tiendas_inventario['test'].apply(sum)
suma = tiendas_inventario['test_suma'].sum()
tiendas_inventario['prediccion'] = tiendas_inventario['prediccion'].apply(lambda x: np.round(x, decimals=0))
tiendas_inventario['tiendas_inventario_test'] = tiendas_inventario['prediccion'].apply(lambda x: sum(x[:30]))
tiendas_inventario['tiendas_inventario_prediction'] = tiendas_inventario['prediccion'].apply(lambda x: sum(x[30:]))
tiendas_inventario[['producto','test_suma','tiendas_inventario_test','tiendas_inventario_prediction']]

Unnamed: 0,producto,test_suma,tiendas_inventario_test,tiendas_inventario_prediction
0,ACCESORIES_1_108,15,25.0,22.0
1,HOME_&_GARDEN_1_027,107,188.0,226.0
2,HOME_&_GARDEN_1_053,247,215.0,179.0
3,HOME_&_GARDEN_1_140,106,112.0,106.0
4,HOME_&_GARDEN_1_177,128,130.0,128.0
...,...,...,...,...
95,SUPERMARKET_3_499,1042,249.0,240.0
96,ACCESORIES_1_158,129,116.0,102.0
97,SUPERMARKET_3_282,1366,1706.0,1511.0
98,ACCESORIES_1_354,174,204.0,201.0


In [5]:
total_inventario_productos_real = tiendas_inventario['test_suma'].sum()
total_inventario_productos_predicho = tiendas_inventario['tiendas_inventario_test'].sum()
print(total_inventario_productos_real)
print(total_inventario_productos_predicho)
print(total_inventario_productos_real - total_inventario_productos_predicho)

38574
41226.0
-2652.0


In [4]:
tienda = pd.DataFrame(resultados)
file_name = "NYC_3_unidades.xlsx"
tienda.to_excel(file_name)

In [9]:
pd.DataFrame(resultados) #7

Unnamed: 0,producto,prediccion,test
0,ACCESORIES_1_108,"[1.648548340548341, 1.013238095238095, 0.60840...","[0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 1, 0, 2, 0, ..."
1,HOME_&_GARDEN_1_027,"[4.815116522366523, 5.4951616161616155, 3.8128...","[4, 2, 4, 4, 4, 2, 5, 0, 7, 3, 3, 4, 5, 5, 6, ..."
2,HOME_&_GARDEN_1_053,"[7.8600634920634915, 12.232695526695524, 7.728...","[8, 11, 7, 4, 4, 14, 7, 5, 20, 10, 11, 4, 10, ..."
3,HOME_&_GARDEN_1_140,"[4.836079365079364, 4.730999999999998, 4.21519...","[1, 3, 1, 0, 5, 2, 4, 5, 7, 2, 0, 1, 6, 9, 8, ..."
4,HOME_&_GARDEN_1_177,"[5.317420634920636, 5.915910894660894, 3.69326...","[8, 7, 0, 8, 3, 4, 1, 17, 25, 30, 18, 6, 0, 0,..."
...,...,...,...
95,SUPERMARKET_3_499,"[10.953620573870573, 9.838535714285717, 13.780...","[0, 0, 14, 16, 28, 23, 22, 51, 39, 65, 41, 40,..."
96,ACCESORIES_1_158,"[5.611738095238096, 6.697182539682541, 4.68341...","[5, 4, 5, 2, 7, 5, 10, 6, 1, 8, 4, 3, 8, 9, 5,..."
97,SUPERMARKET_3_282,"[57.68934920634923, 86.80153724053724, 56.4256...","[77, 104, 53, 45, 35, 22, 34, 63, 85, 55, 46, ..."
98,ACCESORIES_1_354,"[4.166196275946276, 8.78524603174603, 6.244698...","[7, 5, 2, 5, 2, 7, 5, 2, 5, 7, 6, 2, 4, 1, 5, ..."


In [7]:
pd.DataFrame(resultados) #14

Unnamed: 0,producto,prediccion,test
0,ACCESORIES_1_108,"[1.1669603174603176, 0.6981865079365077, 0.821...","[0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 1, 0, 2, 0, ..."
1,HOME_&_GARDEN_1_027,"[4.638481240981241, 4.99316341991342, 4.564210...","[4, 2, 4, 4, 4, 2, 5, 0, 7, 3, 3, 4, 5, 5, 6, ..."
2,HOME_&_GARDEN_1_053,"[7.447278499278497, 8.355496753246754, 7.78650...","[8, 11, 7, 4, 4, 14, 7, 5, 20, 10, 11, 4, 10, ..."
3,HOME_&_GARDEN_1_140,"[4.387232212232214, 5.42890873015873, 4.776258...","[1, 3, 1, 0, 5, 2, 4, 5, 7, 2, 0, 1, 6, 9, 8, ..."
4,HOME_&_GARDEN_1_177,"[5.214388888888888, 5.885666666666666, 5.07606...","[8, 7, 0, 8, 3, 4, 1, 17, 25, 30, 18, 6, 0, 0,..."
...,...,...,...
95,SUPERMARKET_3_499,"[8.44593073593074, 9.046761904761905, 12.97389...","[0, 0, 14, 16, 28, 23, 22, 51, 39, 65, 41, 40,..."
96,ACCESORIES_1_158,"[4.739011904761904, 6.031991341991344, 4.72572...","[5, 4, 5, 2, 7, 5, 10, 6, 1, 8, 4, 3, 8, 9, 5,..."
97,SUPERMARKET_3_282,"[60.30745115995115, 80.55711507936506, 59.0159...","[77, 104, 53, 45, 35, 22, 34, 63, 85, 55, 46, ..."
98,ACCESORIES_1_354,"[5.80095634920635, 6.225952380952378, 6.064591...","[7, 5, 2, 5, 2, 7, 5, 2, 5, 7, 6, 2, 4, 1, 5, ..."


## Predigo 30 dias(los de test) 

In [7]:
# Lista para guardar resultados
resultados = []

# Copia del DataFrame original
df_original = df.copy()

# Seleccionar los primeros 5 productos únicos
productos = list(df_original['item'].unique())  

for producto in productos:
    # Filtrar el DataFrame para el producto actual
    df_filtrado = df_original[(df_original['store_code'] == 'NYC_3') & (df_original['item'] == producto)]
    
    # Verificar si hay suficientes datos
    if len(df_filtrado) < 30 + 14:  # Necesitas al menos 44 registros
        print(f"Advertencia: No hay suficientes datos para el producto {producto}. Saltando...")
        continue
    
    # Preparar los datos
    ts_predict = df_filtrado[['date', 'sales', 'weekday', 'event']]
    ts_predict = pd.get_dummies(data=ts_predict, columns=['weekday'], dtype=int)
    ts_predict['date'] = pd.to_datetime(ts_predict['date'])
    ts_predict.sort_values('date', ascending=True, inplace=True)
    ts_predict.set_index('date', inplace=True)
    ts_predict = ts_predict.asfreq('D')

    # Separar en entrenamiento y prueba
    y, exog = ts_predict['sales'], ts_predict.drop(columns=['sales'])
    y_train, y_test = y[:-30], y[-30:]
    exog_train, exog_test = exog[:-30], exog[-30:]

    # Inicializar el modelo Forecaster
    forecaster = ForecasterAutoreg(
        regressor=RandomForestRegressor(max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=100),
        lags=7
    )
    
    # Ajustar el modelo
    forecaster.fit(y=y_train, exog=exog_train)

    # Crear un rango de fechas y el dataframe de exógenas
    start_date = '2016-04-25' 
    end_date ='2016-05-30'
    date_range = pd.date_range(start=start_date, end=end_date)
    df1 = pd.DataFrame(date_range, columns=['date'])
    df1['date'] = pd.to_datetime(df1['date'])
    df1['weekday'] = df1['date'].dt.day_name()
    df1['event'] = 0
    df1 = pd.get_dummies(data=df1, columns=['weekday'], dtype=int)
    df1.sort_values('date', inplace=True, ascending=True)
    df1.set_index('date', inplace=True)
    df1 = df1.asfreq('D')
    exog_test = pd.concat([exog_test, df1], ignore_index=False)
    
    # Predecir
    predicciones = forecaster.predict(steps=60, exog=exog_test)
    
    # Crear un diccionario con claves 'dia1', 'dia2', ..., 'dia60'
    predicciones_dict = {f'dia{i+1}': pred for i, pred in enumerate(predicciones.values)}
    
    # Almacenar los resultados en un diccionario para cada producto
    resultados.append({
        'producto': producto,
        **predicciones_dict  # Desempaquetar el diccionario de predicciones
    })

# Convertir la lista de resultados en un DataFrame
df_resultados = pd.DataFrame(resultados)

# Mostrar el DataFrame final con las columnas dia1, dia2, ..., dia60
print(df_resultados)




               producto       dia1       dia2       dia3       dia4  \
0      ACCESORIES_1_108   1.489312   0.849762   0.661825   1.016492   
1   HOME_&_GARDEN_1_027   4.815583   5.673583   3.779136   4.117865   
2   HOME_&_GARDEN_1_053   7.625048  12.860579   7.830952   7.668986   
3   HOME_&_GARDEN_1_140   4.811423   4.750063   4.035857   3.743768   
4   HOME_&_GARDEN_1_177   5.347289   5.428609   3.717810   4.060202   
..                  ...        ...        ...        ...        ...   
95    SUPERMARKET_3_499   9.157075   8.651262  12.657659   5.891210   
96     ACCESORIES_1_158   5.647520   6.810782   4.932886   4.367747   
97    SUPERMARKET_3_282  59.127747  87.539008  59.520714  53.589683   
98     ACCESORIES_1_354   3.874496   9.363294   5.937497   6.266830   
99  HOME_&_GARDEN_1_106  19.869627  19.836365  18.158690  16.143760   

         dia5       dia6       dia7       dia8       dia9  ...      dia51  \
0    1.066179   0.457575   0.956194   1.142649   0.730929  ...   0.816

In [8]:
df_resultados

Unnamed: 0,producto,dia1,dia2,dia3,dia4,dia5,dia6,dia7,dia8,dia9,...,dia51,dia52,dia53,dia54,dia55,dia56,dia57,dia58,dia59,dia60
0,ACCESORIES_1_108,1.489312,0.849762,0.661825,1.016492,1.066179,0.457575,0.956194,1.142649,0.730929,...,0.816615,1.179000,1.030361,0.335071,0.140143,0.750810,1.128756,0.816615,1.179000,1.030361
1,HOME_&_GARDEN_1_027,4.815583,5.673583,3.779136,4.117865,4.914753,3.775282,4.266543,6.015822,6.868255,...,8.704254,6.714122,7.113096,7.808298,6.927775,4.604861,8.230837,9.989155,7.459508,7.623286
2,HOME_&_GARDEN_1_053,7.625048,12.860579,7.830952,7.668986,6.033165,6.989317,7.391881,7.771643,11.313746,...,11.123251,6.755655,5.729212,5.957810,5.988758,6.801762,7.147076,11.771885,7.139921,6.144736
3,HOME_&_GARDEN_1_140,4.811423,4.750063,4.035857,3.743768,2.997242,3.378464,2.762774,5.200910,4.899670,...,4.862974,2.885522,3.353683,2.932655,3.057437,3.443806,4.119730,4.862974,2.885522,3.353683
4,HOME_&_GARDEN_1_177,5.347289,5.428609,3.717810,4.060202,3.596886,3.503702,2.740297,4.249825,5.632849,...,4.815687,2.870496,3.048754,3.937464,3.579573,3.164933,4.173306,4.815687,2.870496,3.048754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,SUPERMARKET_3_499,9.157075,8.651262,12.657659,5.891210,7.928777,6.358881,9.267393,10.622897,5.234532,...,5.873968,9.288869,4.613083,7.832369,7.102579,9.543988,8.063746,5.734802,9.288869,4.613083
96,ACCESORIES_1_158,5.647520,6.810782,4.932886,4.367747,4.080421,3.864583,3.297310,4.731560,3.994226,...,4.664694,3.225444,3.168687,2.669556,3.442607,3.446232,4.254234,4.664694,3.225444,3.168687
97,SUPERMARKET_3_282,59.127747,87.539008,59.520714,53.589683,45.770462,40.575052,41.952751,61.974611,79.900754,...,61.114095,52.618932,48.459440,41.249044,37.337964,35.938389,49.548087,56.999831,48.845605,46.693694
98,ACCESORIES_1_354,3.874496,9.363294,5.937497,6.266830,5.647413,6.431578,5.692310,5.468369,7.231849,...,6.950742,5.944480,5.815341,5.316707,6.125357,5.504437,5.925266,6.950742,5.944480,5.815341


In [None]:
df[]

In [9]:
df_resultados = df_resultados.apply(np.ceil)
df_resultados.head(1)

TypeError: must be real number, not str