In [1]:
from datetime import datetime
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder

import xgboost as xgb

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import skforecast
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster
import shap


  from .autonotebook import tqdm as notebook_tqdm


## Carga del dataset principal

In [2]:
# Load the main dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='../../data/Top100_prediction_con.csv')

In [3]:
# Run configuration, kept together so horizon, lags and seed are tuned in one place.
STORE_CODE = 'PHI_3'   # store whose products are forecast
TEST_DAYS = 30         # trailing observations held out as the test window
FORECAST_DAYS = 30     # extra days forecast past the last observed date
MIN_TRAIN_DAYS = 14    # minimum number of rows required on top of the test window
N_LAGS = 7             # autoregressive lags used by the forecaster
SEED = 42              # fixed seed so the random forest gives repeatable results

# One record per product: raw predictions plus the held-out test values.
resultados = []

# Work on a copy so the loaded DataFrame is left untouched.
df_original = df.copy()

# Every unique product in the dataset; products without enough rows for this
# store are skipped (with a warning) inside the loop.
productos = list(df_original['item'].unique())

# The store filter is identical for every product, so it is applied only once.
df_tienda = df_original[df_original['store_code'] == STORE_CODE]

for producto in productos:
    # Rows of the current product within the selected store.
    df_filtrado = df_tienda[df_tienda['item'] == producto]

    # Need the test window plus a minimum amount of training history.
    if len(df_filtrado) < TEST_DAYS + MIN_TRAIN_DAYS:
        print(f"Advertencia: No hay suficientes datos para el producto {producto}. Saltando...")
        continue

    # Build a daily-indexed frame with one-hot encoded weekday columns.
    ts_predict = pd.get_dummies(
        data=df_filtrado[['date', 'income', 'weekday', 'event']],
        columns=['weekday'],
        dtype=int,
    )
    ts_predict['date'] = pd.to_datetime(ts_predict['date'])
    ts_predict = (
        ts_predict
        .sort_values('date', ascending=True)
        .set_index('date')
        .asfreq('D')
    )

    # asfreq('D') inserts NaN rows for calendar days missing from the data.
    # NOTE(review): skforecast is expected to reject a target with missing
    # values at fit time, which would abort the whole loop - confirm for the
    # installed version. Skipping keeps the remaining products running.
    if ts_predict['income'].isna().any():
        print(f"Advertencia: La serie del producto {producto} tiene fechas o valores faltantes. Saltando...")
        continue

    # Target and exogenous features; the last TEST_DAYS rows are held out.
    y, exog = ts_predict['income'], ts_predict.drop(columns=['income'])
    y_train, y_test = y.iloc[:-TEST_DAYS], y.iloc[-TEST_DAYS:]
    exog_train, exog_test = exog.iloc[:-TEST_DAYS], exog.iloc[-TEST_DAYS:]

    # Autoregressive forecaster on top of a seeded random forest.
    forecaster = ForecasterAutoreg(
        regressor=RandomForestRegressor(
            max_depth=None,
            min_samples_leaf=2,
            min_samples_split=5,
            n_estimators=50,
            random_state=SEED,
        ),
        lags=N_LAGS,
    )
    forecaster.fit(y=y_train, exog=exog_train)

    # Future exogenous features: start the day after the last observed date
    # instead of relying on a hard-coded calendar window, and build exactly
    # FORECAST_DAYS rows.
    start_date = exog_test.index[-1] + pd.Timedelta(days=1)
    date_range = pd.date_range(start=start_date, periods=FORECAST_DAYS, freq='D')
    end_date = date_range[-1]

    df1 = pd.DataFrame({'date': date_range})
    df1['weekday'] = df1['date'].dt.day_name()
    df1['event'] = 0  # no events are assumed for the future window
    df1 = pd.get_dummies(data=df1, columns=['weekday'], dtype=int)
    df1 = df1.set_index('date').asfreq('D')

    # The future frame must use the same feature names as the training exog.
    # Fail loudly on unknown columns (e.g. weekday labels spelled differently
    # in the source data) rather than feeding misaligned features to the model.
    columnas_extra = sorted(set(df1.columns) - set(exog.columns))
    if columnas_extra:
        raise ValueError(
            f"Las columnas exógenas futuras {columnas_extra} no existen en los datos "
            f"de entrenamiento del producto {producto}."
        )
    # Same column order as training; a dummy column absent from the future
    # window is an all-zero indicator.
    df1 = df1.reindex(columns=exog.columns, fill_value=0)

    # Test window followed by the future window: one row per forecast step.
    exog_test = pd.concat([exog_test, df1], ignore_index=False)

    # Forecast the test window and the future window in a single call.
    predicciones = forecaster.predict(steps=TEST_DAYS + FORECAST_DAYS, exog=exog_test)

    resultados.append({
        'producto': producto,
        'prediccion': predicciones.values,
        'test': y_test.values
    })




In [4]:
# Per-product summary table built from the forecasting results.
tiendas_inventario = pd.DataFrame(data=resultados)

# Actual total over the test window, per product and across all products.
tiendas_inventario['test_suma'] = tiendas_inventario['test'].map(sum)
suma = tiendas_inventario['test_suma'].sum()

# Round every forecast value to the nearest whole number.
tiendas_inventario['prediccion'] = tiendas_inventario['prediccion'].map(
    lambda valores: np.round(valores, 0)
)

# Forecast totals: first 30 steps versus the steps after them.
tiendas_inventario['tiendas_inventario_test'] = tiendas_inventario['prediccion'].map(
    lambda valores: sum(valores[:30])
)
tiendas_inventario['tiendas_inventario_prediction'] = tiendas_inventario['prediccion'].map(
    lambda valores: sum(valores[30:])
)

# Display only the summary columns.
columnas_resumen = ['producto', 'test_suma', 'tiendas_inventario_test', 'tiendas_inventario_prediction']
tiendas_inventario[columnas_resumen]

Unnamed: 0,producto,test_suma,tiendas_inventario_test,tiendas_inventario_prediction
0,ACCESORIES_1_108,431.5850,711.0,513.0
1,HOME_&_GARDEN_1_027,432.8250,550.0,601.0
2,HOME_&_GARDEN_1_053,1889.9625,1198.0,1344.0
3,HOME_&_GARDEN_1_140,654.9375,811.0,782.0
4,HOME_&_GARDEN_1_177,287.5125,695.0,735.0
...,...,...,...,...
95,SUPERMARKET_3_499,546.8160,558.0,498.0
96,ACCESORIES_1_158,2139.4380,1938.0,1848.0
97,SUPERMARKET_3_282,2712.5760,1758.0,1892.0
98,ACCESORIES_1_354,855.7752,1055.0,1044.0


In [5]:
# Totals across all products: actual test values versus the rounded forecast
# for the same window, followed by their difference (actual minus forecast).
total_inventario_productos_real = tiendas_inventario.loc[:, 'test_suma'].sum()
total_inventario_productos_predicho = tiendas_inventario.loc[:, 'tiendas_inventario_test'].sum()
print(
    total_inventario_productos_real,
    total_inventario_productos_predicho,
    total_inventario_productos_real - total_inventario_productos_predicho,
    sep='\n',
)

101026.3662
96013.0
5013.366200000004


In [7]:
# Export the raw per-product results (predictions and test values) to Excel.
file_name = "PHI_3_ventas.xlsx"
tienda = pd.DataFrame(data=resultados)
tienda.to_excel(excel_writer=file_name)

In [4]:
# Results table labelled as the 14-lag run; NOTE(review): the forecasting loop
# in this notebook sets lags=7, so this output comes from a different setting.
pd.DataFrame(data=resultados)

Unnamed: 0,producto,prediccion,test
0,ACCESORIES_1_108,"[2.76002380952381, 1.561190476190476, 1.251087...","[2, 1, 0, 3, 0, 1, 2, 1, 2, 1, 1, 0, 0, 1, 1, ..."
1,HOME_&_GARDEN_1_027,"[2.1005, 1.684493506493507, 0.9815714285714285...","[1, 0, 5, 1, 2, 5, 2, 4, 0, 2, 0, 3, 2, 1, 0, ..."
2,HOME_&_GARDEN_1_053,"[2.316730158730159, 1.7534761904761904, 0.941,...","[7, 4, 4, 1, 4, 1, 4, 3, 3, 1, 3, 3, 3, 3, 5, ..."
3,HOME_&_GARDEN_1_140,"[1.0296190476190477, 1.7221984126984131, 1.483...","[2, 1, 0, 0, 0, 3, 2, 3, 4, 1, 1, 2, 0, 0, 1, ..."
4,HOME_&_GARDEN_1_177,"[3.8844603174603174, 3.4420714285714293, 3.080...","[2, 2, 1, 2, 1, 3, 9, 5, 5, 0, 1, 1, 1, 0, 0, ..."
...,...,...,...
95,SUPERMARKET_3_499,"[8.86036291486292, 9.694309523809526, 6.283809...","[8, 2, 1, 2, 1, 0, 0, 0, 0, 4, 5, 8, 6, 7, 8, ..."
96,ACCESORIES_1_158,"[2.5095396825396827, 4.191188034188034, 1.8520...","[2, 2, 7, 2, 4, 0, 2, 4, 3, 0, 6, 3, 3, 2, 3, ..."
97,SUPERMARKET_3_282,"[26.444000000000003, 25.08918253968254, 17.499...","[44, 44, 32, 22, 24, 21, 31, 54, 29, 27, 37, 3..."
98,ACCESORIES_1_354,"[1.377888888888889, 1.2541507936507938, 1.0349...","[0, 1, 0, 0, 0, 1, 2, 3, 0, 0, 2, 1, 1, 3, 0, ..."


In [6]:
# Results table labelled as the 7-lag run.
pd.DataFrame(data=resultados)

Unnamed: 0,producto,prediccion,test
0,ACCESORIES_1_108,"[3.5989761904761908, 0.9187142857142855, 1.409...","[2, 1, 0, 3, 0, 1, 2, 1, 2, 1, 1, 0, 0, 1, 1, ..."
1,HOME_&_GARDEN_1_027,"[1.9959682539682537, 1.4615021645021642, 1.697...","[1, 0, 5, 1, 2, 5, 2, 4, 0, 2, 0, 3, 2, 1, 0, ..."
2,HOME_&_GARDEN_1_053,"[2.8376666666666663, 2.3627142857142855, 1.472...","[7, 4, 4, 1, 4, 1, 4, 3, 3, 1, 3, 3, 3, 3, 5, ..."
3,HOME_&_GARDEN_1_140,"[1.2505476190476192, 1.6657380952380954, 1.853...","[2, 1, 0, 0, 0, 3, 2, 3, 4, 1, 1, 2, 0, 0, 1, ..."
4,HOME_&_GARDEN_1_177,"[4.193309523809523, 2.1811349206349204, 2.4279...","[2, 2, 1, 2, 1, 3, 9, 5, 5, 0, 1, 1, 1, 0, 0, ..."
...,...,...,...
95,SUPERMARKET_3_499,"[7.197515873015874, 9.987404761904761, 7.12265...","[8, 2, 1, 2, 1, 0, 0, 0, 0, 4, 5, 8, 6, 7, 8, ..."
96,ACCESORIES_1_158,"[2.2590158730158727, 3.551595238095238, 1.9954...","[2, 2, 7, 2, 4, 0, 2, 4, 3, 0, 6, 3, 3, 2, 3, ..."
97,SUPERMARKET_3_282,"[23.22586507936508, 20.463079365079366, 14.007...","[44, 44, 32, 22, 24, 21, 31, 54, 29, 27, 37, 3..."
98,ACCESORIES_1_354,"[1.4654285714285715, 1.519857142857143, 0.9793...","[0, 1, 0, 0, 0, 1, 2, 3, 0, 0, 2, 1, 1, 3, 0, ..."


## Predicción de 30 días (los de test)