In [1]:
from datetime import datetime
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import skforecast
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster
import shap

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the sales records; the path is relative to the notebook's working directory.
ts = pd.read_csv(filepath_or_buffer='../../data/ts_top100_prediction.csv')

In [3]:
# Quick sanity check: (rows, columns) followed by one randomly drawn record.
print(ts.shape)
ts.sample(n=1)

(1739323, 6)


Unnamed: 0,date,id,item,sales,income,event
854914,2013-11-14,HOME_&_GARDEN_1_494_PHI_2,HOME_&_GARDEN_1_494,2,19.925,0


In [4]:
# Convert the 'date' column from YYYY-MM-DD strings to pandas datetimes.
ts['date'] = pd.to_datetime(arg=ts['date'], format='%Y-%m-%d')

In [5]:
# Day-of-week name (e.g. 'Wednesday') for every record; later one-hot encoded as a model feature.
ts['weekday'] = ts['date'].dt.day_name(locale=None)

In [6]:
# Inspect five random rows to confirm the new 'weekday' column looks right.
ts.sample(n=5)

Unnamed: 0,date,id,item,sales,income,event,weekday
1197357,2014-10-29,SUPERMARKET_3_090_PHI_1,SUPERMARKET_3_090,23,38.088,0,Wednesday
1395888,2015-05-17,ACCESORIES_1_108_BOS_1,ACCESORIES_1_108,1,17.2634,0,Sunday
1107493,2014-07-30,SUPERMARKET_3_202_PHI_2,SUPERMARKET_3_202,6,30.528,0,Wednesday
1505313,2015-09-03,SUPERMARKET_2_052_PHI_3,SUPERMARKET_2_052,2,33.528,0,Thursday
1324589,2015-03-07,SUPERMARKET_1_012_NYC_1,SUPERMARKET_1_012,3,19.728,0,Saturday


In [7]:
# Distinct item identifiers, in order of first appearance in the data.
lista_productos100 = ts['item'].unique().tolist()

In [8]:
# --- Forecast configuration ---------------------------------------------------
TEST_DAYS = 30     # trailing days of each series held out for evaluation
FUTURE_DAYS = 30   # days forecast beyond the last observed date
N_LAGS = 14        # past daily values used as autoregressive predictors

# One dict per product: {'i': item id, 'prediction': list of forecasts, 'test': held-out actuals}
resultados = []

# Copy of the original DataFrame
df_original = ts.copy()

# Every distinct product in the data; one forecaster is trained per product
productos = list(df_original['item'].unique())

# Columns needed for modelling (loop-invariant, so selected once)
dataset = df_original[['item', 'date', 'sales', 'weekday', 'event']]

for i in productos:
    df_product = dataset[dataset['item'] == i]

    # Calendar features, one row per distinct (date, weekday, event) combination.
    # NOTE(review): if 'event' can differ between rows of the same date, the merge
    # below duplicates dates and asfreq('D') will fail -- confirm it is per-date.
    df_product_sin_duplicates = df_product[['date', 'weekday', 'event']].drop_duplicates()

    # Daily sales summed over all rows of this item, joined with the calendar features
    ts_predict = df_product.groupby(['date'])['sales'].sum().reset_index()
    ts_predict = ts_predict.merge(df_product_sin_duplicates, on=['date'], how='left')
    ts_predict = pd.get_dummies(data=ts_predict, columns=['weekday'], dtype=int)
    ts_predict['date'] = pd.to_datetime(ts_predict['date'])
    # NOTE(review): asfreq('D') inserts NaN rows for any missing calendar day;
    # this assumes each series has no gaps.
    ts_predict = (
        ts_predict
        .sort_values('date', ascending=True)
        .set_index('date')
        .asfreq('D')
    )

    # Target and exogenous features; the last TEST_DAYS rows are the hold-out window
    y, exog = ts_predict['sales'], ts_predict.drop(columns=['sales'])
    y_train, y_test = y.iloc[:-TEST_DAYS], y.iloc[-TEST_DAYS:]
    exog_train, exog_test = exog.iloc[:-TEST_DAYS], exog.iloc[-TEST_DAYS:]

    forecaster = ForecasterAutoreg(
        regressor=RandomForestRegressor(
            random_state=123,
            max_depth=None,
            min_samples_leaf=2,
            min_samples_split=2,
            n_estimators=50,
        ),
        lags=N_LAGS,
    )

    # Calendar features for the days after the last observation. The window starts
    # right after this series' final date (instead of a hard-coded date) and is
    # exactly FUTURE_DAYS long. No events are assumed in the future (event = 0).
    future_dates = pd.date_range(
        start=exog.index[-1] + pd.Timedelta(days=1),
        periods=FUTURE_DAYS,
        freq='D',
    )
    df1 = pd.DataFrame({'date': future_dates})
    df1['weekday'] = df1['date'].dt.day_name()
    df1['event'] = 0
    df1 = pd.get_dummies(data=df1, columns=['weekday'], dtype=int).set_index('date')
    # Guarantee the same columns, in the same order, as the training features
    df1 = df1.reindex(columns=exog.columns, fill_value=0).asfreq('D')

    # Hold-out window followed by the future window: TEST_DAYS + FUTURE_DAYS rows
    exog_test = pd.concat([exog_test, df1], ignore_index=False)

    forecaster.fit(y=y_train, exog=exog_train)
    # First TEST_DAYS predictions overlap y_test; the remainder is the true forecast
    predictions = forecaster.predict(steps=TEST_DAYS + FUTURE_DAYS, exog=exog_test).to_list()
    resultados.append({'i': i, 'prediction': predictions, 'test': y_test.values})



In [9]:
# One row per product, built from the forecasting results.
tiendas_inventario = pd.DataFrame(resultados)

# Actual units sold during the hold-out window.
actual_sales = tiendas_inventario['test']
tiendas_inventario['test_suma'] = actual_sales.apply(sum)
suma = tiendas_inventario['test_suma'].sum()

# Round every forecast to whole units before aggregating.
rounded = tiendas_inventario['prediction'].apply(lambda pred: np.round(pred, decimals=0))
tiendas_inventario['prediction'] = rounded

# The first 30 forecasts cover the hold-out window; the rest lie beyond it.
tiendas_inventario['tiendas_inventario_test'] = rounded.apply(lambda pred: sum(pred[:30]))
tiendas_inventario['tiendas_inventario_prediction'] = rounded.apply(lambda pred: sum(pred[30:]))

summary_columns = ['i', 'test_suma', 'tiendas_inventario_test', 'tiendas_inventario_prediction']
tiendas_inventario[summary_columns]

Unnamed: 0,i,test_suma,tiendas_inventario_test,tiendas_inventario_prediction
0,ACCESORIES_1_108,266,293.0,221.0
1,HOME_&_GARDEN_1_027,624,683.0,703.0
2,HOME_&_GARDEN_1_053,1012,909.0,836.0
3,HOME_&_GARDEN_1_140,485,437.0,397.0
4,HOME_&_GARDEN_1_177,477,892.0,775.0
...,...,...,...,...
95,SUPERMARKET_3_499,3379,2829.0,3190.0
96,ACCESORIES_1_158,1633,1509.0,1381.0
97,SUPERMARKET_3_282,6010,5594.0,5289.0
98,ACCESORIES_1_354,1602,1672.0,1657.0


In [10]:
# Aggregate actual vs. predicted units over the hold-out window, across all products.
total_inventario_productos_real = tiendas_inventario['test_suma'].sum()
total_inventario_productos_predicho = tiendas_inventario['tiendas_inventario_test'].sum()

# Printed one per line: actual total, predicted total, and their difference.
print(
    total_inventario_productos_real,
    total_inventario_productos_predicho,
    total_inventario_productos_real - total_inventario_productos_predicho,
    sep='\n',
)

240920
239651.0
1269.0


In [11]:
# Fresh frame from the raw results (predictions here are not rounded).
units_globales = pd.DataFrame(data=resultados)

In [14]:
# Export the per-product results to an Excel workbook in the current working directory.
file_name = "units_global.xlsx"
units_globales.to_excel(excel_writer=file_name)