In [14]:
from datetime import datetime
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import skforecast
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster
import shap


## Load Dataset

In [15]:
# Read the top-100 products sales table into `ts`, then report its size and
# show one randomly chosen row as a sanity check.
csv_path = '../../../data/ts_top100_prediction.csv'
ts = pd.read_csv(csv_path)
print(ts.shape)
ts.sample(1)

(1739323, 6)


Unnamed: 0,date,id,item,sales,income,event
1376238,2015-04-27,SUPERMARKET_1_096_PHI_1,SUPERMARKET_1_096,17,144.84,0


### Transform the _date_ column to _datetime_ format and create the **weekday** column (exogenous variable)

In [16]:
# Parse the 'YYYY-MM-DD' strings once, store them back as datetimes and derive
# the English day name that is later one-hot encoded as an exogenous feature.
parsed_dates = pd.to_datetime(ts['date'], format='%Y-%m-%d')
ts['date'] = parsed_dates
ts['weekday'] = parsed_dates.dt.day_name()
ts.head(1)

Unnamed: 0,date,id,item,sales,income,event,weekday
0,2011-01-29,ACCESORIES_1_108_NYC_1,ACCESORIES_1_108,4,64.4784,0,Saturday


### Create the **store_code** column to filter by store

In [17]:
# Derive the store code from the last five characters of the id
# (e.g. 'ACCESORIES_1_108_NYC_1' -> 'NYC_1'). The vectorised `.str` accessor
# replaces the per-row Python lambda and leaves missing ids as NaN instead of
# raising.
# NOTE(review): this assumes every store code is exactly five characters long
# (as in NYC_1 / PHI_1 / BOS_2) -- confirm before adding stores with longer codes.
ts['store_code'] = ts['id'].str[-5:]
ts.head(2)

Unnamed: 0,date,id,item,sales,income,event,weekday,store_code
0,2011-01-29,ACCESORIES_1_108_NYC_1,ACCESORIES_1_108,4,64.4784,0,Saturday,NYC_1
1,2011-01-29,HOME_&_GARDEN_1_027_NYC_1,HOME_&_GARDEN_1_027,4,26.2,0,Saturday,NYC_1


### Run the model on the top 100 products

In [18]:
# Forecast daily unit sales for every product of one store.
#
# For each product a recursive autoregressive random forest is fitted on all
# but the last TEST_DAYS observations and then asked for
# TEST_DAYS + FORECAST_DAYS steps: the first TEST_DAYS steps overlap the
# held-out data (kept under 'test' for comparison) and the remaining
# FORECAST_DAYS steps extend past the last observed date.

# Tunable settings
store = 'BOS_2'       # store_code whose products are forecast
TEST_DAYS = 30        # held-out tail of every series
FORECAST_DAYS = 30    # days predicted beyond the last observed date
N_LAGS = 14           # autoregressive lags fed to the regressor
RANDOM_STATE = 42     # fixes the forest so a re-run reproduces the forecasts

# List to save results (one dict per product)
resultados = []

# Restrict to the selected store once instead of re-scanning the full frame
# for every product. Nothing below mutates `ts`, so no defensive copy is needed.
df_store = ts[ts['store_code'] == store]

# Products come from the whole dataset; items not sold in this store simply
# trigger the "not enough data" warning below.
productos = list(ts['item'].unique())

for producto in productos:
    df_filtrado = df_store[df_store['item'] == producto]

    # After holding out TEST_DAYS rows, the training part must be strictly
    # longer than N_LAGS, otherwise no lagged training sample can be built.
    # That requires at least TEST_DAYS + N_LAGS + 1 observations.
    if len(df_filtrado) <= TEST_DAYS + N_LAGS:
        print(f"Warning: There is not enough data for the product {producto}. Skip")
        continue

    # Prepare data: one-hot encode the weekday and index by date at daily frequency.
    # asfreq('D') makes the frequency explicit; a missing calendar day would
    # show up as a NaN row.
    ts_predict = (
        pd.get_dummies(
            data=df_filtrado[['date', 'sales', 'weekday', 'event']],
            columns=['weekday'],
            dtype=int,
        )
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .sort_values('date', ascending=True)
        .set_index('date')
        .asfreq('D')
    )

    # Separate target and exogenous variables, then split train / test
    y = ts_predict['sales']
    exog = ts_predict.drop(columns=['sales'])
    y_train, y_test = y.iloc[:-TEST_DAYS], y.iloc[-TEST_DAYS:]
    exog_train, exog_test = exog.iloc[:-TEST_DAYS], exog.iloc[-TEST_DAYS:]

    # Initializing the model
    forecaster = ForecasterAutoreg(
        regressor=RandomForestRegressor(max_depth=None,
                                        min_samples_leaf=2,
                                        min_samples_split=5,
                                        n_estimators=100,
                                        random_state=RANDOM_STATE),
        lags=N_LAGS
    )

    # Fit the model
    forecaster.fit(y=y_train, exog=exog_train)

    # Exogenous variables for the days after the last observation. The dates
    # are derived from the series itself so they always continue it without a
    # gap or overlap. Future events are unknown, so 'event' is set to 0.
    future_index = pd.date_range(
        start=y.index[-1] + pd.Timedelta(days=1),
        periods=FORECAST_DAYS,
        freq='D',
        name='date',
    )
    exog_future = pd.get_dummies(
        data=pd.DataFrame(
            {'event': 0, 'weekday': future_index.day_name().to_numpy()},
            index=future_index,
        ),
        columns=['weekday'],
        dtype=int,
    )
    # Guarantee the same columns, in the same order, as seen during training.
    exog_future = exog_future.reindex(columns=exog_train.columns, fill_value=0)
    exog_test = pd.concat([exog_test, exog_future], ignore_index=False)

    # Predictions: held-out window followed by the future window
    predicciones = forecaster.predict(steps=TEST_DAYS + FORECAST_DAYS, exog=exog_test)

    # Store results
    resultados.append({
        'producto': producto,
        'prediccion': predicciones.values,
        'test': y_test.values
    })




In [19]:
# Collect the per-product result dicts into one DataFrame
# (columns: producto, prediccion, test).
tiendas_inventario = pd.DataFrame(resultados)

In [20]:
# Display the per-product results table.
tiendas_inventario

Unnamed: 0,producto,prediccion,test
0,ACCESORIES_1_108,"[0.3843611111111111, 0.4597142857142857, 0.716...","[1, 1, 0, 0, 0, 2, 3, 1, 3, 1, 0, 0, 0, 4, 0, ..."
1,HOME_&_GARDEN_1_027,"[1.534845238095238, 1.8993266178266177, 2.1580...","[1, 1, 1, 1, 2, 3, 0, 1, 1, 1, 2, 0, 3, 2, 3, ..."
2,HOME_&_GARDEN_1_053,"[5.434753968253968, 4.493805555555556, 3.01996...","[9, 5, 5, 1, 2, 0, 8, 7, 6, 5, 3, 2, 7, 4, 1, ..."
3,HOME_&_GARDEN_1_140,"[2.3733091630591634, 2.6114087301587303, 1.712...","[4, 4, 0, 3, 1, 1, 3, 3, 1, 0, 2, 1, 1, 2, 2, ..."
4,HOME_&_GARDEN_1_177,"[3.040999999999999, 3.1001183261183267, 2.5211...","[6, 6, 0, 2, 2, 1, 1, 3, 5, 2, 4, 1, 1, 0, 0, ..."
...,...,...,...
95,SUPERMARKET_3_499,"[9.029895382395383, 15.510086940836938, 13.171...","[9, 14, 10, 10, 21, 16, 9, 11, 6, 0, 0, 10, 12..."
96,ACCESORIES_1_158,"[9.503571428571423, 11.71071428571429, 7.61305...","[16, 9, 8, 6, 4, 11, 4, 7, 10, 11, 9, 6, 3, 9,..."
97,SUPERMARKET_3_282,"[23.09711111111111, 25.612444444444442, 23.354...","[14, 33, 22, 12, 11, 20, 19, 31, 52, 36, 35, 2..."
98,ACCESORIES_1_354,"[11.340029581529587, 14.423282828282828, 11.29...","[14, 7, 9, 7, 8, 9, 12, 5, 11, 15, 13, 12, 8, ..."


In [21]:
# Actual units sold per product over the held-out window, plus the grand total.
tiendas_inventario['test_suma'] = tiendas_inventario['test'].apply(sum)
suma = tiendas_inventario['test_suma'].sum()

# Round every daily forecast to whole units before adding them up.
rounded_forecasts = tiendas_inventario['prediccion'].apply(
    lambda daily: np.round(daily, decimals=0)
)
tiendas_inventario['prediccion'] = rounded_forecasts

# The first 30 forecast steps overlap the held-out window; the rest lie after it.
tiendas_inventario['tiendas_inventario_test'] = rounded_forecasts.apply(
    lambda daily: sum(daily[:30])
)
tiendas_inventario['tiendas_inventario_prediction'] = rounded_forecasts.apply(
    lambda daily: sum(daily[30:])
)

summary_columns = [
    'producto',
    'test_suma',
    'tiendas_inventario_test',
    'tiendas_inventario_prediction',
]
tiendas_inventario[summary_columns]

Unnamed: 0,producto,test_suma,tiendas_inventario_test,tiendas_inventario_prediction
0,ACCESORIES_1_108,27,18.0,21.0
1,HOME_&_GARDEN_1_027,43,47.0,42.0
2,HOME_&_GARDEN_1_053,115,96.0,78.0
3,HOME_&_GARDEN_1_140,64,59.0,57.0
4,HOME_&_GARDEN_1_177,34,64.0,59.0
...,...,...,...,...
95,SUPERMARKET_3_499,388,375.0,382.0
96,ACCESORIES_1_158,229,255.0,248.0
97,SUPERMARKET_3_282,758,503.0,410.0
98,ACCESORIES_1_354,298,337.0,348.0


In [22]:
# Per-product sum of the rounded forecasts from step 31 onward.
tiendas_inventario['tiendas_inventario_prediction']

0      21.0
1      42.0
2      78.0
3      57.0
4      59.0
      ...  
95    382.0
96    248.0
97    410.0
98    348.0
99    183.0
Name: tiendas_inventario_prediction, Length: 100, dtype: float64

In [23]:
# Store-wide totals over the held-out window: actual units, forecast units,
# and their difference (negative means the model over-forecast).
total_inventario_productos_real = tiendas_inventario['test_suma'].sum()
total_inventario_productos_predicho = tiendas_inventario['tiendas_inventario_test'].sum()
print(
    total_inventario_productos_real,
    total_inventario_productos_predicho,
    total_inventario_productos_real - total_inventario_productos_predicho,
    sep='\n',
)

27640
29339.0
-1699.0


In [24]:
# Write the per-product table to an Excel workbook named after the store.
file_name = f"{store}_units.xlsx"
tiendas_inventario.to_excel(excel_writer=file_name)