In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import sys
import os
import pickle

In [2]:
# Put the parent of the current working directory on sys.path so that
# project-local modules (presumably the `utils` package imported further down)
# can be imported from this notebook.
from pathlib import Path
parent_dir = Path.cwd().parents[0]
parent_dir_str = os.path.abspath(parent_dir)
# Guard the append so re-running this cell does not pile up duplicate entries.
if parent_dir_str not in sys.path:
    sys.path.append(parent_dir_str)

In [3]:
# Read the raw training data and convert the `date_id` column to pandas
# datetimes in a single, non-mutating chain.
train_path = '../../data/training_data.csv'
train_df = pd.read_csv(train_path).assign(
    date_id=lambda frame: pd.to_datetime(frame['date_id'])
)

In [4]:
# Preview the first five rows of the raw training data.
train_df.head(n=5)

Unnamed: 0,date_id,item_dept,item_qty,net_sales,store,item,invoice_num
0,2021-11-01,Grocery,1.0,160.0,XYZ,16620,1475459.0
1,2021-11-01,Grocery,2.0,480.0,XYZ,32365,1475459.0
2,2021-11-01,Grocery,1.0,127.0,XYZ,31349,1475459.0
3,2021-11-01,Household,2.0,110.0,XYZ,1266,1475475.0
4,2021-11-01,Household,1.0,150.0,XYZ,114920,1475475.0


In [5]:
# Sum quantity and net sales for every (date, store, department) combination
# and turn the group keys back into ordinary columns.
train_df_gb = (
    train_df
    .groupby(by=['date_id', 'store', 'item_dept'])[['item_qty', 'net_sales']]
    .sum()
    .reset_index()
)

In [6]:
# Preview the first ten rows of the manually aggregated frame.
train_df_gb.head(n=10)

Unnamed: 0,date_id,store,item_dept,item_qty,net_sales
0,2021-11-01,ABC,Beverages,917.0,254547.79901
1,2021-11-01,ABC,Grocery,2360.399,437286.62402
2,2021-11-01,ABC,Household,1043.0,247336.26601
3,2021-11-01,XYZ,Beverages,779.0,187931.76902
4,2021-11-01,XYZ,Grocery,2987.06,491450.723
5,2021-11-01,XYZ,Household,1124.0,247156.009988
6,2021-11-02,ABC,Beverages,1140.0,277904.838
7,2021-11-02,ABC,Grocery,3021.684,582206.592985
8,2021-11-02,ABC,Household,1195.0,292020.462024
9,2021-11-02,XYZ,Beverages,922.0,251330.13


In [7]:
# Load the pickled preprocessor and model objects from disk.
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file,
# so only load pickle files that come from a trusted source.
# The `with` blocks close each file handle as soon as the object is loaded,
# instead of leaving the handles open until garbage collection.
with open('../models/preprocessor.pkl', 'rb') as preprocessor_file:
    loaded_preprocessor = pickle.load(preprocessor_file)
with open('../models/final_model.pkl', 'rb') as model_file:
    trained_model = pickle.load(model_file)

In [8]:
# Aggregate the raw rows with the preprocessor's own grouping helper.
# NOTE(review): `_groupby_df` is underscore-prefixed (private by convention),
# and this assignment rebinds `train_df_gb`, replacing the frame produced by
# the manual groupby in cell In [5]. Judging by the rendered outputs the two
# results differ only in column/row ordering (item_dept before store) — confirm.
train_df_gb = loaded_preprocessor._groupby_df(train_df)

In [9]:
# Display the frame returned by the preprocessor's grouping helper
# (the rendered output below shows only the first and last rows).
train_df_gb

Unnamed: 0,date_id,item_dept,store,item_qty,net_sales
0,2021-11-01,Beverages,ABC,917.000,254547.79901
1,2021-11-01,Beverages,XYZ,779.000,187931.76902
2,2021-11-01,Grocery,ABC,2360.399,437286.62402
3,2021-11-01,Grocery,XYZ,2987.060,491450.72300
4,2021-11-01,Household,ABC,1043.000,247336.26601
...,...,...,...,...,...
547,2022-01-31,Beverages,XYZ,791.000,179122.36501
548,2022-01-31,Grocery,ABC,2603.874,542643.93100
549,2022-01-31,Grocery,XYZ,2996.909,563043.73400
550,2022-01-31,Household,ABC,1115.000,314800.26999


In [10]:
# Run the loaded preprocessor on the raw training data, then preview the
# first 20 rows of the transformed frame.
train_df_processed = loaded_preprocessor.transform(train_df)
train_df_processed.head(n=20)

Unnamed: 0,date_id,item_dept,store,item_qty,net_sales,lag_item_qty_1,lag_item_qty_2,lag_item_qty_3,rolling_mean_item_qty_3,rolling_std_item_qty_3,cumsum_item_qty,cummean_item_qty,expanding_min_item_qty,expanding_max_item_qty,diff_item_qty,diff_item_qty_7,lag_net_sales_1,lag_net_sales_2,lag_net_sales_3,rolling_mean_net_sales_3,rolling_std_net_sales_3,cumsum_net_sales,cummean_net_sales,expanding_min_net_sales,expanding_max_net_sales,diff_net_sales,diff_net_sales_7,day_of_week,isWeekend,Is_Holiday,store_XYZ,item_dept_Grocery,item_dept_Household
42,2021-11-08,Beverages,ABC,974.0,246984.682,937.0,884.0,991.0,937.333333,53.500779,6962.0,994.571429,837.0,1256.0,53.0,57.0,218668.693,241963.0,255695.365,238775.686,18717.981669,1703852.0,243407.457717,199145.59601,277904.838,-23294.307,57.0,0,0,0,0.0,0.0,0.0
43,2021-11-08,Beverages,XYZ,677.0,201386.081,1043.0,1086.0,910.0,1013.0,91.755109,6495.0,927.857143,779.0,1086.0,-43.0,-102.0,289993.0,262426.08,217746.0,256721.693333,36459.734951,1675697.0,239385.355431,187931.76902,289993.0,27566.92,-102.0,0,0,0,1.0,0.0,0.0
44,2021-11-08,Grocery,ABC,2326.638,438841.43801,2646.82,2614.006,2654.582,2638.469333,21.538411,18100.925,2585.846429,2155.128,3021.684,32.814,-33.761,505512.74297,548660.9,479383.66397,511185.76898,34985.300857,3473914.0,496273.373853,397946.965024,582206.592985,-43148.15703,-33.761,0,0,0,0.0,1.0,0.0
45,2021-11-08,Grocery,XYZ,2879.414,498037.791,3608.962,3625.864,3287.828,3507.551333,190.473559,23689.611,3384.230143,2987.06,3625.864,-16.902,-107.646,661655.00001,622856.562004,557072.729,613861.430338,52868.205452,4010397.0,572913.907431,491450.723,661655.00001,38798.438006,-107.646,0,0,0,1.0,1.0,0.0
46,2021-11-08,Household,ABC,883.0,240294.89201,998.0,1152.0,986.0,1045.333333,92.570694,7114.0,1016.285714,819.0,1195.0,-154.0,-160.0,233781.880016,284326.0,265589.69202,261232.524012,25552.215188,1775416.0,253630.794154,209100.513996,292020.462024,-50544.119984,-160.0,0,0,0,0.0,0.0,1.0
47,2021-11-08,Household,XYZ,944.0,237517.072,1252.0,1315.0,963.0,1176.666667,187.702779,7906.0,1129.428571,948.0,1315.0,-63.0,-180.0,283841.0,305140.577002,222311.011,270430.862667,43012.301595,1798679.0,256954.189714,222311.011,305140.577002,-21299.577002,-180.0,0,0,0,1.0,0.0,1.0
48,2021-11-09,Beverages,ABC,815.0,199577.0,974.0,937.0,884.0,931.666667,45.236416,7936.0,992.0,837.0,1256.0,37.0,-325.0,246984.682,218668.693,241963.0,235872.125,15108.702028,1950837.0,243854.610752,199145.59601,277904.838,28315.989,-325.0,1,0,0,0.0,0.0,0.0
49,2021-11-09,Beverages,XYZ,971.0,263813.854,677.0,1043.0,1086.0,935.333333,224.75394,7172.0,896.5,677.0,1086.0,-366.0,49.0,201386.081,289993.0,262426.08,251268.387,45344.978867,1877084.0,234635.446128,187931.76902,289993.0,-88606.919,49.0,1,0,0,1.0,0.0,0.0
50,2021-11-09,Grocery,ABC,2489.046,497413.33899,2326.638,2646.82,2614.006,2529.154667,176.150333,20427.563,2553.445375,2155.128,3021.684,-320.182,-532.638,438841.43801,505512.74297,548660.9,497671.69366,55328.022725,3912755.0,489094.381872,397946.965024,582206.592985,-66671.30496,-532.638,1,0,0,0.0,1.0,0.0
51,2021-11-09,Grocery,XYZ,3608.112,622938.069,2879.414,3608.962,3625.864,3371.413333,426.167722,26569.025,3321.128125,2879.414,3625.864,-729.548,-15.604,498037.791,661655.00001,622856.562004,594183.117671,85494.283894,4508435.0,563554.392877,491450.723,661655.00001,-163617.20901,-15.604,1,0,0,1.0,1.0,0.0


In [11]:
# NOTE(review): this import sits below the sys.path setup in cell In [2];
# presumably `utils` is only importable after that cell has run — confirm
# before moving it into the top imports cell.
from utils.model_helpers import generate_forecasting_df

# Forecast horizon, named so it can be changed in one place.
FORECAST_START = '2022-02-01'
FORECAST_END = '2022-02-28'

# Bind the forecast to a name so later cells can reuse it without relying on
# the IPython output cache (`_` / `Out[11]`), then display it as before.
forecast_df = generate_forecasting_df(
    start_date=FORECAST_START,
    end_date=FORECAST_END,
    depts_list=train_df['item_dept'].unique(),
    stores_list=train_df['store'].unique(),
    historical_df=train_df_gb,
    preprocessor=loaded_preprocessor,
    dual_model=trained_model,
)
forecast_df

Unnamed: 0,date_id,item_dept,store,net_sales,item_qty
0,2022-02-01,Grocery,XYZ,683009.846470,3512.437157
1,2022-02-01,Grocery,ABC,697563.119719,3086.222626
2,2022-02-01,Household,XYZ,247067.375887,1160.606818
3,2022-02-01,Household,ABC,274217.838980,1202.294195
4,2022-02-01,Beverages,XYZ,222458.745834,898.002637
...,...,...,...,...,...
163,2022-02-28,Grocery,ABC,588028.037395,2995.321694
164,2022-02-28,Household,XYZ,240592.911774,1212.149357
165,2022-02-28,Household,ABC,261885.076127,1159.244110
166,2022-02-28,Beverages,XYZ,216969.282702,948.449825
