# Final Notebook: Data Preprocessing, Modelling, and Inference

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import sys
import os

In [2]:
# Make the project-local `utils` package importable by putting the notebook's
# parent directory on sys.path.
from pathlib import Path
parent_dir = Path.cwd().parents[0]
# Guarded append: re-running this cell must not stack duplicate sys.path entries.
if os.path.abspath(parent_dir) not in sys.path:
    sys.path.append(os.path.abspath(parent_dir))

### Loading Train and Test Data

In [3]:
# Read the raw training and test extracts (paths are relative to the working directory).
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../../data/training_data.csv', '../../data/test_data.csv')
)

In [4]:
# Parse the `date_id` column of both frames into pandas datetimes (column replaced in place).
train_df['date_id'] = train_df['date_id'].pipe(pd.to_datetime)
test_df['date_id'] = test_df['date_id'].pipe(pd.to_datetime)

In [5]:
# Instantiate the preprocessing pipeline (project-local class from utils/).
# NOTE(review): num_lags=3 and rolling_window_size=3 presumably produce the lag_*_1..3
# and rolling_*_3 columns seen in the processed frame — confirm against the class.
from utils.DataPreprocessPipeline import DataPreprocessPipeline
preprocessor = DataPreprocessPipeline(num_lags=3, rolling_window_size=3)



In [6]:
# Stack train and test row-wise so both are preprocessed together and the
# time-series features are built over the combined data.
# The defaults are spelled out: rows are appended (axis=0) and each frame keeps its
# original row labels (ignore_index=False).
full_df = pd.concat(objs=[train_df, test_df], axis=0, ignore_index=False)

In [7]:
# Inspect the combined train + test frame (pandas truncates the middle rows on display).
full_df

Unnamed: 0,date_id,item_dept,item_qty,net_sales,store,item,invoice_num
0,2021-11-01,Grocery,1.0,160.0,XYZ,16620,1475459.0
1,2021-11-01,Grocery,2.0,480.0,XYZ,32365,1475459.0
2,2021-11-01,Grocery,1.0,127.0,XYZ,31349,1475459.0
3,2021-11-01,Household,2.0,110.0,XYZ,1266,1475475.0
4,2021-11-01,Household,1.0,150.0,XYZ,114920,1475475.0
...,...,...,...,...,...,...,...
247619,2022-02-28,Grocery,5.0,500.0,XYZ,114873,
247620,2022-02-28,Household,4.0,2361.0,XYZ,117497,
247621,2022-02-28,Grocery,2.0,480.0,XYZ,77298,
247622,2022-02-28,Beverages,1.0,1900.0,XYZ,1444,


In [12]:
# Run the preprocessing pipeline on the combined train + test frame.
# NOTE(review): the recorded output is one row per date / item_dept / store with lag,
# rolling, cumulative, diff, trend, calendar and one-hot columns — confirm the exact
# contract in DataPreprocessPipeline.transform.
full_df_processed = preprocessor.transform(full_df)

In [9]:
# Scratch check: degree-1 least-squares fit through three points; np.polyfit returns the
# coefficients highest power first, i.e. [slope, intercept].
# NOTE(review): presumably a manual check of how the trend_* features are computed —
# confirm, and consider removing this cell (it was executed out of order).
np.polyfit(x = [1, 2, 3], y = [917, 1140, 837], deg=1)

array([ -40.        , 1044.66666667])

In [13]:
# Preview the first 20 rows of the processed frame.
full_df_processed.head(20)

Unnamed: 0,date_id,item_dept,store,item_qty,net_sales,lag_item_qty_1,lag_item_qty_2,lag_item_qty_3,rolling_mean_item_qty_3,rolling_std_item_qty_3,cumsum_item_qty,cummean_item_qty,expanding_min_item_qty,expanding_max_item_qty,diff_item_qty,diff_item_qty_7,trend_item_qty,lag_net_sales_1,lag_net_sales_2,lag_net_sales_3,rolling_mean_net_sales_3,rolling_std_net_sales_3,cumsum_net_sales,cummean_net_sales,expanding_min_net_sales,expanding_max_net_sales,diff_net_sales,diff_net_sales_7,trend_net_sales,day_of_week,isWeekend,Is_Holiday,store_XYZ,item_dept_Grocery,item_dept_Household
48,2021-11-09,Beverages,ABC,815.0,199577.0,974.0,937.0,884.0,931.666667,45.236416,7936.0,992.0,837.0,1256.0,37.0,57.0,-8.809524,246984.682,218668.693,241963.0,235872.125,15108.702028,1950837.0,243854.610752,199145.59601,277904.838,28315.989,-7563.11701,-2629.784287,1,0,0,0.0,0.0,0.0
49,2021-11-09,Beverages,XYZ,971.0,263813.854,677.0,1043.0,1086.0,935.333333,224.75394,7172.0,896.5,677.0,1086.0,-366.0,-102.0,7.404762,201386.081,289993.0,262426.08,251268.387,45344.978867,1877084.0,234635.446128,187931.76902,289993.0,-88606.919,13454.31198,4394.378891,1,0,0,1.0,0.0,0.0
50,2021-11-09,Grocery,ABC,2489.046,497413.33899,2326.638,2646.82,2614.006,2529.154667,176.150333,20427.563,2553.445375,2155.128,3021.684,-320.182,-33.761,-8.663536,438841.43801,505512.74297,548660.9,497671.69366,55328.022725,3912755.0,489094.381872,397946.965024,582206.592985,-66671.30496,1554.81399,428.854628,1,0,0,0.0,1.0,0.0
51,2021-11-09,Grocery,XYZ,3608.112,622938.069,2879.414,3608.962,3625.864,3371.413333,426.167722,26569.025,3321.128125,2879.414,3625.864,-729.548,-107.646,2.449369,498037.791,661655.00001,622856.562004,594183.117671,85494.283894,4508435.0,563554.392877,491450.723,661655.00001,-163617.20901,6587.068,6958.502798,1,0,0,1.0,1.0,0.0
52,2021-11-09,Household,ABC,930.0,229917.0,883.0,998.0,1152.0,1011.0,134.970367,7997.0,999.625,819.0,1195.0,-115.0,-160.0,-12.392857,240294.89201,233781.880016,284326.0,252800.924009,27495.047618,2015710.0,251963.806386,209100.513996,292020.462024,6513.011994,-7041.374,-1100.918131,1,0,0,0.0,0.0,1.0
53,2021-11-09,Household,XYZ,1118.0,277746.993,944.0,1252.0,1315.0,1170.333333,198.525397,8850.0,1106.25,944.0,1315.0,-308.0,-180.0,-4.095238,237517.072,283841.0,305140.577002,275499.549667,34574.83843,2036196.0,254524.549999,222311.011,305140.577002,-46323.928,-9638.937988,2392.739691,1,0,0,1.0,0.0,1.0
54,2021-11-10,Beverages,ABC,924.0,238515.243,815.0,974.0,937.0,908.666667,83.200561,8751.0,972.333333,815.0,1256.0,-159.0,-325.0,-17.966667,199577.0,246984.682,218668.693,221743.458333,23852.939327,2150414.0,238934.876224,199145.59601,277904.838,-47407.682,-78327.838,-4792.689718,2,0,0,0.0,0.0,0.0
55,2021-11-10,Beverages,XYZ,934.0,259906.508,971.0,677.0,1043.0,897.0,193.89688,8143.0,904.777778,677.0,1086.0,294.0,49.0,10.15,263813.854,201386.081,289993.0,251730.978333,45522.449813,2140897.0,237877.491447,187931.76902,289993.0,62427.773,12483.724,5021.292415,2,0,0,1.0,0.0,0.0
56,2021-11-10,Grocery,ABC,2876.364,522461.826,2489.046,2326.638,2646.82,2487.501333,160.096589,22916.609,2546.289889,2155.128,3021.684,162.408,-532.638,-10.357767,497413.33899,438841.43801,505512.74297,480589.173323,36380.697341,4410168.0,490018.710441,397946.965024,582206.592985,58571.90098,-84793.253995,854.795381,2,0,0,0.0,1.0,0.0
57,2021-11-10,Grocery,XYZ,3666.634,635110.826,3608.112,2879.414,3608.962,3365.496,420.959575,30177.137,3353.015222,2879.414,3625.864,728.698,-15.604,20.846817,622938.069,498037.791,661655.00001,594210.28667,85507.963835,5131373.0,570152.579113,491450.723,661655.00001,124900.278,30742.579998,8829.8637,2,0,0,1.0,1.0,0.0


In [14]:
# Split the preprocessed frame into train and test dictionaries via the project helper.
# NOTE(review): test_date_start='2022-02-01' presumably marks the first date of the
# test period — confirm in utils.model_helpers.create_training_testing.
from utils.model_helpers import create_training_testing
train_dict, test_dict = create_training_testing(full_df_processed, test_date_start='2022-02-01')

In [15]:
# Pull the feature matrix and the two targets (net sales, item quantity) out of each dictionary.
train_X, train_y_sales, train_y_item_qty = (
    train_dict[key] for key in ('train_features', 'train_net_sales', 'train_item_qty')
)

# NOTE(review): the test dictionary is read with the same 'train_*' keys as the training
# dictionary — presumably create_training_testing uses one key set for both; confirm.
test_X, test_y_sales, test_y_item_qty = (
    test_dict[key] for key in ('train_features', 'train_net_sales', 'train_item_qty')
)

In [16]:
# Inspect the training feature matrix.
train_X

Unnamed: 0,lag_item_qty_1,lag_item_qty_2,lag_item_qty_3,rolling_mean_item_qty_3,rolling_std_item_qty_3,cumsum_item_qty,cummean_item_qty,expanding_min_item_qty,expanding_max_item_qty,diff_item_qty,diff_item_qty_7,trend_item_qty,lag_net_sales_1,lag_net_sales_2,lag_net_sales_3,rolling_mean_net_sales_3,rolling_std_net_sales_3,cumsum_net_sales,cummean_net_sales,expanding_min_net_sales,expanding_max_net_sales,diff_net_sales,diff_net_sales_7,trend_net_sales,day_of_week,isWeekend,Is_Holiday,store_XYZ,item_dept_Grocery,item_dept_Household
48,974.000,937.000,884.000,931.666667,45.236416,7936.000,992.000000,837.000,1256.000,37.000,57.000,-8.809524,246984.68200,218668.693000,241963.000000,235872.125000,15108.702028,1.950837e+06,243854.610752,199145.596010,2.779048e+05,28315.989000,-7563.11701,-2629.784287,1,0,0,0.0,0.0,0.0
49,677.000,1043.000,1086.000,935.333333,224.753940,7172.000,896.500000,677.000,1086.000,-366.000,-102.000,7.404762,201386.08100,289993.000000,262426.080000,251268.387000,45344.978867,1.877084e+06,234635.446128,187931.769020,2.899930e+05,-88606.919000,13454.31198,4394.378891,1,0,0,1.0,0.0,0.0
50,2326.638,2646.820,2614.006,2529.154667,176.150333,20427.563,2553.445375,2155.128,3021.684,-320.182,-33.761,-8.663536,438841.43801,505512.742970,548660.900000,497671.693660,55328.022725,3.912755e+06,489094.381872,397946.965024,5.822066e+05,-66671.304960,1554.81399,428.854628,1,0,0,0.0,1.0,0.0
51,2879.414,3608.962,3625.864,3371.413333,426.167722,26569.025,3321.128125,2879.414,3625.864,-729.548,-107.646,2.449369,498037.79100,661655.000010,622856.562004,594183.117671,85494.283894,4.508435e+06,563554.392877,491450.723000,6.616550e+05,-163617.209010,6587.06800,6958.502798,1,0,0,1.0,1.0,0.0
52,883.000,998.000,1152.000,1011.000000,134.970367,7997.000,999.625000,819.000,1195.000,-115.000,-160.000,-12.392857,240294.89201,233781.880016,284326.000000,252800.924009,27495.047618,2.015710e+06,251963.806386,209100.513996,2.920205e+05,6513.011994,-7041.37400,-1100.918131,1,0,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,1048.000,1255.000,1336.000,1213.000000,148.522726,92683.000,1018.494505,645.000,1761.000,-207.000,-230.000,0.967638,239877.76400,337437.000000,513057.000000,363457.254667,138435.956258,2.381819e+07,261738.336506,149946.090000,5.130570e+05,-97559.236000,-142750.23600,318.069279,0,0,0,1.0,0.0,0.0
548,2869.215,2587.436,2358.758,2605.136333,255.688411,236593.356,2599.926989,1779.437,7787.289,281.779,717.105,-1.440971,585570.03698,551861.601020,464186.114000,533872.584000,62659.538296,4.889516e+07,537309.461967,367440.183000,2.035445e+06,33708.435960,152000.05698,356.285934,0,0,0,0.0,1.0,0.0
549,4040.207,3539.002,3162.792,3580.667000,440.188880,306311.340,3366.058681,2396.285,5963.381,501.205,-144.120,-1.301238,756342.46200,667129.626000,583466.776000,668979.621333,86452.689747,5.545075e+07,609348.883327,443513.481000,1.161787e+06,89212.836000,-51907.11100,348.652326,0,0,0,1.0,1.0,0.0
550,1170.000,1236.000,1062.000,1156.000000,87.840765,94915.000,1043.021978,718.000,1606.000,-66.000,296.000,0.186909,323308.26797,349645.837880,290746.998990,321233.701613,29504.172160,2.512344e+07,276081.704621,181968.999000,4.518945e+05,-26337.569910,79452.26797,357.747163,0,0,0,0.0,0.0,1.0


In [17]:
# Inspect the test feature matrix (same columns as train_X in the recorded output).
test_X

Unnamed: 0,lag_item_qty_1,lag_item_qty_2,lag_item_qty_3,rolling_mean_item_qty_3,rolling_std_item_qty_3,cumsum_item_qty,cummean_item_qty,expanding_min_item_qty,expanding_max_item_qty,diff_item_qty,diff_item_qty_7,trend_item_qty,lag_net_sales_1,lag_net_sales_2,lag_net_sales_3,rolling_mean_net_sales_3,rolling_std_net_sales_3,cumsum_net_sales,cummean_net_sales,expanding_min_net_sales,expanding_max_net_sales,diff_net_sales,diff_net_sales_7,trend_net_sales,day_of_week,isWeekend,Is_Holiday,store_XYZ,item_dept_Grocery,item_dept_Household
552,887.000,923.000,966.000,925.333333,39.551654,102220.000,1111.086957,756.000,1748.000,-36.000,-105.000,1.492302,2.046880e+05,196153.950000,213541.730000,2.047945e+05,8694.380072,2.626751e+07,285516.456804,168349.26000,5.329314e+05,8534.001010,-104153.548990,339.156879,1,0,0,0.0,0.0,0.0
553,791.000,1048.000,1255.000,1031.333333,232.448561,93474.000,1016.021739,645.000,1761.000,-257.000,22.000,0.776891,1.791224e+05,239877.764000,337437.000000,2.521457e+05,79867.125785,2.399731e+07,260840.336816,149946.09000,5.130570e+05,-60755.398990,-1888.634990,249.873516,1,0,0,1.0,0.0,0.0
554,2603.874,2869.215,2587.436,2686.841667,158.153648,239197.230,2599.969891,1779.437,7787.289,-265.341,286.094,-1.391720,5.426439e+05,585570.036980,551861.601020,5.600252e+05,22597.472831,4.943780e+07,537367.445327,367440.18300,2.035445e+06,-42926.105980,64407.285000,348.533701,1,0,0,0.0,1.0,0.0
555,2996.909,4040.207,3539.002,3525.372667,521.782520,309308.249,3362.046185,2396.285,5963.381,-1043.298,434.602,-1.518133,5.630437e+05,756342.462000,667129.626000,6.621719e+05,96744.682254,5.601379e+07,608845.566486,443513.48100,1.161787e+06,-193298.728000,93324.735000,304.933422,1,0,0,1.0,1.0,0.0
556,1115.000,1170.000,1236.000,1173.666667,60.583276,96030.000,1043.804348,718.000,1606.000,-55.000,180.000,0.231355,3.148003e+05,323308.267970,349645.837880,3.292515e+05,18167.127914,2.543824e+07,276502.558593,181968.99900,4.518945e+05,-8507.997980,58967.269990,373.358801,1,0,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,1751.000,1126.000,1317.000,1398.000000,320.276443,130083.000,1093.134454,645.000,1763.000,625.000,353.000,3.319506,5.310318e+05,269641.753000,396324.003000,3.989992e+05,130715.547043,3.410990e+07,286637.847235,149946.09000,5.813210e+05,261390.028980,221723.017980,1113.022053,0,0,0,1.0,0.0,0.0
716,3752.998,3486.773,4293.282,3844.351000,410.941869,325810.103,2737.900025,1779.437,7787.289,266.225,1051.860,4.788838,7.867650e+05,725599.851278,867613.506013,7.933261e+05,71233.808433,6.772851e+07,569147.160637,367440.18300,2.035445e+06,61165.191660,218564.059951,1408.523359,0,0,0,0.0,1.0,0.0
717,7599.386,5254.694,4246.995,5700.358333,1720.056359,426823.943,3586.755824,2396.285,7599.386,2344.692,2713.212,8.340774,1.596598e+06,985413.796996,805993.474911,1.129335e+06,414486.318350,7.845215e+07,659261.805316,443513.48100,1.596598e+06,611184.257052,660945.272053,2168.671155,0,0,0,1.0,1.0,0.0
718,1642.000,1508.000,1750.000,1633.333333,121.232559,131131.000,1101.941176,718.000,1750.000,134.000,532.000,2.419627,4.441275e+05,390277.067990,453002.517000,4.291357e+05,33943.864780,3.497245e+07,293886.126171,181968.99900,4.530025e+05,53850.418996,141382.266986,857.617268,0,0,0,0.0,0.0,1.0


### Modelling

In [18]:
from utils.SalesItemQtyModel import SalesItemQtyModel

In [19]:
#Need to define 2 models to predict sales and item_qty
from catboost import CatBoostRegressor

# Hyperparameters chosen from hyperparameter tuning.
# Integer-valued settings (border count, tree depth, iteration count, one-hot threshold)
# are stored as ints rather than floats, matching the types CatBoost documents for them.
params = {
    'bagging_temperature': 0.5447824858171504,
    'border_count': 220,
    'depth': 5,
    'iterations': 800,
    'l2_leaf_reg': 13.06135266464145,
    'learning_rate': 0.15538685598864,
    'one_hot_max_size': 34,
    'random_strength': 4.115171860221108,
}

# Two separate regressors with identical settings: one per target (net sales, item quantity).
sales_model, item_qty_model = (CatBoostRegressor(**params, verbose = 0) for _ in range(2))

In [20]:
# Wrap both regressors in the project's dual-target model so a single object trains
# and predicts net sales and item quantity.
sales_item_qty_model = SalesItemQtyModel(
    model_sales=sales_model,
    model_item_qty=item_qty_model
)

In [21]:
# Fit the dual-target model on the training features, passing both targets
# (net sales and item quantity) in one call.
sales_item_qty_model.fit(X = train_X, y_sales = train_y_sales, y_item_qty = train_y_item_qty)

In [22]:
# Rank the training features by the sales model's feature_importances_, highest first.
(
    pd.DataFrame({
        'feature': train_X.columns,
        'importance_sales': sales_item_qty_model.model_sales.feature_importances_,
    })
    .sort_values(by='importance_sales', ascending=False)
)

Unnamed: 0,feature,importance_sales
1,lag_item_qty_2,11.94223
13,lag_net_sales_2,7.391612
7,expanding_min_item_qty,6.652791
24,day_of_week,6.496555
15,rolling_mean_net_sales_3,5.25275
12,lag_net_sales_1,4.711818
0,lag_item_qty_1,4.520617
10,diff_item_qty_7,3.926765
9,diff_item_qty,3.892241
26,Is_Holiday,3.824454


In [23]:
# Rank the training features by the item-quantity model's feature_importances_, highest first.
(
    pd.DataFrame({
        'feature': train_X.columns,
        'importance_item_qty': sales_item_qty_model.model_item_qty.feature_importances_,
    })
    .sort_values(by='importance_item_qty', ascending=False)
)

Unnamed: 0,feature,importance_item_qty
1,lag_item_qty_2,18.731403
7,expanding_min_item_qty,11.981378
15,rolling_mean_net_sales_3,7.550455
13,lag_net_sales_2,6.155495
0,lag_item_qty_1,5.175402
28,item_dept_Grocery,4.903153
24,day_of_week,4.687941
12,lag_net_sales_1,4.056845
18,cummean_net_sales,4.025548
8,expanding_max_item_qty,3.711336


In [24]:
# Persist the fitted dual-target model. Unpickling it later requires the project's
# `utils` package to be importable, because pickle stores custom classes by reference.
from pickle import dump
# Create the output directory first so a fresh checkout does not fail with FileNotFoundError.
os.makedirs("../models", exist_ok=True)
with open("../models/final_model.pkl", "wb") as f:
    # Pickle protocol 5 requires Python 3.8+.
    dump(sales_item_qty_model, f, protocol=5)

In [25]:
# Persist the preprocessing pipeline next to the model.
# Create the output directory first so a fresh checkout does not fail with FileNotFoundError.
os.makedirs("../models", exist_ok=True)
with open("../models/preprocessor.pkl", "wb") as f:
    # Pickle protocol 5 requires Python 3.8+.
    dump(preprocessor, f, protocol=5)

### Model Testing

In [26]:
# Report MAPE on the train and test splits for both targets, using the fitted model.
# NOTE(review): in the recorded output train MAPE is ~0.02 while test MAPE is ~0.17-0.20;
# a gap this large suggests overfitting and is worth calling out.
from utils.model_helpers import get_results_fitted
get_results_fitted(train_dict=train_dict, valid_dict=test_dict, sales_item_qty_model=sales_item_qty_model)

Train Set Results...

MAPE for predicting Sales: 0.01939328030728429
MAPE for predicting Item Qty: 0.019478182536633637

Test Set Results...

MAPE for predicting Sales: 0.20380453078401514
MAPE for predicting Item Qty: 0.17008697986474247



### Production Level Forecasting

In [27]:
# Generate predictions as in a real deployment: forecast using only the training set.

In [28]:
# Aggregate the raw train and test frames with the preprocessor's grouping step.
# NOTE(review): _groupby_df is a private method by naming convention — presumably it
# aggregates to date / store / department level; confirm, and consider exposing a public wrapper.
train_df_gb = preprocessor._groupby_df(train_df)
test_df_gb = preprocessor._groupby_df(test_df)
# Distinct departments and stores seen in the training data; used to build the forecast frame.
depts_list = train_df_gb['item_dept'].unique()
stores_list = train_df_gb['store'].unique()

In [29]:
# Forecast 2022-02-01 through 2022-02-28 for every department/store pair, passing only the
# aggregated training data as history together with the fitted model and preprocessor.
# NOTE(review): how the helper rolls forward day by day is not visible here — see
# utils.model_helpers.generate_forecasting_df.
from utils.model_helpers import generate_forecasting_df
feb_forecasted_df = generate_forecasting_df(start_date='2022-02-01', end_date='2022-02-28', depts_list=depts_list,
                                            stores_list=stores_list,historical_df=train_df_gb, dual_model=sales_item_qty_model,
                                            preprocessor=preprocessor)

In [30]:
# Inspect the forecast frame (one row per date / item_dept / store in the recorded output).
feb_forecasted_df

Unnamed: 0,date_id,item_dept,store,net_sales,item_qty
0,2022-02-01,Beverages,ABC,305927.739037,992.540653
1,2022-02-01,Beverages,XYZ,219013.056069,871.673416
2,2022-02-01,Grocery,ABC,523650.074729,2520.072105
3,2022-02-01,Grocery,XYZ,611353.255042,3230.739556
4,2022-02-01,Household,ABC,288693.799355,1139.701694
...,...,...,...,...,...
163,2022-02-28,Beverages,XYZ,277494.609264,1017.237133
164,2022-02-28,Grocery,ABC,545777.978506,2739.979835
165,2022-02-28,Grocery,XYZ,606018.699498,2970.728568
166,2022-02-28,Household,ABC,262693.918395,1272.948841


In [31]:
# Print MAPE of the forecast against the aggregated test data: overall, per store, and
# per store/department (as shown in the recorded output).
from utils.model_helpers import store_level_results
store_level_results(test_df=test_df_gb, pred_df=feb_forecasted_df)

Overall MAPE score for Sales: 0.18596392751077492
Overall MAPE score for Item Qty: 0.16948743082093515

For Store ABC, MAPE for predicting sales each day: 0.16954004643360163
For Store ABC, MAPE for predicting item qty each day: 0.14077969045410008

For Store ABC and Department Beverages, MAPE for predicting sales each day: 0.2143146315711926
For Store ABC and Department Beverages, MAPE for predicting item qty each day: 0.14978673454726182

For Store ABC and Department Grocery, MAPE for predicting sales each day: 0.15674847900671854
For Store ABC and Department Grocery, MAPE for predicting item qty each day: 0.15666679768873332

For Store ABC and Department Household, MAPE for predicting sales each day: 0.1466203806951752
For Store ABC and Department Household, MAPE for predicting item qty each day: 0.11367552841832972

For Store XYZ, MAPE for predicting sales each day: 0.20238780858794822
For Store XYZ, MAPE for predicting item qty each day: 0.19819517118777016

For Store XYZ and Depa

In [35]:
# Sanity check: both test targets have the same length (168 in the recorded run, which
# matches 28 days x 2 stores x 3 departments).
test_y_sales.shape, test_y_item_qty.shape

((168,), (168,))

In [36]:
# MAPE of the forecast net sales against the held-out test target.
# scikit-learn compares values positionally (pandas index labels are ignored), so this
# relies on both inputs being in the same row order.
# NOTE(review): presumably both are ordered by date, item_dept, store — confirm.
from sklearn.metrics import mean_absolute_percentage_error
mean_absolute_percentage_error(test_y_sales, feb_forecasted_df['net_sales'])

0.18689088893084088

In [37]:
# MAPE of the forecast item quantity against the held-out test target (positional
# comparison; relies on both inputs sharing the same row order).
mean_absolute_percentage_error(test_y_item_qty, feb_forecasted_df['item_qty'])

0.16501176697587275

##### GroupBy Store: Monitoring Store Performance

In [29]:
# Roll the department-level forecast up to one row per date and store by summing both targets.
feb_forecasted_df_stores = (
    feb_forecasted_df
    .groupby(['date_id', 'store'])[['net_sales', 'item_qty']]
    .sum()
    .reset_index()
)

In [30]:
# Preview the department-level forecast.
# NOTE(review): the recorded values here differ from the earlier display of this frame and
# the execution counts restart at 29, so the outputs from this section come from a
# different run — Restart & Run All before relying on them.
feb_forecasted_df.head(10)

Unnamed: 0,date_id,item_dept,store,net_sales,item_qty
0,2022-02-01,Beverages,ABC,256286.176582,1007.133387
1,2022-02-01,Beverages,XYZ,222458.745834,898.002637
2,2022-02-01,Grocery,ABC,697563.119719,3086.222626
3,2022-02-01,Grocery,XYZ,683009.84647,3512.437157
4,2022-02-01,Household,ABC,274217.83898,1202.294195
5,2022-02-01,Household,XYZ,247067.375887,1160.606818
6,2022-02-02,Beverages,ABC,282756.963205,1101.433435
7,2022-02-02,Beverages,XYZ,271272.579183,1147.224903
8,2022-02-02,Grocery,ABC,601553.938929,2936.461767
9,2022-02-02,Grocery,XYZ,595255.98721,3270.000402


In [31]:
# Preview the store-level (date x store) forecast totals.
feb_forecasted_df_stores.head(10)

Unnamed: 0,date_id,store,net_sales,item_qty
0,2022-02-01,ABC,1228067.0,5295.650208
1,2022-02-01,XYZ,1152536.0,5571.046611
2,2022-02-02,ABC,1213581.0,5306.568708
3,2022-02-02,XYZ,1095721.0,5377.643121
4,2022-02-03,ABC,1480473.0,6131.762084
5,2022-02-03,XYZ,1288443.0,6050.982359
6,2022-02-04,ABC,1206545.0,5660.627848
7,2022-02-04,XYZ,1206220.0,5957.66981
8,2022-02-05,ABC,1248368.0,5804.939319
9,2022-02-05,XYZ,1342932.0,6639.093907


In [32]:
# Actuals at the same grain as the store-level forecast: sum the raw test rows per date and store.
test_df_stores = (
    test_df
    .groupby(['date_id', 'store'])[['net_sales', 'item_qty']]
    .sum()
    .reset_index()
)
test_df_stores

Unnamed: 0,date_id,store,net_sales,item_qty
0,2022-02-01,ABC,1516543.0,6517.128
1,2022-02-01,XYZ,1389440.0,6623.394
2,2022-02-02,ABC,1444364.0,6340.287
3,2022-02-02,XYZ,1557627.0,7188.348
4,2022-02-03,ABC,1756087.0,7445.199
5,2022-02-03,XYZ,1642178.0,7523.119
6,2022-02-04,ABC,1388147.0,5023.904
7,2022-02-04,XYZ,1802251.0,7743.322
8,2022-02-05,ABC,1536005.0,5849.92
9,2022-02-05,XYZ,1705150.0,7182.791


In [33]:
# Show the store-level forecast for side-by-side comparison with the actuals.
feb_forecasted_df_stores

Unnamed: 0,date_id,store,net_sales,item_qty
0,2022-02-01,ABC,1228067.0,5295.650208
1,2022-02-01,XYZ,1152536.0,5571.046611
2,2022-02-02,ABC,1213581.0,5306.568708
3,2022-02-02,XYZ,1095721.0,5377.643121
4,2022-02-03,ABC,1480473.0,6131.762084
5,2022-02-03,XYZ,1288443.0,6050.982359
6,2022-02-04,ABC,1206545.0,5660.627848
7,2022-02-04,XYZ,1206220.0,5957.66981
8,2022-02-05,ABC,1248368.0,5804.939319
9,2022-02-05,XYZ,1342932.0,6639.093907


In [34]:
# Store-level MAPE for net sales. The two frames are compared row by row, which relies on
# both groupby results listing the same (date_id, store) pairs in the same order.
mean_absolute_percentage_error(test_df_stores['net_sales'], feb_forecasted_df_stores['net_sales'])

0.1752244618107001

In [35]:
# Store-level MAPE for item quantity (same row-order reliance as the net-sales comparison).
mean_absolute_percentage_error(test_df_stores['item_qty'], feb_forecasted_df_stores['item_qty'])

0.12352416383621234

In [36]:
# Actual daily totals for store ABC only.
test_df_stores.loc[test_df_stores['store'].eq('ABC')]

Unnamed: 0,date_id,store,net_sales,item_qty
0,2022-02-01,ABC,1516543.0,6517.128
2,2022-02-02,ABC,1444364.0,6340.287
4,2022-02-03,ABC,1756087.0,7445.199
6,2022-02-04,ABC,1388147.0,5023.904
8,2022-02-05,ABC,1536005.0,5849.92
10,2022-02-06,ABC,1171293.0,5273.912
12,2022-02-07,ABC,1257475.0,5819.82
14,2022-02-08,ABC,1367424.0,5876.401
16,2022-02-09,ABC,1319227.0,5564.455
18,2022-02-10,ABC,1568063.0,6605.404


In [37]:
# Forecast daily totals for store ABC only, for comparison with the actuals.
feb_forecasted_df_stores.loc[feb_forecasted_df_stores['store'].eq('ABC')]

Unnamed: 0,date_id,store,net_sales,item_qty
0,2022-02-01,ABC,1228067.0,5295.650208
2,2022-02-02,ABC,1213581.0,5306.568708
4,2022-02-03,ABC,1480473.0,6131.762084
6,2022-02-04,ABC,1206545.0,5660.627848
8,2022-02-05,ABC,1248368.0,5804.939319
10,2022-02-06,ABC,1178523.0,5652.744628
12,2022-02-07,ABC,1248778.0,5468.659786
14,2022-02-08,ABC,1171436.0,5507.452542
16,2022-02-09,ABC,1305258.0,5761.024433
18,2022-02-10,ABC,1234707.0,6001.671214
