In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('data/BenchmarkingSample20230607.csv')
df

Unnamed: 0,Unit,Period,Days,Account,Amount
0,00040302CD88,1,30,Sales,89952
1,00040302CD88,2,31,Sales,90594
2,00040302CD88,3,30,Sales,81909
3,00040302CD88,4,31,Sales,87418
4,00040302CD88,5,31,Sales,91109
...,...,...,...,...,...
6043,0004030BEB57,20,30,Other Non-controllables,1455
6044,0004030BEB57,21,31,Other Non-controllables,7456
6045,0004030BEB57,22,31,Other Non-controllables,2037
6046,0004030BEB57,23,28,Other Non-controllables,2037


In [3]:
# Convert the 'Days' column to datetime format
dates = pd.date_range(start='4/1/2021', periods=df['Period'].max(), freq='M')
df['Date'] = dates.array[df['Period'] - 1]
df

Unnamed: 0,Unit,Period,Days,Account,Amount,Date
0,00040302CD88,1,30,Sales,89952,2021-04-30
1,00040302CD88,2,31,Sales,90594,2021-05-31
2,00040302CD88,3,30,Sales,81909,2021-06-30
3,00040302CD88,4,31,Sales,87418,2021-07-31
4,00040302CD88,5,31,Sales,91109,2021-08-31
...,...,...,...,...,...,...
6043,0004030BEB57,20,30,Other Non-controllables,1455,2022-11-30
6044,0004030BEB57,21,31,Other Non-controllables,7456,2022-12-31
6045,0004030BEB57,22,31,Other Non-controllables,2037,2023-01-31
6046,0004030BEB57,23,28,Other Non-controllables,2037,2023-02-28


In [4]:
# reshape df
unmelt = df.pivot_table(index=['Unit', 'Date', 'Days', 'Period'], columns='Account', values='Amount', aggfunc='sum')
unmelt = unmelt.reset_index()
unmelt

Account,Unit,Date,Days,Period,Advertising,Cost of Sales - Food,Cost of Sales - Paper,Crew Labor,Maintenance & Repair,Management Labor,Other Controllables,Other Labor-related Costs,Other Non-controllables,Outside Services,Promotion,Rent,Sales,Utilities
0,00040302CD88,2021-04-30,30,1,3778,20557,2190,486,5135,3983,3167,3429,7879,8462,401,-610,89952,5690
1,00040302CD88,2021-05-31,31,2,3805,20876,2058,1376,1553,4255,3899,3699,7918,5329,445,-178,90594,4917
2,00040302CD88,2021-06-30,30,3,3440,19531,2291,178,2321,4241,3646,3614,7471,5062,553,-527,81909,4692
3,00040302CD88,2021-07-31,31,4,3672,20999,2056,36597,3412,5515,2581,6944,7537,5967,874,-353,87418,5699
4,00040302CD88,2021-08-31,31,5,3827,21357,2312,29942,3345,5484,3772,7109,6385,5727,635,-167,91109,5109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,0004030BEB57,2022-11-30,30,20,15729,99236,10557,75243,8789,3997,2729,15678,26672,11476,-3,64375,374497,6230
212,0004030BEB57,2022-12-31,31,21,12227,108507,13137,94509,1704,4989,6187,15246,39347,16172,-1144,81337,424685,6480
213,0004030BEB57,2023-01-31,31,22,15255,101713,10812,82894,3256,4278,4408,16168,28140,11189,2665,58308,363205,8236
214,0004030BEB57,2023-02-28,28,23,15020,98547,11558,83338,3605,5253,3443,16454,27999,10646,608,56786,357611,2656


In [5]:
columns_to_average = unmelt.columns.drop(['Unit', 'Date', 'Days', 'Sales'])
for col in columns_to_average:
    unmelt[f'Average daily {col}'] = (unmelt[col] / unmelt['Days']).round(2)
old_columns = unmelt.columns
old_columns

Index(['Unit', 'Date', 'Days', 'Period', 'Advertising', 'Cost of Sales - Food',
       'Cost of Sales - Paper', 'Crew Labor', 'Maintenance & Repair',
       'Management Labor', 'Other Controllables', 'Other Labor-related Costs',
       'Other Non-controllables', 'Outside Services', 'Promotion', 'Rent',
       'Sales', 'Utilities', 'Average daily Period',
       'Average daily Advertising', 'Average daily Cost of Sales - Food',
       'Average daily Cost of Sales - Paper', 'Average daily Crew Labor',
       'Average daily Maintenance & Repair', 'Average daily Management Labor',
       'Average daily Other Controllables',
       'Average daily Other Labor-related Costs',
       'Average daily Other Non-controllables',
       'Average daily Outside Services', 'Average daily Promotion',
       'Average daily Rent', 'Average daily Utilities'],
      dtype='object', name='Account')

In [6]:
# Add columns to the dataframe as per the Example P & L excel file
unmelt['Total Cost of Sales'] = unmelt[['Cost of Sales - Food', 'Cost of Sales - Paper']].sum(axis=1)
unmelt['Gross Profit'] = unmelt['Sales'] - unmelt['Total Cost of Sales']
unmelt['Total Labor'] = unmelt[['Crew Labor', 'Management Labor', 'Other Labor-related Costs']].sum(axis=1)
unmelt['Total Prime Costs'] = unmelt['Total Cost of Sales'] + unmelt['Total Labor']
unmelt['Total Controllable Costs'] = unmelt[['Crew Labor','Management Labor','Other Labor-related Costs','Advertising','Promotion','Outside Services','Maintenance & Repair','Other Controllables','Utilities']].sum(axis = 1)
unmelt['Controllable Profit'] = unmelt['Gross Profit'] - unmelt['Total Controllable Costs']
unmelt['Total Non-controllable Costs'] = unmelt['Cost of Sales - Food'] + unmelt['Cost of Sales - Paper']
unmelt['Store EBITDA'] = unmelt['Gross Profit'] - unmelt['Total Controllable Costs'] - unmelt['Total Non-controllable Costs']
new_columns = unmelt.columns.drop(old_columns)
new_columns

Index(['Total Cost of Sales', 'Gross Profit', 'Total Labor',
       'Total Prime Costs', 'Total Controllable Costs', 'Controllable Profit',
       'Total Non-controllable Costs', 'Store EBITDA'],
      dtype='object', name='Account')

In [7]:
unmelt[pd.Index(['Unit']).append(new_columns)]
unmelt

Account,Unit,Date,Days,Period,Advertising,Cost of Sales - Food,Cost of Sales - Paper,Crew Labor,Maintenance & Repair,Management Labor,...,Average daily Rent,Average daily Utilities,Total Cost of Sales,Gross Profit,Total Labor,Total Prime Costs,Total Controllable Costs,Controllable Profit,Total Non-controllable Costs,Store EBITDA
0,00040302CD88,2021-04-30,30,1,3778,20557,2190,486,5135,3983,...,-20.33,189.67,22747,67205,7898,30645,34531,32674,22747,9927
1,00040302CD88,2021-05-31,31,2,3805,20876,2058,1376,1553,4255,...,-5.74,158.61,22934,67660,9330,32264,29278,38382,22934,15448
2,00040302CD88,2021-06-30,30,3,3440,19531,2291,178,2321,4241,...,-17.57,156.40,21822,60087,8033,29855,27747,32340,21822,10518
3,00040302CD88,2021-07-31,31,4,3672,20999,2056,36597,3412,5515,...,-11.39,183.84,23055,64363,49056,72111,71261,-6898,23055,-29953
4,00040302CD88,2021-08-31,31,5,3827,21357,2312,29942,3345,5484,...,-5.39,164.81,23669,67440,42535,66204,64950,2490,23669,-21179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,0004030BEB57,2022-11-30,30,20,15729,99236,10557,75243,8789,3997,...,2145.83,207.67,109793,264704,94918,204711,139868,124836,109793,15043
212,0004030BEB57,2022-12-31,31,21,12227,108507,13137,94509,1704,4989,...,2623.77,209.03,121644,303041,114744,236388,156370,146671,121644,25027
213,0004030BEB57,2023-01-31,31,22,15255,101713,10812,82894,3256,4278,...,1880.90,265.68,112525,250680,103340,215865,148349,102331,112525,-10194
214,0004030BEB57,2023-02-28,28,23,15020,98547,11558,83338,3605,5253,...,2028.07,94.86,110105,247506,105045,215150,141023,106483,110105,-3622


In [8]:
unmelt['Adjusted daily Rent'] = unmelt['Average daily Rent']
unmelt.loc[unmelt['Adjusted daily Rent'] < 0, 'Adjusted daily Rent'] = np.nan

# Calculate the average of positive values within each group
unmelt['Adjusted daily Rent'] = unmelt.groupby('Unit')['Adjusted daily Rent'].transform(lambda x: x[x > 0].mean())
averaged_columns = [column for column in unmelt.columns if (('Average daily' in column or 'Adjusted daily' in column) and (column not in ['Average daily Rent', 'Average daily Advertising', 'Average daily Period']))]
unmelt.loc[:, averaged_columns]

Account,Average daily Cost of Sales - Food,Average daily Cost of Sales - Paper,Average daily Crew Labor,Average daily Maintenance & Repair,Average daily Management Labor,Average daily Other Controllables,Average daily Other Labor-related Costs,Average daily Other Non-controllables,Average daily Outside Services,Average daily Promotion,Average daily Utilities,Adjusted daily Rent
0,685.23,73.00,16.20,171.17,132.77,105.57,114.30,262.63,282.07,13.37,189.67,15.003333
1,673.42,66.39,44.39,50.10,137.26,125.77,119.32,255.42,171.90,14.35,158.61,15.003333
2,651.03,76.37,5.93,77.37,141.37,121.53,120.47,249.03,168.73,18.43,156.40,15.003333
3,677.39,66.32,1180.55,110.06,177.90,83.26,224.00,243.13,192.48,28.19,183.84,15.003333
4,688.94,74.58,965.87,107.90,176.90,121.68,229.32,205.97,184.74,20.48,164.81,15.003333
...,...,...,...,...,...,...,...,...,...,...,...,...
211,3307.87,351.90,2508.10,292.97,133.23,90.97,522.60,889.07,382.53,-0.10,207.67,2175.308333
212,3500.23,423.77,3048.68,54.97,160.94,199.58,491.81,1269.26,521.68,-36.90,209.03,2175.308333
213,3281.06,348.77,2674.00,105.03,138.00,142.19,521.55,907.74,360.94,85.97,265.68,2175.308333
214,3519.54,412.79,2976.36,128.75,187.61,122.96,587.64,999.96,380.21,21.71,94.86,2175.308333


In [9]:
unmelt['Average Daily Cost'] = unmelt.loc[:, averaged_columns].sum(axis=1)
unmelt = unmelt.drop('Average daily Rent', axis=1)
unmelt

Account,Unit,Date,Days,Period,Advertising,Cost of Sales - Food,Cost of Sales - Paper,Crew Labor,Maintenance & Repair,Management Labor,...,Total Cost of Sales,Gross Profit,Total Labor,Total Prime Costs,Total Controllable Costs,Controllable Profit,Total Non-controllable Costs,Store EBITDA,Adjusted daily Rent,Average Daily Cost
0,00040302CD88,2021-04-30,30,1,3778,20557,2190,486,5135,3983,...,22747,67205,7898,30645,34531,32674,22747,9927,15.003333,2060.983333
1,00040302CD88,2021-05-31,31,2,3805,20876,2058,1376,1553,4255,...,22934,67660,9330,32264,29278,38382,22934,15448,15.003333,1831.933333
2,00040302CD88,2021-06-30,30,3,3440,19531,2291,178,2321,4241,...,21822,60087,8033,29855,27747,32340,21822,10518,15.003333,1801.663333
3,00040302CD88,2021-07-31,31,4,3672,20999,2056,36597,3412,5515,...,23055,64363,49056,72111,71261,-6898,23055,-29953,15.003333,3182.123333
4,00040302CD88,2021-08-31,31,5,3827,21357,2312,29942,3345,5484,...,23669,67440,42535,66204,64950,2490,23669,-21179,15.003333,2956.193333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,0004030BEB57,2022-11-30,30,20,15729,99236,10557,75243,8789,3997,...,109793,264704,94918,204711,139868,124836,109793,15043,2175.308333,10862.118333
212,0004030BEB57,2022-12-31,31,21,12227,108507,13137,94509,1704,4989,...,121644,303041,114744,236388,156370,146671,121644,25027,2175.308333,12018.358333
213,0004030BEB57,2023-01-31,31,22,15255,101713,10812,82894,3256,4278,...,112525,250680,103340,215865,148349,102331,112525,-10194,2175.308333,11006.238333
214,0004030BEB57,2023-02-28,28,23,15020,98547,11558,83338,3605,5253,...,110105,247506,105045,215150,141023,106483,110105,-3622,2175.308333,11607.698333


In [10]:
unmelt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 216 entries, 0 to 215
Data columns (total 41 columns):
 #   Column                                   Non-Null Count  Dtype         
---  ------                                   --------------  -----         
 0   Unit                                     216 non-null    object        
 1   Date                                     216 non-null    datetime64[ns]
 2   Days                                     216 non-null    int64         
 3   Period                                   216 non-null    int64         
 4   Advertising                              216 non-null    int64         
 5   Cost of Sales - Food                     216 non-null    int64         
 6   Cost of Sales - Paper                    216 non-null    int64         
 7   Crew Labor                               216 non-null    int64         
 8   Maintenance & Repair                     216 non-null    int64         
 9   Management Labor                         21

In [11]:
unmelt.to_csv("data/BenchmarkingSample20230607_with_features.csv", index=False)