In [7]:
import pandas as pd
import glob

def load_salescate_data(data_folder):
    """Load every ``salebycategory*.xlsx`` workbook in a folder into one DataFrame.

    Parameters
    ----------
    data_folder : str or os.PathLike
        Directory that is searched (non-recursively) for files whose names
        match ``salebycategory*.xlsx``.

    Returns
    -------
    pandas.DataFrame
        The rows of all matching workbooks stacked in sorted file-path order,
        re-indexed from 0.

    Raises
    ------
    FileNotFoundError
        If no file in ``data_folder`` matches the pattern.
    """
    # glob yields matches in arbitrary, filesystem-dependent order; sorting
    # makes the row order of the combined frame reproducible across machines.
    # The f-string (instead of `+`) also lets callers pass a pathlib.Path.
    salescate_files = sorted(glob.glob(f"{data_folder}/salebycategory*.xlsx"))

    # Fail loudly so the caller can fall back to another folder
    if not salescate_files:
        raise FileNotFoundError(f"No 'salebycategory*.xlsx' files found in {data_folder}")

    # Read each workbook with pandas' defaults, then stack them into one frame
    salescate_dfs = [pd.read_excel(file) for file in salescate_files]
    return pd.concat(salescate_dfs, ignore_index=True)

# Candidate locations of the data folder (presumably one per machine this
# notebook is run on — confirm before removing any of them).
data_folder1 = r'C:\Users\PC\Dropbox\Projects\data_xls'
data_folder2 = r'C:\Users\trieu.pham\Dropbox\Projects\data_xls'
data_folder3 = r'/Users/trieupham/Dropbox/Projects/data_xls'

# Try the candidates in order and keep the data from the first folder that
# contains matching workbooks. A loop replaces one nested try/except per folder.
last_error = None
for candidate_folder in (data_folder1, data_folder2, data_folder3):
    try:
        df = load_salescate_data(candidate_folder)
    except FileNotFoundError as error:
        # Remember the failure so it can be re-raised if every candidate fails
        last_error = error
    else:
        break
else:
    # The loop finished without a break: no candidate folder had any data
    print("Data folder not found")
    raise last_error
print(df.head())

         barcode                             itemName  divisionCode  \
0  2010101000005                       Ly đá vừa x ly            25   
1  2501031190411                       Ly đá lớn x ly            25   
2  2501010000021                 HP Ly đá size L x ly            25   
3  8935049510864  DASANI Nước tinh khiết 510ml x chai             7   
4  2250103000058  TP. Bánh giò nhân thịt 150g x 1 cái            26   

  divisionName  categoryCode        categoryName  subCategoryCode  \
0  FF Beverage          2506  FF Beverage Onsite           250601   
1  FF Beverage          2506  FF Beverage Onsite           250601   
2  FF Beverage          2506  FF Beverage Onsite           250601   
3     Beverage           701       Bottled Water            70101   
4    FF Onsite          2604        Steamed Bun            260403   

       subCategoryName  itemQty  grossSales  itemDiscount  netSales  \
0           Cup Of Ice      681      462000       1581000    427810   
1           Cup 

In [8]:
# Print a summary of the loaded frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8738 entries, 0 to 8737
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   barcode          8738 non-null   int64  
 1   itemName         8738 non-null   object 
 2   divisionCode     8738 non-null   int64  
 3   divisionName     8738 non-null   object 
 4   categoryCode     8738 non-null   int64  
 5   categoryName     8738 non-null   object 
 6   subCategoryCode  8738 non-null   int64  
 7   subCategoryName  8738 non-null   object 
 8   itemQty          8738 non-null   int64  
 9   grossSales       8738 non-null   int64  
 10  itemDiscount     8738 non-null   int64  
 11  netSales         8738 non-null   int64  
 12  totalCostPrice   8738 non-null   float64
dtypes: float64(1), int64(8), object(4)
memory usage: 887.6+ KB


In [9]:
# Show every row whose barcode is 8936114080084, to inspect how one barcode can appear on several rows

df[df['barcode'] == 8936114080084]


Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
1557,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,1,33000,0,30556,21384.0
2633,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,5,171000,0,158332,106920.0
4633,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,3,99000,0,91668,64152.0
6533,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,2,66000,0,61112,42768.0
7992,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,4,132000,0,122224,85536.0


In [10]:
# Count the rows per barcode and keep only the barcodes that appear on more than one row.
# The result is a Series indexed by barcode whose values are the row counts (item names are not included).
# value_counts() is computed once and reused for both the mask and the selection.
barcode_counts = df['barcode'].value_counts()
barcode_counts[barcode_counts > 1]


barcode
2010101000005    5
8888826016175    5
8936135440027    5
8936079121648    5
8999178650235    5
                ..
8853301002714    2
8936195180024    2
5900020037190    2
8935217410309    2
2701020000213    2
Name: count, Length: 2060, dtype: int64

In [11]:
# Total the sales metrics for each distinct combination of product and category attributes.
# itemName is one of the grouping keys, so a barcode recorded under several names yields several rows.
product_keys = [
    'barcode', 'itemName',
    'divisionCode', 'divisionName',
    'categoryCode', 'categoryName',
    'subCategoryCode', 'subCategoryName',
]
sales_metrics = ['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']
df_grouped = df.groupby(product_keys)[sales_metrics].sum().reset_index()

df_grouped

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,HH Cam Úc 1 trái,29,Fresh Grocery,2901,Fruit Cat,290101,Fruit Sub,7,280000,0,266665,192500.0
1,23133289,GPS Thạch hồng sâm Innerset 15g x gói,7,Beverage,708,Others Beverage,70801,Condition/Supplements,48,1392000,0,1288891,850910.4
2,45116568,Lotte Kem mochi Yukimi Daifuku 90ml x hộp,5,Ice Cream,501,Packaged Ice Cream/Novelties,50104,Other Ice Cream,151,5023000,0,4650961,3294518.0
3,50357680,FISHERMANS Kẹo cay con tau khuynh diep 25g x1 goi,9,Candy,901,Candy Category,90101,Hard Candy,3,66000,11000,61111,50490.0
4,50357680,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói,9,Candy,901,Candy Category,90101,Hard Candy,6,142000,4000,131480,106029.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3752,9556247536877,Beryl Almond Coated With Dark Chocolate 45g x ...,9,Candy,902,Chocolate,90202,Chocolate Pack,4,152000,0,140740,90240.0
3753,9556247536877,Beryl Almond Coated With Dark Chocolate 45g x Gói,9,Candy,902,Chocolate,90202,Chocolate Pack,3,114000,0,105555,73440.0
3754,18935012413328,THÙNG 24SABECO Lager Bia 330ml x Lon,6,Beer,602,Domestic Beer,60201,Domestic Beer Can,4,1280000,0,1163636,923344.0
3755,28935049502337,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon,7,Beverage,702,Carbonated Drinks - CSD,70202,No/Less Sugar CSD,1,194000,66000,179630,199560.0


In [12]:
# Rank the (barcode, itemName, category...) groups by net sales, highest first, and keep the top 20.
grouped_by_net_sales = df_grouped.sort_values(by='netSales', ascending=False)
top_20_barcodes = grouped_by_net_sales.head(20)
top_20_barcodes

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
97,260202000017,Tteokbokki Signature x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,476,14280000,0,13222326,8509789.92
3331,8938502525368,Bánh bao nhân thịt heo trứng muối xá xíu,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,404,11716000,0,10848203,7378959.0
248,2250103000058,TP. Bánh giò nhân thịt 150g x 1 cái,26,FF Onsite,2604,Steamed Bun,260403,Others Steamed bun,754,10556000,0,9774102,5792745.24
204,2010805000363,Hotdog 25 Signature x cái,26,FF Onsite,2602,Other Cuisine,260201,Other Cuisine Sub,374,10456000,16000,9681510,7125885.58
374,2701010000179,GCCT Cơm nghêu mẹ nấu x hộp,27,FF Offsite,2701,Rice Offsite,270101,Bento,274,10412000,0,9640690,6302000.0
96,260202000017,Tteokbokki Signature x 1 phần,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,329,9870000,0,9138962,6065447.44
3328,8938502525337,BB 1 trứng cút 120g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,518,8772000,34000,8122310,4753271.6
3329,8938502525344,BB 2 trứng cút 150g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,455,8645000,0,8004778,5556378.1
2630,8935335400466,Bánh bao xá xíu phô mai 120g x 1 cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,334,8350000,0,7731434,5045988.5
298,2501031190510,Trà tắc ly lớn x 1 ly,25,FF Beverage,2506,FF Beverage Onsite,250603,Cold Local Drinks,457,8230000,910000,7620471,4642891.5


In [13]:
# Show the rows for barcode 2602020019048
df[df['barcode'] == 2602020019048]

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
30,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,103,3090000,0,2861134,1966167.0


In [14]:
# Collapse to one row per barcode by summing the sales metrics, regardless of how the item name was spelled.
summed_columns = ['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']
df_summed = df.groupby('barcode', as_index=False)[summed_columns].sum()

df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,7,280000,0,266665,192500.0
1,23133289,48,1392000,0,1288891,850910.4
2,45116568,151,5023000,0,4650961,3294518.0
3,50357680,9,208000,15000,192591,156519.0
4,50854011,34,790000,65000,731476,590733.0
...,...,...,...,...,...,...
2448,9556247504111,4,260000,0,240740,157440.0
2449,9556247536877,8,304000,0,281480,186240.0
2450,18935012413328,4,1280000,0,1163636,923344.0
2451,28935049502337,1,194000,66000,179630,199560.0


In [15]:
# Pick one representative name per barcode: the first itemName met for that barcode in df.
df_itemName = df.groupby('barcode')['itemName'].first()
# Look the name up for each summed row and append it as the last column.
df_summed = df_summed.assign(itemName=df_summed['barcode'].map(df_itemName))
df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
0,21202010,7,280000,0,266665,192500.0,HH Cam Úc 1 trái
1,23133289,48,1392000,0,1288891,850910.4,GPS Thạch hồng sâm Innerset 15g x gói
2,45116568,151,5023000,0,4650961,3294518.0,Lotte Kem mochi Yukimi Daifuku 90ml x hộp
3,50357680,9,208000,15000,192591,156519.0,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói
4,50854011,34,790000,65000,731476,590733.0,Fishermans Kẹo Cay Con Tàu Vị Bạc Hà 25g x Gói
...,...,...,...,...,...,...,...
2448,9556247504111,4,260000,0,240740,157440.0,#!Beryl Classic Tiramisu Almond White Chocolat...
2449,9556247536877,8,304000,0,281480,186240.0,Beryl Almond Coated With Dark Chocolate 45g x Gói
2450,18935012413328,4,1280000,0,1163636,923344.0,THÙNG 24SABECO Lager Bia 330ml x Lon
2451,28935049502337,1,194000,66000,179630,199560.0,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon


In [16]:

# Rank barcodes by their total net sales, highest first, and keep the top 20.
# This rebinds top_20_barcodes, which was previously built from df_grouped.
summed_by_net_sales = df_summed.sort_values(by='netSales', ascending=False)
top_20_barcodes = summed_by_net_sales.head(20)
top_20_barcodes

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
61,260202000017,805,24150000,0,22361288,14575237.36,Tteokbokki Signature x tô
124,2010103000010,595,14830000,45000,13731393,8719692.16,Milo ly lớn x 1ly
139,2010805000363,442,12360000,16000,11444478,8495448.37,Hotdog 25 Signature x cái
2188,8938502525368,404,11716000,0,10848203,7378959.0,Bánh bao nhân thịt heo trứng muối xá xíu
245,2701010000179,299,11362000,0,10520315,6877000.0,GCCT Cơm nghêu mẹ nấu x hộp
162,2250103000058,754,10556000,0,9774102,5792745.24,TP. Bánh giò nhân thịt 150g x 1 cái
189,2501031190510,561,10310000,910000,9546441,5700664.5,Trà tắc ly lớn x 1 ly
177,2270103000100,267,10146000,0,9394395,5958497.0,HĐ Gimbap dakgalbi phô mai x cái
2179,8938502004030,390,9750000,0,9027720,4875000.0,HTH Trà sữa olong lài 250ml x chai
187,2501031190374,840,9740000,340000,9018434,4502544.0,Nestea ly lớn (New) x 1 ly
