In [1]:
import pandas as pd
import glob

def load_salescate_data(data_folder):
    """Load every 'salebycategory*.xlsx' workbook in a folder into one DataFrame.

    Parameters
    ----------
    data_folder : str
        Directory to search. Only the folder itself is scanned (no recursion).

    Returns
    -------
    pandas.DataFrame
        The rows of all matching workbooks stacked in file-name order, with a
        fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If no matching file is found. This is also what happens when the
        folder itself does not exist, because the glob then matches nothing.
    """
    # glob returns matches in an order that depends on the file system; sort
    # them so the row order of the combined frame is the same on every
    # machine and every run.
    salescate_files = sorted(glob.glob(data_folder + "/salebycategory*.xlsx"))

    # Fail loudly rather than letting pd.concat raise an opaque error on an
    # empty list.
    if not salescate_files:
        raise FileNotFoundError(f"No 'salebycategory*.xlsx' files found in {data_folder}")

    # Read each workbook (first sheet, pandas defaults) and stack them.
    salescate_dfs = [pd.read_excel(file) for file in salescate_files]
    return pd.concat(salescate_dfs, ignore_index=True)

# Candidate locations of the export folder, one per machine this notebook is
# run on. NOTE(review): these are absolute, user-specific paths; consider a
# single configurable data directory instead.
data_folder1 = r'C:\Users\PC\Dropbox\Projects\data_xls\salebycategory'
data_folder2 = r'C:\Users\trieu.pham\Dropbox\Projects\data_xls\salebycategory'
data_folder3 = r'/Users/trieupham/Dropbox/Projects/data_xls/salebycategory'

# Try the candidates in order and keep the first one that yields data.
# load_salescate_data raises FileNotFoundError when a folder has no matching
# workbook, which is how a missing folder shows up here.
for data_folder in (data_folder1, data_folder2, data_folder3):
    try:
        df = load_salescate_data(data_folder)
    except FileNotFoundError:
        continue  # nothing usable here; fall through to the next candidate
    break
else:
    # Every candidate failed: report all of them, not just the last one tried.
    raise FileNotFoundError(
        "Data folder not found; tried: "
        + ", ".join((data_folder1, data_folder2, data_folder3))
    )
print(df.head())

         barcode                               itemName  divisionCode  \
0  2010101000005                         Ly đá vừa x ly            25   
1  2501031190411                         Ly đá lớn x ly            25   
2  2501031190510                  Trà tắc ly lớn x 1 ly            25   
3  8934588873553           TEA+ Trà Oolong 450ml x chai             7   
4  8934588063053  AQUAFINA Nước tinh khiết 500ml x chai             7   

  divisionName  categoryCode        categoryName  subCategoryCode  \
0  FF Beverage          2506  FF Beverage Onsite           250601   
1  FF Beverage          2506  FF Beverage Onsite           250601   
2  FF Beverage          2506  FF Beverage Onsite           250603   
3     Beverage           703                 Tea            70301   
4     Beverage           701       Bottled Water            70101   

     subCategoryName  itemQty  grossSales  itemDiscount  netSales  \
0         Cup Of Ice      666     1029000        969000    952845   
1       

In [2]:
# Print the frame's summary: row count, per-column non-null counts and dtypes,
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12457 entries, 0 to 12456
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   barcode          12457 non-null  int64  
 1   itemName         12457 non-null  object 
 2   divisionCode     12457 non-null  int64  
 3   divisionName     12457 non-null  object 
 4   categoryCode     12457 non-null  int64  
 5   categoryName     12457 non-null  object 
 6   subCategoryCode  12457 non-null  int64  
 7   subCategoryName  12457 non-null  object 
 8   itemQty          12457 non-null  int64  
 9   grossSales       12457 non-null  int64  
 10  itemDiscount     12457 non-null  int64  
 11  netSales         12457 non-null  int64  
 12  totalCostPrice   12457 non-null  float64
dtypes: float64(1), int64(8), object(4)
memory usage: 1.2+ MB


In [3]:
# Show every row whose barcode is 8936114080084. In the current data this one
# barcode appears under more than one itemName spelling.

df[df['barcode'] == 8936114080084]


Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
2691,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,5,171000,0,158332,106920.0
5134,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,1,33000,0,30556,21384.0
6317,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,4,132000,0,122224,85536.0
8384,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,2,66000,0,61112,42768.0
10197,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,2,66000,0,61112,42768.0
11833,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,3,99000,0,91668,64152.0


In [4]:
# Count rows per barcode and keep only the barcodes that occur more than once.
# The result lists barcode -> row count (most frequent first); item names are
# not part of this output.
# value_counts() is evaluated once and filtered through a callable .loc
# instead of being computed a second time just to build the mask.
df['barcode'].value_counts().loc[lambda counts: counts > 1]


barcode
2010101000005    7
8934563669133    7
8936079123833    7
8801382132394    7
8803628631012    7
                ..
8936076454879    2
8938524577222    2
6920354836930    2
8859533603224    2
8996001600597    2
Name: count, Length: 2327, dtype: int64

In [5]:
# Total the quantity and money columns for each distinct combination of
# barcode, item name and division / category / sub-category.
# itemName is one of the grouping keys, so a barcode recorded under several
# name spellings produces one output row per spelling.
df_grouped = (
    df.groupby([
        'barcode',
        'itemName',
        'divisionCode',
        'divisionName',
        'categoryCode',
        'categoryName',
        'subCategoryCode',
        'subCategoryName',
    ])[['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']]
    .sum()
    .reset_index()  # turn the group keys back into ordinary columns
)

df_grouped

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,HH Cam Úc 1 trái,29,Fresh Grocery,2901,Fruit Cat,290101,Fruit Sub,7,280000,0,266665,192500.0
1,23133289,GPS Thạch hồng sâm Innerset 15g x gói,7,Beverage,708,Others Beverage,70801,Condition/Supplements,49,1421000,0,1315743,868637.7
2,40677798,Jagermeister 20ml x Chai,14,Wine&Liquor,1401,Liquor,140102,Cordials/Brandy/Cognac,7,371000,0,337274,226100.0
3,45116568,Lotte Kem mochi Yukimi Daifuku 90ml x hộp,5,Ice Cream,501,Packaged Ice Cream/Novelties,50104,Other Ice Cream,192,6376000,0,5903751,4189056.0
4,50357680,FISHERMANS Kẹo cay con tau khuynh diep 25g x1 goi,9,Candy,901,Candy Category,90101,Hard Candy,3,66000,11000,61111,50490.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4101,18935005801132,THÙNG 24LAVIE Nước khoáng 500ml x Chai,7,Beverage,701,Bottled Water,70102,Mineral water,1,120000,0,111111,90450.0
4102,18935012413328,THÙNG 24SABECO Lager Bia 330ml x Lon,6,Beer,602,Domestic Beer,60201,Domestic Beer Can,4,1280000,0,1163636,923344.0
4103,28935049502337,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon,7,Beverage,702,Carbonated Drinks - CSD,70202,No/Less Sugar CSD,1,194000,66000,179630,199560.0
4104,48934588063068,THÙNG 12AQUAFINA Nước tinh khiết 1.5L x chai,7,Beverage,701,Bottled Water,70101,Purified water,1,140000,0,129630,104280.0


In [6]:
# Order the grouped rows by net sales, highest first, and keep the leading 20.
top_20_barcodes = (
    df_grouped
    .sort_values(by='netSales', ascending=False)
    .iloc[:20]
)
top_20_barcodes

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
236,2010805000363,Hotdog 25 Signature x cái,26,FF Onsite,2602,Other Cuisine,260201,Other Cuisine Sub,758,21208000,16000,19637094,14442302.86
335,2501031190510,Trà tắc ly lớn x 1 ly,25,FF Beverage,2506,FF Beverage Onsite,250603,Cold Local Drinks,1180,20930000,2670000,19379886,11988210.0
3630,8938502525368,Bánh bao nhân thịt heo trứng muối xá xíu,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,610,17690000,0,16379714,11168047.33
431,2701010000179,GCCT Cơm nghêu mẹ nấu x hộp,27,FF Offsite,2701,Rice Offsite,270101,Bento,448,17024000,0,15762880,10304000.0
281,2250103000058,TP. Bánh giò nhân thịt 150g x 1 cái,26,FF Onsite,2604,Steamed Bun,260403,Others Steamed bun,1169,16366000,0,15153747,8972676.36
400,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,515,15450000,0,14305669,9966020.0
110,260202000017,Tteokbokki Signature x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,476,14280000,0,13222326,8509789.92
3628,8938502525344,BB 2 trứng cút 150g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,706,13414000,0,12420596,8642317.87
3627,8938502525337,BB 1 trứng cút 120g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,754,12784000,34000,11837164,6931342.0
245,2010901000120,HĐ Cơm gà sốt Hàn Quốc x hộp,27,FF Offsite,2701,Rice Offsite,270101,Bento,326,12388000,0,11470310,8136494.0


In [7]:
# Show the raw (un-aggregated) rows for barcode 2602020019048.
df[df['barcode'] == 2602020019048]

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
5,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,205,6150000,0,5694489,3948126.0
3607,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,103,3090000,0,2861134,1966167.0
7071,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,207,6210000,0,5750046,4051727.0


In [8]:
# Collapse the data to one row per barcode, totalling the quantity and money
# columns. Grouping on barcode alone merges rows that differ only in how the
# item name is written.
df_summed = (
    df.groupby('barcode')[
        ['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']
    ]
    .sum()
    .reset_index()  # barcode becomes a regular column again
)

df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,7,280000,0,266665,192500.0
1,23133289,49,1421000,0,1315743,868637.7
2,40677798,7,371000,0,337274,226100.0
3,45116568,192,6376000,0,5903751,4189056.0
4,50357680,13,308000,15000,285183,230571.0
...,...,...,...,...,...,...
2699,18935005801132,1,120000,0,111111,90450.0
2700,18935012413328,4,1280000,0,1163636,923344.0
2701,28935049502337,1,194000,66000,179630,199560.0
2702,48934588063068,1,140000,0,129630,104280.0


In [9]:
# Build a barcode -> itemName lookup. GroupBy.first() takes the first non-null
# itemName within each barcode group, in df's row order, so a barcode that has
# several name spellings keeps whichever one appears first in df.
df_itemName = df.groupby('barcode')['itemName'].first()
# Attach that name to the per-barcode totals. This adds (or overwrites) the
# 'itemName' column on the existing df_summed frame.
df_summed['itemName'] = df_summed['barcode'].map(df_itemName)
df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
0,21202010,7,280000,0,266665,192500.0,HH Cam Úc 1 trái
1,23133289,49,1421000,0,1315743,868637.7,GPS Thạch hồng sâm Innerset 15g x gói
2,40677798,7,371000,0,337274,226100.0,Jagermeister 20ml x Chai
3,45116568,192,6376000,0,5903751,4189056.0,Lotte Kem mochi Yukimi Daifuku 90ml x hộp
4,50357680,13,308000,15000,285183,230571.0,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói
...,...,...,...,...,...,...,...
2699,18935005801132,1,120000,0,111111,90450.0,THÙNG 24LAVIE Nước khoáng 500ml x Chai
2700,18935012413328,4,1280000,0,1163636,923344.0,THÙNG 24SABECO Lager Bia 330ml x Lon
2701,28935049502337,1,194000,66000,179630,199560.0,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon
2702,48934588063068,1,140000,0,129630,104280.0,THÙNG 12AQUAFINA Nước tinh khiết 1.5L x chai


In [10]:

# Order the per-barcode totals by net sales, highest first, and keep the
# leading 20.
top_20_barcodes = (
    df_summed
    .sort_values(by='netSales', ascending=False)
    .iloc[:20]
)
top_20_barcodes

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
69,260202000017,805,24150000,0,22361288,14575237.36,Tteokbokki Signature x 1 phần
165,2010805000363,826,23112000,16000,21400062,15811865.65,Hotdog 25 Signature x cái
218,2501031190510,1284,23010000,2670000,21305856,13045983.0,Trà tắc ly lớn x 1 ly
150,2010103000010,880,21230000,770000,19657366,12953937.16,Milo ly lớn x 1ly
287,2701010000179,473,17974000,0,16642505,10879000.0,GCCT Cơm nghêu mẹ nấu x hộp
2406,8938502525368,610,17690000,0,16379714,11168047.33,Bánh bao nhân thịt heo trứng muối xá xíu
188,2250103000058,1169,16366000,0,15153747,8972676.36,TP. Bánh giò nhân thịt 150g x 1 cái
260,2602020019048,515,15450000,0,14305669,9966020.0,Tteokbokki Signature (TC) x tô
170,2010901000120,376,14288000,0,13229560,9305441.0,HĐ Cơm gà sốt Hàn Quốc x hộp
213,2501031190374,1188,13886000,370000,12857283,6432204.0,Nestea ly lớn (New) x 1 ly
