In [1]:
import pandas as pd
import glob

def load_salescate_data(data_folder):
    """Load every ``salebycategory*.xlsx`` workbook in a folder into one DataFrame.

    Parameters
    ----------
    data_folder : str or os.PathLike
        Directory that directly contains the ``salebycategory*.xlsx`` files.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching workbooks stacked in file-name order, with a
        fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If no file in ``data_folder`` matches ``salebycategory*.xlsx``.
    """
    # glob returns matches in arbitrary, OS-dependent order. Sorting keeps the
    # concatenated row order (and anything order-sensitive downstream, such as
    # "first value per group") reproducible across machines and re-runs.
    # The f-string also lets callers pass a pathlib.Path, not only a str.
    salescate_files = sorted(glob.glob(f"{data_folder}/salebycategory*.xlsx"))

    # Fail loudly instead of letting pd.concat raise a less helpful error on [].
    if not salescate_files:
        raise FileNotFoundError(f"No 'salebycategory*.xlsx' files found in {data_folder}")

    # Read each workbook with pandas' defaults, then stack them into one frame.
    salescate_dfs = [pd.read_excel(file) for file in salescate_files]
    return pd.concat(salescate_dfs, ignore_index=True)

# Candidate locations of the data folder (presumably one per machine this
# notebook is run on).
data_folder1 = r'C:\Users\PC\Dropbox\Projects\data_xls'
data_folder2 = r'C:\Users\trieu.pham\Dropbox\Projects\data_xls'
# NOTE(review): unlike the two paths above, this one ends in '/salebycategory';
# confirm the workbooks really live one level deeper on that machine.
data_folder3 = r'/Users/trieupham/Dropbox/Projects/data_xls/salebycategory'

# Try each candidate in order and keep the first one that yields data.
# A flat loop replaces the nested try/except pyramid: adding another folder is
# a one-line change, and the final error is not buried under chained tracebacks.
candidate_folders = [data_folder1, data_folder2, data_folder3]
for attempt, candidate in enumerate(candidate_folders, start=1):
    try:
        df = load_salescate_data(candidate)
    except FileNotFoundError:
        # Only the last failure is fatal; earlier ones fall through to the
        # next candidate.
        if attempt == len(candidate_folders):
            print("Data folder not found")
            raise
    else:
        break
print(df.head())

         barcode                             itemName  divisionCode  \
0  2010101000005                       Ly đá vừa x ly            25   
1  2501031190411                       Ly đá lớn x ly            25   
2  2501010000021                 HP Ly đá size L x ly            25   
3  8935049510864  DASANI Nước tinh khiết 510ml x chai             7   
4  2250103000058  TP. Bánh giò nhân thịt 150g x 1 cái            26   

  divisionName  categoryCode        categoryName  subCategoryCode  \
0  FF Beverage          2506  FF Beverage Onsite           250601   
1  FF Beverage          2506  FF Beverage Onsite           250601   
2  FF Beverage          2506  FF Beverage Onsite           250601   
3     Beverage           701       Bottled Water            70101   
4    FF Onsite          2604        Steamed Bun            260403   

       subCategoryName  itemQty  grossSales  itemDiscount  netSales  \
0           Cup Of Ice      681      462000       1581000    427810   
1           Cup 

In [2]:
# Column names, non-null counts, dtypes and memory footprint of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10599 entries, 0 to 10598
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   barcode          10599 non-null  int64  
 1   itemName         10599 non-null  object 
 2   divisionCode     10599 non-null  int64  
 3   divisionName     10599 non-null  object 
 4   categoryCode     10599 non-null  int64  
 5   categoryName     10599 non-null  object 
 6   subCategoryCode  10599 non-null  int64  
 7   subCategoryName  10599 non-null  object 
 8   itemQty          10599 non-null  int64  
 9   grossSales       10599 non-null  int64  
 10  itemDiscount     10599 non-null  int64  
 11  netSales         10599 non-null  int64  
 12  totalCostPrice   10599 non-null  float64
dtypes: float64(1), int64(8), object(4)
memory usage: 1.1+ MB


In [3]:
# Show every row recorded for barcode 8936114080084.

df.loc[df['barcode'].eq(8936114080084)]


Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
1557,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,1,33000,0,30556,21384.0
4494,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,5,171000,0,158332,106920.0
6494,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,3,99000,0,91668,64152.0
8394,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,2,66000,0,61112,42768.0
9853,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,4,132000,0,122224,85536.0


In [4]:
# Barcodes that appear on more than one row, with the number of rows for each
# (most frequent first). value_counts() is computed once and filtered through a
# callable instead of being evaluated a second time just to build the mask.
df['barcode'].value_counts().loc[lambda counts: counts > 1]


barcode
2010101000005    6
50854011         6
8934680040136    6
8935311102353    6
8934755014154    6
                ..
5060786251329    2
8935136863385    2
4571157252193    2
8936181751924    2
8936079140649    2
Name: count, Length: 2216, dtype: int64

In [5]:
# Total the sales measures for every distinct combination of barcode and its
# descriptive columns (item name, division, category, sub-category).
df_grouped = (
    df.groupby([
        'barcode', 'itemName',
        'divisionCode', 'divisionName',
        'categoryCode', 'categoryName',
        'subCategoryCode', 'subCategoryName',
    ])
    .agg(
        itemQty=('itemQty', 'sum'),
        grossSales=('grossSales', 'sum'),
        itemDiscount=('itemDiscount', 'sum'),
        netSales=('netSales', 'sum'),
        totalCostPrice=('totalCostPrice', 'sum'),
    )
    .reset_index()
)

df_grouped

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,HH Cam Úc 1 trái,29,Fresh Grocery,2901,Fruit Cat,290101,Fruit Sub,7,280000,0,266665,192500.0
1,23133289,GPS Thạch hồng sâm Innerset 15g x gói,7,Beverage,708,Others Beverage,70801,Condition/Supplements,48,1392000,0,1288891,850910.4
2,45116568,Lotte Kem mochi Yukimi Daifuku 90ml x hộp,5,Ice Cream,501,Packaged Ice Cream/Novelties,50104,Other Ice Cream,179,5947000,0,5506523,3905422.0
3,50357680,FISHERMANS Kẹo cay con tau khuynh diep 25g x1 goi,9,Candy,901,Candy Category,90101,Hard Candy,3,66000,11000,61111,50490.0
4,50357680,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói,9,Candy,901,Candy Category,90101,Hard Candy,9,217000,4000,200924,161568.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3910,18935005801132,THÙNG 24LAVIE Nước khoáng 500ml x Chai,7,Beverage,701,Bottled Water,70102,Mineral water,1,120000,0,111111,90450.0
3911,18935012413328,THÙNG 24SABECO Lager Bia 330ml x Lon,6,Beer,602,Domestic Beer,60201,Domestic Beer Can,4,1280000,0,1163636,923344.0
3912,28935049502337,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon,7,Beverage,702,Carbonated Drinks - CSD,70202,No/Less Sugar CSD,1,194000,66000,179630,199560.0
3913,48934588063068,THÙNG 12AQUAFINA Nước tinh khiết 1.5L x chai,7,Beverage,701,Bottled Water,70101,Purified water,1,140000,0,129630,104280.0


In [6]:
# Order the grouped totals by net sales, highest first, and keep the first 20 rows.
top_20_barcodes = (
    df_grouped
    .sort_values(by='netSales', ascending=False)
    .iloc[:20]
)
top_20_barcodes

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
219,2010805000363,Hotdog 25 Signature x cái,26,FF Onsite,2602,Other Cuisine,260201,Other Cuisine Sub,549,15356000,16000,14218560,10460190.33
3458,8938502525368,Bánh bao nhân thịt heo trứng muối xá xíu,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,500,14500000,0,13425995,9132375.0
100,260202000017,Tteokbokki Signature x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,476,14280000,0,13222326,8509789.92
406,2701010000179,GCCT Cơm nghêu mẹ nấu x hộp,27,FF Offsite,2701,Rice Offsite,270101,Bento,368,13984000,0,12948080,8464000.0
317,2501031190510,Trà tắc ly lớn x 1 ly,25,FF Beverage,2506,FF Beverage Onsite,250603,Cold Local Drinks,788,13960000,1800000,12926096,8005686.0
263,2250103000058,TP. Bánh giò nhân thịt 150g x 1 cái,26,FF Onsite,2604,Steamed Bun,260403,Others Steamed bun,947,13258000,0,12275961,7261969.32
3456,8938502525344,BB 2 trứng cút 150g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,573,10887000,0,10080740,6997372.86
3455,8938502525337,BB 1 trứng cút 120g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,634,10744000,34000,9948256,5817710.8
228,2010901000120,HĐ Cơm gà sốt Hàn Quốc x hộp,27,FF Offsite,2701,Rice Offsite,270101,Bento,266,10108000,0,9359210,6611296.0
2739,8935335400466,Bánh bao xá xíu phô mai 120g x 1 cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,397,9925000,0,9189758,5997776.75


In [7]:
# Show every row recorded for barcode 2602020019048.
df.loc[df['barcode'].eq(2602020019048)]

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
30,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,103,3090000,0,2861134,1966167.0
1808,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,205,6150000,0,5694489,3948126.0


In [8]:
# Collapse to one row per barcode, totalling each sales measure.
df_summed = (
    df.groupby('barcode')[
        ['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']
    ]
    .sum()
    .reset_index()
)

df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,7,280000,0,266665,192500.0
1,23133289,48,1392000,0,1288891,850910.4
2,45116568,179,5947000,0,5506523,3905422.0
3,50357680,12,283000,15000,262035,212058.0
4,50854011,37,865000,65000,800920,646272.0
...,...,...,...,...,...,...
2563,18935005801132,1,120000,0,111111,90450.0
2564,18935012413328,4,1280000,0,1163636,923344.0
2565,28935049502337,1,194000,66000,179630,199560.0
2566,48934588063068,1,140000,0,129630,104280.0


In [9]:
# For each barcode take the first itemName found in df, then attach it to the
# per-barcode totals as an extra column.
df_itemName = df.groupby('barcode')['itemName'].agg('first')
df_summed = df_summed.assign(itemName=df_summed['barcode'].map(df_itemName))
df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
0,21202010,7,280000,0,266665,192500.0,HH Cam Úc 1 trái
1,23133289,48,1392000,0,1288891,850910.4,GPS Thạch hồng sâm Innerset 15g x gói
2,45116568,179,5947000,0,5506523,3905422.0,Lotte Kem mochi Yukimi Daifuku 90ml x hộp
3,50357680,12,283000,15000,262035,212058.0,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói
4,50854011,37,865000,65000,800920,646272.0,Fishermans Kẹo Cay Con Tàu Vị Bạc Hà 25g x Gói
...,...,...,...,...,...,...,...
2563,18935005801132,1,120000,0,111111,90450.0,THÙNG 24LAVIE Nước khoáng 500ml x Chai
2564,18935012413328,4,1280000,0,1163636,923344.0,THÙNG 24SABECO Lager Bia 330ml x Lon
2565,28935049502337,1,194000,66000,179630,199560.0,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon
2566,48934588063068,1,140000,0,129630,104280.0,THÙNG 12AQUAFINA Nước tinh khiết 1.5L x chai


In [10]:

# Order the per-barcode totals by net sales, highest first, and keep the first 20 rows.
top_20_barcodes = (
    df_summed
    .sort_values(by='netSales', ascending=False)
    .iloc[:20]
)
top_20_barcodes

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
64,260202000017,805,24150000,0,22361288,14575237.36,Tteokbokki Signature x tô
138,2010103000010,719,17930000,45000,16601751,10561960.16,Milo ly lớn x 1ly
153,2010805000363,617,17260000,16000,15981528,11829753.12,Hotdog 25 Signature x cái
206,2501031190510,892,16040000,1800000,14852066,9063459.0,Trà tắc ly lớn x 1 ly
268,2701010000179,393,14934000,0,13827705,9039000.0,GCCT Cơm nghêu mẹ nấu x hộp
2284,8938502525368,500,14500000,0,13425995,9132375.0,Bánh bao nhân thịt heo trứng muối xá xíu
176,2250103000058,947,13258000,0,12275961,7261969.32,TP. Bánh giò nhân thịt 150g x 1 cái
158,2010901000120,316,12008000,0,11118460,7780243.0,HĐ Cơm gà sốt Hàn Quốc x hộp
201,2501031190374,1025,11954000,346000,11068414,5528369.0,Nestea ly lớn (New) x 1 ly
191,2270103000100,309,11742000,0,10872165,6905743.0,HĐ Gimbap dakgalbi phô mai x cái
