In [1]:
import pandas as pd
import glob

def load_salescate_data(data_folder):
    """Load every ``salebycategory*.xlsx`` workbook in a folder into one DataFrame.

    Args:
        data_folder: Directory to search (str or path-like). Only files directly
            inside it are considered; sub-folders are not searched.

    Returns:
        A single DataFrame holding the rows of all matching workbooks, stacked
        in sorted file-path order with a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If no file in ``data_folder`` matches the pattern.
    """
    # Escape the folder part so characters such as '[' or '*' in a directory
    # name are matched literally instead of being read as glob wildcards.
    pattern = glob.escape(str(data_folder)) + "/salebycategory*.xlsx"

    # glob returns matches in arbitrary order; sort so the row order of the
    # combined frame (and anything derived from it) is reproducible.
    salescate_files = sorted(glob.glob(pattern))

    # If no files are found, raise a FileNotFoundError
    if not salescate_files:
        raise FileNotFoundError(f"No 'salebycategory*.xlsx' files found in {data_folder}")

    # Read each workbook into its own dataframe
    salescate_dfs = [pd.read_excel(file) for file in salescate_files]

    # Stack all the dataframes into one, discarding the per-file indexes
    salescate_df = pd.concat(salescate_dfs, ignore_index=True)

    return salescate_df

# Candidate locations of the data folder, tried in this order
data_folder1 = r'C:\Users\PC\Dropbox\Projects\data_xls'
data_folder2 = r'C:\Users\trieu.pham\Dropbox\Projects\data_xls'
data_folder3 = r'/Users/trieupham/Dropbox/Projects/data_xls'

# Load from the first candidate that contains matching files. A flat loop
# replaces one try/except nesting level per folder, so adding a location only
# means adding it to the tuple.
last_error = None
for candidate_folder in (data_folder1, data_folder2, data_folder3):
    try:
        df = load_salescate_data(candidate_folder)
        break
    except FileNotFoundError as error:
        # `error` is unbound when the except clause ends, so keep a reference
        last_error = error
else:
    # The loop finished without a break: no candidate had any matching file.
    # Report it and re-raise the failure from the last folder tried.
    print("Data folder not found")
    raise last_error
print(df.head())

         barcode                               itemName  divisionCode  \
0  2010101000005                         Ly đá vừa x ly            25   
1  2501031190411                         Ly đá lớn x ly            25   
2  2501031190510                  Trà tắc ly lớn x 1 ly            25   
3  2250103000058    TP. Bánh giò nhân thịt 150g x 1 cái            26   
4  8934588063053  AQUAFINA Nước tinh khiết 500ml x chai             7   

  divisionName  categoryCode        categoryName  subCategoryCode  \
0  FF Beverage          2506  FF Beverage Onsite           250601   
1  FF Beverage          2506  FF Beverage Onsite           250601   
2  FF Beverage          2506  FF Beverage Onsite           250603   
3    FF Onsite          2604        Steamed Bun            260403   
4     Beverage           701       Bottled Water            70101   

       subCategoryName  itemQty  grossSales  itemDiscount  netSales  \
0           Cup Of Ice      444      717000        615000    663935   
1   

In [2]:
# Print a summary of df: row count, column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10364 entries, 0 to 10363
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   barcode          10364 non-null  int64  
 1   itemName         10364 non-null  object 
 2   divisionCode     10364 non-null  int64  
 3   divisionName     10364 non-null  object 
 4   categoryCode     10364 non-null  int64  
 5   categoryName     10364 non-null  object 
 6   subCategoryCode  10364 non-null  int64  
 7   subCategoryName  10364 non-null  object 
 8   itemQty          10364 non-null  int64  
 9   grossSales       10364 non-null  int64  
 10  itemDiscount     10364 non-null  int64  
 11  netSales         10364 non-null  int64  
 12  totalCostPrice   10364 non-null  float64
dtypes: float64(1), int64(8), object(4)
memory usage: 1.0+ MB


In [3]:
# Show all the rows which have barcode 8936114080084

df[df['barcode'] == 8936114080084]


Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
2456,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,5,171000,0,158332,106920.0
4899,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,1,33000,0,30556,21384.0
6082,8936114080084,DBMINT Kẹo cao su bạc hà 40 viên x1 1 Hủ,9,Candy,903,Gum,90301,Gum Sub,4,132000,0,122224,85536.0
8104,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,2,66000,0,61112,42768.0
9740,8936114080084,Doublemint Kẹo Cao Su Bạc Hà 40 Viên x Hủ,9,Candy,903,Gum,90301,Gum Sub,3,99000,0,91668,64152.0


In [4]:
# List every barcode that has more than one row in the dataframe, together with
# its number of rows (the result is indexed by barcode; itemName is not included).
# value_counts() is computed once and reused for both the mask and the selection.
barcode_counts = df['barcode'].value_counts()
barcode_counts[barcode_counts > 1]


barcode
2010101000005    6
8938511172935    6
8935006361249    6
8936079124267    6
8938503131810    6
                ..
8935039513257    2
8934680040594    2
2260201000508    2
2602010136977    2
8935001719533    2
Name: count, Length: 2194, dtype: int64

In [5]:
# Descriptive columns that together identify one output row
product_keys = [
    'barcode', 'itemName',
    'divisionCode', 'divisionName',
    'categoryCode', 'categoryName',
    'subCategoryCode', 'subCategoryName',
]
# Numeric columns to total within each key combination
grouped_measures = ['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']

# One row per distinct key combination, every measure summed over its rows;
# reset_index() turns the group keys back into ordinary columns.
grouped_totals = df.groupby(product_keys).agg(dict.fromkeys(grouped_measures, 'sum'))
df_grouped = grouped_totals.reset_index()

df_grouped

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,HH Cam Úc 1 trái,29,Fresh Grocery,2901,Fruit Cat,290101,Fruit Sub,7,280000,0,266665,192500.0
1,23133289,GPS Thạch hồng sâm Innerset 15g x gói,7,Beverage,708,Others Beverage,70801,Condition/Supplements,48,1392000,0,1288891,850910.4
2,45116568,Lotte Kem mochi Yukimi Daifuku 90ml x hộp,5,Ice Cream,501,Packaged Ice Cream/Novelties,50104,Other Ice Cream,173,5749000,0,5323187,3774514.0
3,50357680,FISHERMANS Kẹo cay con tau khuynh diep 25g x1 goi,9,Candy,901,Candy Category,90101,Hard Candy,3,66000,11000,61111,50490.0
4,50357680,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói,9,Candy,901,Candy Category,90101,Hard Candy,7,167000,4000,154628,124542.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3852,18935005801132,THÙNG 24LAVIE Nước khoáng 500ml x Chai,7,Beverage,701,Bottled Water,70102,Mineral water,1,120000,0,111111,90450.0
3853,18935012413328,THÙNG 24SABECO Lager Bia 330ml x Lon,6,Beer,602,Domestic Beer,60201,Domestic Beer Can,4,1280000,0,1163636,923344.0
3854,28935049502337,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon,7,Beverage,702,Carbonated Drinks - CSD,70202,No/Less Sugar CSD,1,194000,66000,179630,199560.0
3855,48934588063068,THÙNG 12AQUAFINA Nước tinh khiết 1.5L x chai,7,Beverage,701,Bottled Water,70101,Purified water,1,140000,0,129630,104280.0


In [6]:
# Order the grouped rows by net sales, highest first, then keep the leading 20.
# df_grouped is keyed on barcode AND itemName, so one barcode can occupy
# several of these rows when it was sold under more than one item name.
grouped_by_net_sales = df_grouped.sort_values(by='netSales', ascending=False)
top_20_barcodes = grouped_by_net_sales.head(20)
top_20_barcodes

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
98,260202000017,Tteokbokki Signature x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,476,14280000,0,13222326,8509789.92
3407,8938502525368,Bánh bao nhân thịt heo trứng muối xá xíu,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,469,13601000,0,12593583,8566167.75
212,2010805000363,Hotdog 25 Signature x cái,26,FF Onsite,2602,Other Cuisine,260201,Other Cuisine Sub,479,13396000,16000,12403740,9126468.43
256,2250103000058,TP. Bánh giò nhân thịt 150g x 1 cái,26,FF Onsite,2604,Steamed Bun,260403,Others Steamed bun,903,12642000,0,11705589,6927016.68
393,2701010000179,GCCT Cơm nghêu mẹ nấu x hộp,27,FF Offsite,2701,Rice Offsite,270101,Bento,330,12540000,0,11611050,7590000.0
307,2501031190510,Trà tắc ly lớn x 1 ly,25,FF Beverage,2506,FF Beverage Onsite,250603,Cold Local Drinks,644,11480000,1400000,10629767,6542718.0
3405,8938502525344,BB 2 trứng cút 150g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,522,9918000,0,9183500,6374570.04
97,260202000017,Tteokbokki Signature x 1 phần,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,329,9870000,0,9138962,6065447.44
3404,8938502525337,BB 1 trứng cút 120g x 1 Cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,573,9707000,34000,8988060,5257962.6
2695,8935335400466,Bánh bao xá xíu phô mai 120g x 1 cái,26,FF Onsite,2604,Steamed Bun,260402,Savoury Buns,373,9325000,0,8634206,5635190.75


In [7]:
# Filter the rows for barcode 2602020019048
df[df['barcode'] == 2602020019048]

Unnamed: 0,barcode,itemName,divisionCode,divisionName,categoryCode,categoryName,subCategoryCode,subCategoryName,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
5,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,135,4050000,0,3750029,2580186.0
3372,2602020019048,Tteokbokki Signature (TC) x tô,26,FF Onsite,2602,Other Cuisine,260202,Korean cuisine Sub,103,3090000,0,2861134,1966167.0


In [8]:
# Numeric columns to total for each barcode
summed_measures = ['itemQty', 'grossSales', 'itemDiscount', 'netSales', 'totalCostPrice']

# Collapse df to one row per barcode, summing every measure across all rows
# that share the barcode; reset_index() makes 'barcode' a regular column again.
per_barcode_totals = df.groupby('barcode').agg(dict.fromkeys(summed_measures, 'sum'))
df_summed = per_barcode_totals.reset_index()

df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice
0,21202010,7,280000,0,266665,192500.0
1,23133289,48,1392000,0,1288891,850910.4
2,45116568,173,5749000,0,5323187,3774514.0
3,50357680,10,233000,15000,215739,175032.0
4,50854011,35,815000,65000,754624,609246.0
...,...,...,...,...,...,...
2517,18935005801132,1,120000,0,111111,90450.0
2518,18935012413328,4,1280000,0,1163636,923344.0
2519,28935049502337,1,194000,66000,179630,199560.0
2520,48934588063068,1,140000,0,129630,104280.0


In [9]:
# One display name per barcode: the first non-null itemName in df's row order
df_itemName = df.groupby('barcode')['itemName'].first()

# Look each summed barcode up in that series and store the result as a new column
names_for_summed = df_summed['barcode'].map(df_itemName)
df_summed['itemName'] = names_for_summed
df_summed

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
0,21202010,7,280000,0,266665,192500.0,HH Cam Úc 1 trái
1,23133289,48,1392000,0,1288891,850910.4,GPS Thạch hồng sâm Innerset 15g x gói
2,45116568,173,5749000,0,5323187,3774514.0,Lotte Kem mochi Yukimi Daifuku 90ml x hộp
3,50357680,10,233000,15000,215739,175032.0,Fishermans Kẹo Cay Con Tau Khuynh Diep 25g x Gói
4,50854011,35,815000,65000,754624,609246.0,Fishermans Kẹo Cay Con Tàu Vị Bạc Hà 25g x Gói
...,...,...,...,...,...,...,...
2517,18935005801132,1,120000,0,111111,90450.0,THÙNG 24LAVIE Nước khoáng 500ml x Chai
2518,18935012413328,4,1280000,0,1163636,923344.0,THÙNG 24SABECO Lager Bia 330ml x Lon
2519,28935049502337,1,194000,66000,179630,199560.0,THÙNG 24_COCA Zero Sleek 320ml x 1 Lon
2520,48934588063068,1,140000,0,129630,104280.0,THÙNG 12AQUAFINA Nước tinh khiết 1.5L x chai


In [10]:

# Order the per-barcode totals by net sales, highest first, then keep the leading 20
summed_by_net_sales = df_summed.sort_values(by='netSales', ascending=False)
top_20_barcodes = summed_by_net_sales.head(20)
top_20_barcodes

Unnamed: 0,barcode,itemQty,grossSales,itemDiscount,netSales,totalCostPrice,itemName
62,260202000017,805,24150000,0,22361288,14575237.36,Tteokbokki Signature x 1 phần
131,2010103000010,677,16880000,45000,15629533,9937966.16,Milo ly lớn x 1ly
146,2010805000363,547,15300000,16000,14166708,10496031.22,Hotdog 25 Signature x cái
2242,8938502525368,469,13601000,0,12593583,8566167.75,Bánh bao nhân thịt heo trứng muối xá xíu
196,2501031190510,748,13560000,1400000,12555737,7600491.0,Trà tắc ly lớn x 1 ly
256,2701010000179,355,13490000,0,12490675,8165000.0,GCCT Cơm nghêu mẹ nấu x hộp
169,2250103000058,903,12642000,0,11705589,6927016.68,TP. Bánh giò nhân thịt 150g x 1 cái
184,2270103000100,295,11210000,0,10379575,6589981.0,HĐ Gimbap dakgalbi phô mai x cái
194,2501031190374,962,11198000,346000,10368421,5179034.0,Nestea ly lớn (New) x 1 ly
151,2010901000120,284,10792000,0,9992540,6965443.0,HĐ Cơm gà sốt Hàn Quốc x hộp
