In [2]:
import pandas as pd
import configparser
import os

# Resolve the location of the net-sales CSV from the [path] section of config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means config.ini is missing.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini was not found in the working directory")
output_file_dir = config.get('path', 'output_file_dir')
output_file_name_net_sales = config.get('path', 'output_file_name_net_sales')
# The configured file name carries no extension; '.csv' is appended here.
net_sales_data = os.path.join(output_file_dir, output_file_name_net_sales) + '.csv'

# Load the CSV. Fail immediately with an actionable message when the file is
# absent: merely printing the error would leave `df` undefined and surface
# later as an unrelated NameError.
input_file = net_sales_data
try:
    df = pd.read_csv(input_file)
except FileNotFoundError as e:
    raise FileNotFoundError(
        f"Net sales CSV not found at {input_file!r}; "
        "check 'output_file_dir' and 'output_file_name_net_sales' in config.ini"
    ) from e

# Discard every row that has at least one missing value.
df = df.dropna()

In [3]:
# Preview the loaded frame; pandas abbreviates a long frame, so only the
# leading and trailing rows are rendered.
df

Unnamed: 0,Date,Time,InvoiceID,Barcode,Total Include VAT
0,2023-01-01,00:40:59,VN0001010101230001,8935049500544,7000
1,2023-01-01,00:41:18,VN0001010101230002,8938512632025,12000
2,2023-01-01,00:41:55,VN0001010101230003,8936011773416,25000
3,2023-01-01,00:41:55,VN0001010101230003,8936079121761,13000
4,2023-01-01,00:41:55,VN0001010101230003,8850453017528,13000
...,...,...,...,...,...
2054556,2023-01-11,22:06:27,VN0236021101230057,8935112200005,13000
2054557,2023-01-11,22:10:29,VN0236021101230058,8934803044027,13000
2054558,2023-01-11,22:10:29,VN0236021101230058,8935001215028,11000
2054559,2023-01-11,18:19:22,VN9996011101230005,2010101000012,9000


In [5]:
# StoreID is the leading six characters of InvoiceID
df['StoreID'] = df['InvoiceID'].str[:6]

# WeekDay is numbered 1 (Sunday) through 7 (Saturday).
# pandas numbers weekdays Monday=0 ... Sunday=6; shifting by one and wrapping
# modulo 7 puts Sunday first, and the final +1 makes the numbering 1-based.
monday_based_weekday = pd.to_datetime(df['Date']).dt.weekday
df['WeekDay'] = (monday_based_weekday + 1) % 7 + 1

df

Unnamed: 0,Date,Time,InvoiceID,Barcode,Total Include VAT,StoreID,WeekDay
0,2023-01-01,00:40:59,VN0001010101230001,8935049500544,7000,VN0001,1
1,2023-01-01,00:41:18,VN0001010101230002,8938512632025,12000,VN0001,1
2,2023-01-01,00:41:55,VN0001010101230003,8936011773416,25000,VN0001,1
3,2023-01-01,00:41:55,VN0001010101230003,8936079121761,13000,VN0001,1
4,2023-01-01,00:41:55,VN0001010101230003,8850453017528,13000,VN0001,1
...,...,...,...,...,...,...,...
2054556,2023-01-11,22:06:27,VN0236021101230057,8935112200005,13000,VN0236,4
2054557,2023-01-11,22:10:29,VN0236021101230058,8934803044027,13000,VN0236,4
2054558,2023-01-11,22:10:29,VN0236021101230058,8935001215028,11000,VN0236,4
2054559,2023-01-11,18:19:22,VN9996011101230005,2010101000012,9000,VN9996,4


In [7]:
# Snapshot the frame as a deep copy, so later changes to df's data do not
# show up in the backup
df_backup = df.copy(deep=True)

df_backup

Unnamed: 0,Date,Time,InvoiceID,Barcode,Total Include VAT,StoreID,WeekDay
0,2023-01-01,00:40:59,VN0001010101230001,8935049500544,7000,VN0001,1
1,2023-01-01,00:41:18,VN0001010101230002,8938512632025,12000,VN0001,1
2,2023-01-01,00:41:55,VN0001010101230003,8936011773416,25000,VN0001,1
3,2023-01-01,00:41:55,VN0001010101230003,8936079121761,13000,VN0001,1
4,2023-01-01,00:41:55,VN0001010101230003,8850453017528,13000,VN0001,1
...,...,...,...,...,...,...,...
2054556,2023-01-11,22:06:27,VN0236021101230057,8935112200005,13000,VN0236,4
2054557,2023-01-11,22:10:29,VN0236021101230058,8934803044027,13000,VN0236,4
2054558,2023-01-11,22:10:29,VN0236021101230058,8935001215028,11000,VN0236,4
2054559,2023-01-11,18:19:22,VN9996011101230005,2010101000012,9000,VN9996,4


In [14]:
# Collapse the line items into one row per InvoiceID: the descriptive columns
# keep the value from the invoice's first row, and Total Include VAT is summed.
first_value_columns = ['StoreID', 'Date', 'Time', 'Barcode', 'WeekDay']
invoice_aggregations = dict.fromkeys(first_value_columns, 'first')
invoice_aggregations['Total Include VAT'] = 'sum'

invoices = df.groupby('InvoiceID', as_index=False)
df_grouped = invoices.agg(invoice_aggregations)

# Function to get Time Range for each Time value
def get_time_range(time_str):
    """Return the one-hour bucket label that contains ``time_str``.

    Args:
        time_str: Time of day as text whose hour comes first, e.g.
            ``"08:15:42"``. A single-digit hour without zero padding
            (``"8:15:42"``) is accepted as well.

    Returns:
        A label of the form ``"HH:00-HH:59"`` with a zero-padded hour,
        e.g. ``"08:00-08:59"``.

    Raises:
        ValueError: If the leading hour field is not an integer.
    """
    # Look at the first two characters only; for a one-digit hour such as
    # "8:15:42" that prefix is "8:", so the trailing colon is removed before
    # the integer conversion.
    hour = int(time_str[:2].rstrip(":"))
    return f"{hour:02}:00-{hour:02}:59"

# Label every invoice with the hourly bucket its Time value falls into
time_range_labels = df_grouped['Time'].map(get_time_range)
df_grouped['Time Range'] = time_range_labels

df_grouped

Unnamed: 0,InvoiceID,StoreID,Date,Time,Barcode,WeekDay,Total Include VAT,Time Range
0,VN0001010101230001,VN0001,2023-01-01,00:40:59,8935049500544,1,14000,00:00-00:59
1,VN0001010101230002,VN0001,2023-01-01,00:41:18,8938512632025,1,24000,00:00-00:59
2,VN0001010101230003,VN0001,2023-01-01,00:41:55,8936011773416,1,230000,00:00-00:59
3,VN0001010101230004,VN0001,2023-01-01,00:43:12,8801100128845,1,382000,00:00-00:59
4,VN0001010101230005,VN0001,2023-01-01,00:44:22,8934588843051,1,52000,00:00-00:59
...,...,...,...,...,...,...,...,...
830120,VN0236021101230055,VN0236,2023-01-11,22:04:24,2270103000025,4,54000,22:00-22:59
830121,VN0236021101230056,VN0236,2023-01-11,22:06:01,8936127790017,4,6000,22:00-22:59
830122,VN0236021101230057,VN0236,2023-01-11,22:06:27,2010101000005,4,16000,22:00-22:59
830123,VN0236021101230058,VN0236,2023-01-11,22:10:29,8934803044027,4,24000,22:00-22:59


In [15]:
# Build the StoreID x WeekDay (rows) by Time Range (columns) matrix of summed
# Total Include VAT.
pivot_table = df_grouped.pivot_table(
    index=['StoreID', 'WeekDay'],
    columns='Time Range',
    values='Total Include VAT',
    aggfunc='sum',
)
# Render every cell as text with thousands separators and no decimals.
# Formatting column by column with Series.map avoids DataFrame.applymap, which
# is deprecated in recent pandas, and produces the same result on older
# releases. Cells with no sales are NaN and therefore become the text "nan".
pivot_table = pivot_table.apply(lambda column: column.map("{:,.0f}".format))

pivot_table

Unnamed: 0_level_0,Time Range,00:00-00:59,01:00-01:59,02:00-02:59,03:00-03:59,04:00-04:59,05:00-05:59,06:00-06:59,07:00-07:59,08:00-08:59,09:00-09:59,...,14:00-14:59,15:00-15:59,16:00-16:59,17:00-17:59,18:00-18:59,19:00-19:59,20:00-20:59,21:00-21:59,22:00-22:59,23:00-23:59
StoreID,WeekDay,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
VN0001,1,8383000,8873000,1520000,664000,254000,452000,985000,1865000,1274000,2285000,...,3774000,2230000,2645000,4613000,2478000,2395000,4445000,4459000,5320000,3446000
VN0001,2,862000,758000,439000,510000,280000,34000,672000,2685000,3779000,3171000,...,2338000,2505000,3548000,1810000,2528000,1726000,1637000,2489000,1871000,1148000
VN0001,3,833000,1724000,381000,106000,108000,382000,1230000,4108000,7012000,3686000,...,2761000,3990000,4059000,3247000,2947000,2618000,2658000,2499000,2379000,1375000
VN0001,4,548000,1431000,225000,282000,190000,634000,1351000,5596000,7583000,5683000,...,1987200,4586000,3090000,7920000,2682300,2520000,2170000,1668000,2254000,1687000
VN0001,5,416000,143000,224000,114000,278000,6000,1064000,2406000,4623000,2333000,...,1825000,1648000,1745000,2232000,3396000,1922000,1196000,1579000,1624000,679000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VN0236,4,1411000,593000,386000,106000,203000,306000,1331000,416000,358000,1774000,...,648000,544000,1993000,989000,1938000,1419000,2247000,2990000,3386000,2364000
VN0236,5,752000,1198000,183000,243000,966000,335000,473000,430000,349000,627000,...,37000,327000,1346000,747000,639000,880000,704000,1333000,1550000,834000
VN0236,6,491000,345000,357000,106000,487000,145000,187000,281000,383000,323500,...,462600,871000,1178000,1254000,611000,1283000,696000,1306000,1025000,965000
VN0236,7,580000,379000,662000,133000,,85000,132000,764000,663000,750000,...,549000,936000,742000,1408000,831000,872000,2373000,1097000,1944000,1130000


In [None]:
# Save the pivot table to csv file
# pivot_table.to_csv('result.csv')