In [28]:
import re
from pathlib import Path

import pandas as pd

In [29]:
# Read the list of chat-log filenames from the configuration file.
# Blank lines are skipped so that a spacer or trailing empty line in
# file_list.txt is not turned into an attempt to open the data directory.
with open('file_list.txt', 'r', encoding='utf-8') as config_file:
    file_names = [name for name in config_file.read().splitlines() if name.strip()]

# Each usable log line has the shape "[<date>] <user>: <message>".
pattern = r'\[(.*?)\] (.*?): (.*)'
line_regex = re.compile(pattern)

# Directory holding the log files. Built with pathlib so the path resolves on
# any OS (a hard-coded "data\\" prefix only works on Windows).
data_dir = Path("data")

# Parsed rows: [date, user, message, stream], where `stream` is the zero-based
# position of the source file within file_list.txt.
datalist = []
stream_count = 0
for file in file_names:
    full_path = data_dir / file
    with open(full_path, 'r', encoding='utf-8') as f:
        # Iterate lazily; there is no need to hold every line in memory.
        for line in f:
            match = line_regex.match(line)
            if match:  # lines that do not fit the pattern are ignored
                date, user, message = match.groups()
                datalist.append([date, user, message, stream_count])
    stream_count += 1

# Create a DataFrame from the parsed rows.
data = pd.DataFrame(datalist, columns=["date", "user", "message", "stream"])

# Fold known alternative names into a single user name each.
data["user"] = data["user"].replace(
    {"Banties1g": "banties1g", "fyodor_m_d1821": "fyredoor4"}
)

In [30]:
# Parse the raw date strings and keep only the calendar day.
data['date'] = pd.to_datetime(data['date']).dt.date

# Build a user x day presence matrix: 1 where the user has at least one
# message on that day, 0 where there is none.
pivot_table = pd.pivot_table(
    data,
    index='user',
    columns='date',
    values='message',
    aggfunc=lambda messages: 1,
    fill_value=0,
)

# Convert the date column labels to plain strings.
pivot_table.columns = pivot_table.columns.astype(str)

print(pivot_table)

date                  2024-05-01  2024-05-02  2024-05-03  2024-05-04  \
user                                                                   
0000000emirburak0320           0           0           0           0   
00001joel                      0           0           0           0   
000dexdliy000                  0           0           0           0   
000kokushibo000                0           0           0           0   
000mrx                         0           0           0           0   
...                          ...         ...         ...         ...   
안톤958                          0           0           0           0   
엘레레레ㅔ                          0           0           0           0   
진짜요시맨                          0           0           0           0   
하이드로른                          0           0           0           0   
흑우의도인                          0           0           0           0   

date                  2024-05-05  2024-05-06  2024-05-07  2024-

In [31]:
# Running total along each row, left to right across the day columns.
cumulative_sum = pivot_table.cumsum(axis="columns")

In [32]:
# Show the cumulative table as the cell's output.
cumulative_sum

date,2024-05-01,2024-05-02,2024-05-03,2024-05-04,2024-05-05,2024-05-06,2024-05-07,2024-05-09,2024-05-10,2024-05-11,...,2024-12-13,2024-12-16,2024-12-17,2024-12-18,2024-12-19,2024-12-20,2024-12-21,2024-12-22,2024-12-23,2024-12-25
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0000000emirburak0320,0,0,0,0,0,0,0,0,0,0,...,10,10,10,10,10,10,10,10,10,10
00001joel,0,0,0,0,1,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
000dexdliy000,0,0,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
000kokushibo000,0,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4
000mrx,0,0,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
안톤958,0,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4
엘레레레ㅔ,0,0,0,0,0,0,0,0,0,0,...,9,9,9,9,9,9,9,9,9,9
진짜요시맨,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,1,1
하이드로른,0,0,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3


In [33]:
# Keep only the users whose value in the last column (the final running
# total) is at least 5.
reached_threshold = cumulative_sum.iloc[:, -1].ge(5)
filtered_cumulative_sum = cumulative_sum.loc[reached_threshold]
filtered_cumulative_sum

date,2024-05-01,2024-05-02,2024-05-03,2024-05-04,2024-05-05,2024-05-06,2024-05-07,2024-05-09,2024-05-10,2024-05-11,...,2024-12-13,2024-12-16,2024-12-17,2024-12-18,2024-12-19,2024-12-20,2024-12-21,2024-12-22,2024-12-23,2024-12-25
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0000000emirburak0320,0,0,0,0,0,0,0,0,0,0,...,10,10,10,10,10,10,10,10,10,10
0buddha,0,0,0,0,0,0,0,0,0,0,...,63,63,63,63,63,63,64,64,64,64
0cops0problems,0,0,0,0,0,0,0,0,0,0,...,10,10,10,10,10,10,10,10,10,10
0mqnn,0,0,0,0,0,0,0,0,0,0,...,5,5,5,5,5,5,5,5,5,5
0nlywon,0,0,0,0,0,0,0,0,0,0,...,14,14,14,14,14,14,14,14,14,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
爪口长工,1,2,2,3,3,4,4,5,6,6,...,89,89,90,91,91,91,91,92,92,92
眼科權威,0,0,0,0,0,0,0,0,0,0,...,5,5,5,5,5,5,5,5,5,5
马克斯,0,0,0,0,0,0,0,0,0,0,...,5,5,5,5,5,5,5,5,5,5
덕충2,0,0,0,0,0,0,0,0,0,0,...,15,15,15,15,15,15,15,15,15,15


In [34]:
# Write the filtered table to an Excel workbook on a sheet named "Pivot Table".
filtered_cumulative_sum.to_excel(
    excel_writer='your_excel_file.xlsx',
    sheet_name='Pivot Table',
)