In [1]:
import pandas as pd
import re

# Read the list of log filenames from the configuration file.
# Blank lines are skipped so that a stray empty line in file_list.txt does not
# become open('') and abort the whole load. Names are otherwise used verbatim.
with open('file_list.txt', 'r', encoding='utf-8') as config_file:
    file_names = [name for name in config_file.read().splitlines() if name.strip()]

# Regex pattern to match the data format: "[<date>] <user>: <message>"
pattern = r'\[(.*?)\] (.*?): (.*)'
line_re = re.compile(pattern)  # compiled once, reused for every line

# Parsed rows, one per matching line: [date, user, message, stream].
# "stream" is the zero-based position of the source file in file_names.
datalist = []
for stream_index, file in enumerate(file_names):
    with open(file, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of loading it whole with readlines().
        for line in f:
            match = line_re.match(line)
            if match:
                # Lines that do not match the pattern are silently ignored.
                date, user, message = match.groups()
                datalist.append([date, user, message, stream_index])

# Same final value the manual counter used to hold: number of files processed.
stream_count = len(file_names)

# Create a DataFrame from the parsed data
data = pd.DataFrame(datalist, columns=["date", "user", "message", "stream"])

In [2]:
# For every stream index, collect the distinct users seen in that stream
users_per_stream = (
    data
    .groupby("stream")["user"]
    .unique()
    .reset_index()
)

# Print the resulting table as plain text
print(users_per_stream)

    stream                                               user
0        0  [Martin_Gales, StreamElements, Aloddin, utaBz,...
1        1  [StreamElements, Martin_Gales, Zeololz, NSAPar...
2        2  [Martin_Gales, StreamElements, sbeedy20, Banti...
3        3  [Martin_Gales, StreamElements, swedenhobohealt...
4        4  [Martin_Gales, StreamElements, jqqq777, SeeMeN...
5        5  [Martin_Gales, StreamElements, Banties1g, coll...
6        6  [Martin_Gales, StreamElements, x3lolx, simcapr...
7        7  [Martin_Gales, StreamElements, FarfarFertil, m...
8        8  [Martin_Gales, StreamElements, zoiodelul4, Zeo...
9        9  [Martin_Gales, StreamElements, slarzka, The_As...
10      10  [Martin_Gales, StreamElements, mrzexyy, 3_nzi,...
11      11  [Martin_Gales, StreamElements, IRLKingsman, ut...
12      12  [StreamElements, klimzaa, balintboss, domjel12...
13      13  [1206paul_, StreamElements, Banties1g, Aloddin...
14      14  [StreamElements, Zxngetsu0, hilallunar, sabago...
15      

In [3]:
# For every user, collect the distinct stream indices they appear in
stream_per_users = (
    data
    .groupby("user")["stream"]
    .unique()
    .reset_index()
)

# Print the resulting table as plain text
print(stream_per_users)

           user                                             stream
0     00001joel                                             [4, 5]
1     00ramez00                                               [14]
2      062_davi                                               [37]
3        0_OS_0                                               [39]
4        0criss                                       [32, 41, 45]
...         ...                                                ...
3983       無法顯示                                               [23]
3984       爪口长工  [0, 1, 3, 5, 7, 8, 10, 11, 13, 15, 21, 29, 30,...
3985      的显示名称                                           [49, 50]
3986    게이머2334                                               [45]
3987        덕충2                               [42, 43, 44, 45, 46]

[3988 rows x 2 columns]


In [4]:
# Number of distinct streams per user (length of each per-user array of stream indices)
stream_per_users["stream_count"] = stream_per_users["stream"].map(len)

In [5]:
# Preview the first five rows (head() defaults to 5)
stream_per_users.head()

Unnamed: 0,user,stream,stream_count
0,00001joel,"[4, 5]",2
1,00ramez00,[14],1
2,062_davi,[37],1
3,0_OS_0,[39],1
4,0criss,"[32, 41, 45]",3


In [6]:
# Summary statistics of how many distinct streams each user appears in
stream_per_users["stream_count"].describe()

count    3988.000000
mean        2.965396
std         5.011129
min         1.000000
25%         1.000000
50%         1.000000
75%         3.000000
max        52.000000
Name: stream_count, dtype: float64

In [7]:
# Function to calculate the longest streak of consecutive streams
def longest_streak(streams) -> int:
    """Return the length of the longest run of consecutive integers in ``streams``.

    Parameters
    ----------
    streams : iterable of int
        Stream indices in any order. Duplicates are allowed and counted once.

    Returns
    -------
    int
        Length of the longest run ``k, k+1, k+2, ...`` present in ``streams``;
        ``0`` when ``streams`` is empty.
    """
    # Deduplicate before sorting: a repeated index is neither "previous + 1"
    # nor a real gap, so it must not reset the running streak.
    ordered = sorted(set(streams))
    if not ordered:
        # No streams at all means no streak.
        return 0

    best = current = 1
    # Walk adjacent pairs; extend the run on a +1 step, restart otherwise.
    for previous, value in zip(ordered, ordered[1:]):
        if value == previous + 1:
            current += 1
            best = max(best, current)
        else:
            current = 1

    return best

In [8]:
# Per user, store the longest run of consecutive stream indices
stream_per_users["longest_streak"] = stream_per_users["stream"].map(longest_streak)

# Preview the first five rows (head() defaults to 5)
stream_per_users.head()

Unnamed: 0,user,stream,stream_count,longest_streak
0,00001joel,"[4, 5]",2,2
1,00ramez00,[14],1,1
2,062_davi,[37],1,1
3,0_OS_0,[39],1,1
4,0criss,"[32, 41, 45]",3,1


In [9]:
# Summary statistics of the per-user longest consecutive-stream streak
stream_per_users.loc[:, "longest_streak"].describe()

count    3988.000000
mean        1.621113
std         2.537726
min         1.000000
25%         1.000000
50%         1.000000
75%         1.000000
max        52.000000
Name: longest_streak, dtype: float64

In [10]:
# Look up the row for one specific user
stream_per_users[stream_per_users['user'].eq('kecso1g')]

Unnamed: 0,user,stream,stream_count,longest_streak
2430,kecso1g,"[2, 4, 7, 10, 13, 16, 23, 31, 36, 37, 40, 44, 46]",13,2


In [11]:
# Users who appear in at least 43 distinct streams
stream_per_users[stream_per_users['stream_count'].ge(43)]

Unnamed: 0,user,stream,stream_count,longest_streak
14,1206paul_,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",52,52
121,Aloddin,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14,...",47,18
174,Banties1g,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",50,35
526,IvanOnMyOwn,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",49,30
706,Martin_Gales,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",51,36
911,Risc__V,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16...",45,18
1036,StreamElements,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",52,52
1216,Zeololz,"[0, 1, 2, 3, 4, 5, 6, 8, 11, 13, 14, 15, 16, 1...",43,15
1451,balintboss,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",52,52
1810,dorozea,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",43,28


In [21]:
# Select the 'stream' entries of the rows belonging to user 'balintboss'
user_watche_these = stream_per_users.loc[
    stream_per_users['user'].eq('balintboss'), 'stream'
]

# Print each selected entry (an array of stream indices) on its own
for stream_ids in user_watche_these:
    print(stream_ids)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51]
