In [1]:
import pandas as pd
import re

# Read the list of filenames from the configuration file (one path per line).
# Blank / whitespace-only lines are skipped: left in, they would be passed to
# open('') and abort the whole load with FileNotFoundError.
with open('file_list.txt', 'r', encoding='utf-8') as config_file:
    file_names = [name for name in config_file.read().splitlines() if name.strip()]

# Regex pattern to match the data format: "[<bracketed prefix>] <user>: <message>".
# The non-greedy groups stop at the first "] " and the first ": " respectively,
# so any later ": " stays inside the message group.
pattern = r'\[(.*?)\] (.*?): (.*)'
# Compiled once up front instead of being re-resolved for every line.
line_re = re.compile(pattern)

# Initialize an empty list to store parsed data
datalist = []

# Iterate over each specified file. A file's position in the list is used as
# its stream id (0-based), so the order of file_list.txt defines the numbering.
for stream_id, file in enumerate(file_names):
    with open(file, 'r', encoding='utf-8') as f:
        # Stream the file line by line rather than loading it all via readlines().
        for line in f:
            match = line_re.match(line)
            # Lines that do not fit the expected format are silently ignored.
            if match:
                date, user, message = match.groups()
                datalist.append([date, user, message, stream_id])

# Kept for any later cell that reads it: the number of files processed.
stream_count = len(file_names)

# Create a DataFrame from the parsed data (one row per matched line)
data = pd.DataFrame(datalist, columns=["date", "user", "message","stream"])

In [2]:
# For every stream, collect the distinct users that have at least one row in it.
# reset_index() turns the 'stream' group key back into a regular column.
users_per_stream = (
    data
    .groupby("stream")["user"]
    .unique()
    .reset_index()
)

# Show the resulting table (one row per stream)
print(users_per_stream)

    stream                                               user
0        0  [Martin_Gales, StreamElements, Aloddin, utaBz,...
1        1  [StreamElements, Martin_Gales, Zeololz, NSAPar...
2        2  [Martin_Gales, StreamElements, sbeedy20, Banti...
3        3  [Martin_Gales, StreamElements, swedenhobohealt...
4        4  [Martin_Gales, StreamElements, jqqq777, SeeMeN...
..     ...                                                ...
57      57  [StreamElements, banner2k, balintboss, Drakesf...
58      58  [Martin_Gales, StreamElements, balintboss, kur...
59      59  [Martin_Gales, StreamElements, xxxflower2, bal...
60      60  [StreamElements, ErPardi, zunkic, saa2323d, as...
61      61  [Martin_Gales, 1206paul_, StreamElements, Alum...

[62 rows x 2 columns]


In [3]:
# For every user, collect the distinct stream ids in which that user has at
# least one row. reset_index() turns the 'user' group key back into a column.
stream_per_users = (
    data
    .groupby("user")["stream"]
    .unique()
    .reset_index()
)

# Show the resulting table (one row per user)
print(stream_per_users)

              user                                             stream
0        00001joel                                             [4, 5]
1        00ramez00                                               [14]
2         062_davi                                               [37]
3     082274699958                                               [55]
4         09logdog                                               [53]
...            ...                                                ...
4979          無法顯示                                               [23]
4980          爪口长工  [0, 1, 3, 5, 7, 8, 10, 11, 13, 15, 21, 29, 30,...
4981         的显示名称                                           [49, 50]
4982       게이머2334                                               [45]
4983           덕충2                   [42, 43, 44, 45, 46, 54, 56, 57]

[4984 rows x 2 columns]


In [4]:
# Number of distinct streams per user: the length of each user's stream array.
stream_per_users["stream_count"] = stream_per_users["stream"].map(len)

In [5]:
# Preview the first five rows to check the newly added stream_count column.
stream_per_users.head(5)

Unnamed: 0,user,stream,stream_count
0,00001joel,"[4, 5]",2
1,00ramez00,[14],1
2,062_davi,[37],1
3,082274699958,[55],1
4,09logdog,[53],1


In [6]:
# Summary statistics of the per-user stream_count column
# (how many distinct streams each user has rows in).
stream_per_users['stream_count'].describe()

count    4984.000000
mean        2.978331
std         5.285842
min         1.000000
25%         1.000000
50%         1.000000
75%         3.000000
max        62.000000
Name: stream_count, dtype: float64

In [7]:
# Function to calculate the longest streak of consecutive streams
def longest_streak(streams):
    """Return the length of the longest run of consecutive stream indices.

    Parameters
    ----------
    streams : iterable of int
        Stream indices in any order (list, tuple, NumPy array, ...).
        Duplicates are ignored, so a repeated index neither extends nor
        breaks a run.

    Returns
    -------
    int
        Length of the longest run in which each value is exactly one more
        than the previous distinct value; 0 when ``streams`` is empty.
    """
    # De-duplicate before sorting: with duplicates left in, [1, 1, 2] would
    # compare 1 against 1 and wrongly reset the run.
    ordered = sorted(set(streams))
    if not ordered:
        # No streams at all means there is no streak, not a streak of one.
        return 0

    best = run = 1
    for previous, current in zip(ordered, ordered[1:]):
        # Extend the run on a +1 step, otherwise start a new run here.
        run = run + 1 if current == previous + 1 else 1
        best = max(best, run)

    return best

In [8]:
# Compute each user's longest run of consecutive stream ids and store it
# alongside the existing per-user columns.
stream_per_users["longest_streak"] = stream_per_users["stream"].map(longest_streak)

# Preview the first five rows (head() defaults to 5)
stream_per_users.head()

Unnamed: 0,user,stream,stream_count,longest_streak
0,00001joel,"[4, 5]",2,2
1,00ramez00,[14],1,1
2,062_davi,[37],1,1
3,082274699958,[55],1,1
4,09logdog,[53],1,1


In [9]:
# Summary statistics of the per-user longest_streak column
# (longest run of consecutive stream ids for each user).
stream_per_users["longest_streak"].describe()

count    4984.000000
mean        1.574037
std         2.561649
min         1.000000
25%         1.000000
50%         1.000000
75%         1.000000
max        62.000000
Name: longest_streak, dtype: float64

In [10]:
# Show the summary row for one specific user.
stream_per_users.loc[stream_per_users['user'].eq('kecso1g')]

Unnamed: 0,user,stream,stream_count,longest_streak
3013,kecso1g,"[2, 4, 7, 10, 13, 16, 23, 31, 36, 37, 40, 44, ...",22,9


In [11]:
# Show the users whose stream_count is at least 43.
stream_per_users.loc[stream_per_users['stream_count'].ge(43)]

Unnamed: 0,user,stream,stream_count,longest_streak
21,1206paul_,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",62,62
153,Aloddin,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14,...",56,18
155,Aluminiumminimumimmunity,"[9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22...",51,42
215,Banties1g,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",57,35
626,IRLKingsman,"[9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21...",48,35
651,IvanOnMyOwn,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",58,38
652,Ivana_10,"[0, 1, 2, 3, 4, 5, 6, 8, 9, 11, 12, 13, 14, 15...",45,7
680,Jeddix,"[0, 1, 2, 3, 6, 7, 11, 12, 13, 15, 16, 17, 18,...",49,20
793,LX212,"[0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 1...",43,12
871,Martin_Gales,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",61,36


In [12]:
# Select the 'stream' cell(s) for one user in a single .loc lookup
# (row mask + column label) rather than two chained indexing steps.
user_watche_these = stream_per_users.loc[stream_per_users['user'].eq('balintboss'), 'stream']

# Each element is that user's array of stream ids; print one array per matching row.
for s in user_watche_these:
    print(s)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61]
