In [1]:
import pandas as pd
import re

# Read the list of chat-log filenames from the configuration file, one path per
# line. Blank (or whitespace-only) lines are skipped so a stray empty line in
# the list is not passed to open() as an empty path.
with open('file_list.txt', 'r', encoding='utf-8') as config_file:
    file_names = [name for name in config_file.read().splitlines() if name.strip()]

# Regex pattern to match the data format: "[<date>] <user>: <message>"
pattern = r'\[(.*?)\] (.*?): (.*)'

# Parsed rows, each [date, user, message, stream]; `stream` is the 0-based
# position of the source file within file_names.
datalist = []
stream_count = 0
# Iterate over each specified file
for file in file_names:
    with open(file, 'r', encoding='utf-8') as f:
        # Iterate the file object directly instead of loading every line
        # into memory with readlines().
        for line in f:
            match = re.match(pattern, line)
            # Lines that do not fit the expected format are ignored.
            if match:
                date, user, message = match.groups()
                datalist.append([date, user, message, stream_count])
    stream_count += 1

# Create a DataFrame from the parsed data
data = pd.DataFrame(datalist, columns=["date", "user", "message", "stream"])

In [2]:
# For every stream, collect the distinct users that posted at least one message
users_per_stream = (
    data
    .groupby("stream")["user"]
    .unique()
    .reset_index()
)

# Display the result
print(users_per_stream)

    stream                                               user
0        0  [Martin_Gales, StreamElements, Aloddin, utaBz,...
1        1  [StreamElements, Martin_Gales, Zeololz, NSAPar...
2        2  [Martin_Gales, StreamElements, sbeedy20, Banti...
3        3  [Martin_Gales, StreamElements, swedenhobohealt...
4        4  [Martin_Gales, StreamElements, jqqq777, SeeMeN...
..     ...                                                ...
69      69  [StreamElements, Martin_Gales, Aluminiumminimu...
70      70  [Martin_Gales, StreamElements, balintboss, Alu...
71      71  [Martin_Gales, StreamElements, Aluminiumminimu...
72      72  [Martin_Gales, balintboss, ch1ck3n_b0y, pavle9...
73      73  [StreamElements, Aluminiumminimumimmunity, bal...

[74 rows x 2 columns]


In [3]:
# For every user, collect the distinct streams in which they posted a message
stream_per_users = (
    data
    .groupby("user")["stream"]
    .unique()
    .reset_index()
)

# Display the result
print(stream_per_users)

              user                            stream
0        00001joel                            [4, 5]
1        00ramez00                              [14]
2      04lennart04                              [72]
3         062_davi                              [37]
4     082274699958                              [55]
...            ...                               ...
6035       豪軒99999                              [71]
6036       게이머2334                              [45]
6037           덕충2  [42, 43, 44, 45, 46, 54, 56, 57]
6038      쌍베님사랑합니다                              [73]
6039         하이드로른                          [70, 71]

[6040 rows x 2 columns]


In [4]:
# Number of distinct streams per user (length of each user's stream array)
stream_per_users["stream_count"] = stream_per_users["stream"].map(len)

In [5]:
# Preview the first five users together with their stream counts
stream_per_users.head(n=5)

Unnamed: 0,user,stream,stream_count
0,00001joel,"[4, 5]",2
1,00ramez00,[14],1
2,04lennart04,[72],1
3,062_davi,[37],1
4,082274699958,[55],1


In [6]:
# Summary statistics of the per-user stream counts
stream_per_users.loc[:, 'stream_count'].describe()

count    6040.000000
mean        3.049007
std         5.625467
min         1.000000
25%         1.000000
50%         1.000000
75%         3.000000
max        74.000000
Name: stream_count, dtype: float64

In [7]:
# Function to calculate the longest streak of consecutive streams
def longest_streak(streams):
    """Return the length of the longest run of consecutive stream indices.

    Parameters
    ----------
    streams : iterable of int
        Stream indices in any order. Duplicate indices are counted once.

    Returns
    -------
    int
        Length of the longest run ``k, k+1, k+2, ...`` contained in
        ``streams``; 0 when ``streams`` is empty.
    """
    # De-duplicate before sorting: a repeated index would otherwise sit between
    # two consecutive values and wrongly reset the running streak.
    ordered = sorted(set(streams))
    if not ordered:
        return 0

    max_streak = 1
    current_streak = 1
    # Walk over adjacent pairs; a gap larger than one starts a new streak.
    for previous, current in zip(ordered, ordered[1:]):
        if current == previous + 1:
            current_streak += 1
            max_streak = max(max_streak, current_streak)
        else:
            current_streak = 1

    return max_streak

In [8]:
# Compute, per user, the longest run of consecutive streams and store it as a new column
stream_per_users["longest_streak"] = stream_per_users["stream"].map(longest_streak)

# Show the first five rows including the new column
stream_per_users.head(n=5)

Unnamed: 0,user,stream,stream_count,longest_streak
0,00001joel,"[4, 5]",2,2
1,00ramez00,[14],1,1
2,04lennart04,[72],1,1
3,062_davi,[37],1,1
4,082274699958,[55],1,1


In [9]:
# Summary statistics of the per-user longest streaks
stream_per_users.loc[:, "longest_streak"].describe()

count    6040.000000
mean        1.554967
std         2.660815
min         1.000000
25%         1.000000
50%         1.000000
75%         1.000000
max        74.000000
Name: longest_streak, dtype: float64

In [10]:
# Stream-count statistics restricted to users seen in more than three streams
stream_per_users.loc[stream_per_users["stream_count"] > 3, "stream_count"].describe()

count    1133.000000
mean       10.204766
std        10.190348
min         4.000000
25%         5.000000
50%         6.000000
75%        11.000000
max        74.000000
Name: stream_count, dtype: float64

In [11]:
# Look up the row for the user 'Martin_Gales'
stream_per_users.loc[stream_per_users['user'] == 'Martin_Gales']

Unnamed: 0,user,stream,stream_count,longest_streak
1032,Martin_Gales,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",72,36


In [12]:
# Users that appeared in at least 43 streams
stream_per_users.loc[stream_per_users['stream_count'] >= 43]

Unnamed: 0,user,stream,stream_count,longest_streak
25,1206paul_,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",74,74
194,Aloddin,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14,...",68,19
197,Aluminiumminimumimmunity,"[9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21...",64,62
268,Banties1g,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",67,35
385,Cuhhsin,"[22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3...",45,27
674,Grumsee,"[0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14...",44,7
749,IRLKingsman,"[9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20...",57,46
779,IvanOnMyOwn,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",68,38
780,Ivana_10,"[0, 1, 2, 3, 4, 5, 6, 8, 9, 11, 12, 13, 14, 15...",50,7
810,Jeddix,"[0, 1, 2, 3, 6, 7, 11, 12, 13, 15, 16, 17, 18,...",58,25


In [13]:
# Select the stream arrays recorded for the user 'balintboss'
user_watche_these = stream_per_users.loc[stream_per_users['user'] == 'balintboss', 'stream']

# Print each matching entry in full (the selection holds one array per matching row)
for watched_streams in user_watche_these:
    print(watched_streams)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73]
