In [30]:
import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np

# Read the list of filenames from the configuration file
with open('file_list.txt', 'r', encoding='utf-8') as config_file:
    file_names = config_file.read().splitlines()

# Regex pattern to match the data format
pattern = r'\[(.*?)\] (.*?): (.*)'


# Initialize an empty list to store parsed data
datalist = []
stream_count = 0
# Iterate over each specified file
for file in file_names:
    full_path = "data\\"+file
    with open(full_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            match = re.match(pattern, line)
            if match:
                date, user, message = match.groups()
                datalist.append([date, user, message,stream_count])
    stream_count = stream_count + 1

# Create a DataFrame from the parsed data
data = pd.DataFrame(datalist, columns=["date", "user", "message","stream"])

In [31]:
data = data[data.apply(lambda row: row['user'].lower() in row['message'].lower(), axis=1)]


In [32]:
data.count()

date       2840
user       2840
message    2840
stream     2840
dtype: int64

In [33]:
data.head(25)

Unnamed: 0,date,user,message,stream
100,2024-05-01 15:13:16 UTC,kloakpojken,4 raiders from kloakpojken have joined!,0
120,2024-05-01 15:14:42 UTC,yusuf2774,yusuf2774 is gifting 1 Tier 1 Subs to dorozea'...,0
121,2024-05-01 15:14:43 UTC,yusuf2774,yusuf2774 gifted a Tier 1 sub to ChickenBurgah...,0
210,2024-05-01 15:26:58 UTC,iborotti_06,iborotti_06 subscribed with Prime.,0
566,2024-05-01 16:29:11 UTC,x3lolx,x3lolx is gifting 1 Tier 1 Subs to dorozea's c...,0
567,2024-05-01 16:29:11 UTC,x3lolx,x3lolx gifted a Tier 1 sub to ArtMassong!,0
568,2024-05-01 16:30:00 UTC,x3lolx,x3lolx is gifting 1 Tier 1 Subs to dorozea's c...,0
569,2024-05-01 16:30:00 UTC,x3lolx,x3lolx gifted a Tier 1 sub to PrachtkerleDE!,0
575,2024-05-01 16:31:10 UTC,x3lolx,x3lolx is gifting 1 Tier 1 Subs to dorozea's c...,0
576,2024-05-01 16:31:10 UTC,x3lolx,x3lolx gifted a Tier 1 sub to lubb1x!,0


In [34]:
giftedData = data[data['message'].str.contains(' sub ', case=False, na=False)]

In [35]:
gifterData = data[data['message'].str.contains(' is gifting ', case=False, na=False)]
gifterData.tail(25)

Unnamed: 0,date,user,message,stream
318973,2024-09-05 14:54:31 UTC,xan_gny,xan_gny is gifting 1 Tier 1 Subs to dorozea's ...,104
319153,2024-09-05 15:27:28 UTC,shenxir,shenxir is gifting 5 Tier 1 Subs to dorozea's ...,104
319173,2024-09-05 15:28:45 UTC,xan_gny,xan_gny is gifting 1 Tier 1 Subs to dorozea's ...,104
319209,2024-09-05 15:32:52 UTC,xan_gny,xan_gny is gifting 3 Tier 1 Subs to dorozea's ...,104
320421,2024-09-05 17:48:07 UTC,slmonkaq8,slmonkaq8 is gifting 10 Tier 1 Subs to dorozea...,104
323579,2024-09-06 14:25:14 UTC,fyodor_m_d1821,fyodor_m_d1821 is gifting 1 Tier 1 Subs to dor...,105
324738,2024-09-06 15:57:16 UTC,waterlem_n,waterlem_n is gifting 1 Tier 1 Subs to dorozea...,105
328549,2024-09-08 12:31:23 UTC,shenxir,shenxir is gifting 5 Tier 1 Subs to dorozea's ...,107
330787,2024-09-08 15:25:10 UTC,xan_gny,xan_gny is gifting 1 Tier 1 Subs to dorozea's ...,107
332818,2024-09-09 12:34:21 UTC,xan_gny,xan_gny is gifting 1 Tier 1 Subs to dorozea's ...,108


In [36]:
gifterData['message'].head(1)

120    yusuf2774 is gifting 1 Tier 1 Subs to dorozea'...
Name: message, dtype: object

In [37]:
# Extract the number after the word "gifting"
gifterData['gifts'] = gifterData['message'].apply(lambda x: int(re.search(r'gifting\s(\d+)', x).group(1)))

# Display the dataframe with the extracted gifts
print(gifterData[['message', 'gifts']])

                                                  message  gifts
120     yusuf2774 is gifting 1 Tier 1 Subs to dorozea'...      1
566     x3lolx is gifting 1 Tier 1 Subs to dorozea's c...      1
568     x3lolx is gifting 1 Tier 1 Subs to dorozea's c...      1
575     x3lolx is gifting 1 Tier 1 Subs to dorozea's c...      1
660     x3lolx is gifting 1 Tier 1 Subs to dorozea's c...      1
...                                                   ...    ...
340150  Riyadh_z21 is gifting 100 Tier 1 Subs to doroz...    100
340327  Aluminiumminimumimmunity is gifting 5 Tier 1 S...      5
340360  fyodor_m_d1821 is gifting 1 Tier 1 Subs to dor...      1
342608  IIISaboIII is gifting 25 Tier 1 Subs to doroze...     25
342716  IIISaboIII is gifting 10 Tier 1 Subs to doroze...     10

[236 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gifterData['gifts'] = gifterData['message'].apply(lambda x: int(re.search(r'gifting\s(\d+)', x).group(1)))


In [38]:
giftedData.head(50)

Unnamed: 0,date,user,message,stream
121,2024-05-01 15:14:43 UTC,yusuf2774,yusuf2774 gifted a Tier 1 sub to ChickenBurgah...,0
567,2024-05-01 16:29:11 UTC,x3lolx,x3lolx gifted a Tier 1 sub to ArtMassong!,0
569,2024-05-01 16:30:00 UTC,x3lolx,x3lolx gifted a Tier 1 sub to PrachtkerleDE!,0
576,2024-05-01 16:31:10 UTC,x3lolx,x3lolx gifted a Tier 1 sub to lubb1x!,0
661,2024-05-01 16:41:16 UTC,x3lolx,x3lolx gifted a Tier 1 sub to alaskalyeska!,0
1076,2024-05-01 17:54:29 UTC,Bizzan,Bizzan gifted a Tier 1 sub to shotashellz!,0
1077,2024-05-01 17:54:29 UTC,Bizzan,Bizzan gifted a Tier 1 sub to aabeemick!,0
1078,2024-05-01 17:54:29 UTC,Bizzan,Bizzan gifted a Tier 1 sub to gantl4!,0
1079,2024-05-01 17:54:29 UTC,Bizzan,Bizzan gifted a Tier 1 sub to Minaqua__!,0
1080,2024-05-01 17:54:29 UTC,Bizzan,Bizzan gifted a Tier 1 sub to FeiFuchs!,0


In [39]:
giftedData.describe()

Unnamed: 0,stream
count,1451.0
mean,65.745003
std,30.511181
min,0.0
25%,49.0
50%,61.0
75%,95.0
max,110.0


In [40]:
unique_gifter = giftedData['user'].nunique()
print("Number of unique users:", unique_gifter)

Number of unique users: 96


In [41]:
#prime 

prime_sub = data[data['message'].str.contains('Prime', case=False, na=False)]

prime_sub.head(5)


Unnamed: 0,date,user,message,stream
210,2024-05-01 15:26:58 UTC,iborotti_06,iborotti_06 subscribed with Prime.,0
866,2024-05-01 17:19:54 UTC,ze1nar,ze1nar subscribed with Prime. They've subscrib...,0
3460,2024-05-02 17:07:26 UTC,blakbilt,blakbilt subscribed with Prime. They've subscr...,1
5061,2024-05-03 15:10:03 UTC,ammoke,ammoke subscribed with Prime.,2
6387,2024-05-03 17:55:56 UTC,domjel12,domjel12 subscribed with Prime.,2


In [42]:
trueSub = data[~data['message'].str.contains('Prime', case=False, na=False)]

trueSub = trueSub[~trueSub['message'].str.contains('raiders', case=False, na=False)]

trueSub = trueSub[~trueSub['message'].str.contains('gifting', case=False, na=False)]

trueSub = trueSub[~trueSub['message'].str.contains('gifted', case=False, na=False)]

trueSub = trueSub[trueSub['message'].str.contains(' subscribed at ', case=False, na=False)]

tier1 = trueSub[trueSub['message'].str.contains('Tier 1', case=False, na=False)]
tier2 = trueSub[trueSub['message'].str.contains('Tier 2', case=False, na=False)]
tier3 = trueSub[trueSub['message'].str.contains('Tier 3', case=False, na=False)]

In [43]:
tier1.describe()

Unnamed: 0,stream
count,409.0
mean,63.337408
std,32.188379
min,0.0
25%,36.0
50%,64.0
75%,92.0
max,111.0


In [44]:
tier2.describe()

Unnamed: 0,stream
count,5.0
mean,65.6
std,34.282649
min,28.0
25%,38.0
50%,63.0
75%,89.0
max,110.0


In [45]:
tier3.describe()

Unnamed: 0,stream
count,15.0
mean,64.133333
std,34.823774
min,7.0
25%,40.5
50%,67.0
75%,93.5
max,110.0


In [46]:
tier2.head(10)

Unnamed: 0,date,user,message,stream
82963,2024-05-31 19:40:14 UTC,IRLKingsman,IRLKingsman subscribed at Tier 2.,28
106885,2024-06-11 14:25:52 UTC,IRLKingsman,IRLKingsman subscribed at Tier 2. They've subs...,38
180346,2024-07-14 13:29:43 UTC,IRLKingsman,IRLKingsman subscribed at Tier 2. They've subs...,63
266775,2024-08-20 13:40:20 UTC,IRLKingsman,IRLKingsman subscribed at Tier 2. They've subs...,89
340441,2024-09-11 12:19:46 UTC,IRLKingsman,IRLKingsman subscribed at Tier 2. They've subs...,110


In [47]:
tier3.head(10)

Unnamed: 0,date,user,message,stream
19054,2024-05-09 15:17:05 UTC,Substen,Substen subscribed at Tier 3. They've subscrib...,7
27607,2024-05-12 16:08:02 UTC,reflectsiam,reflectsiam subscribed at Tier 3.,10
42158,2024-05-19 15:30:38 UTC,reflectsiam,reflectsiam subscribed at Tier 3. They've subs...,16
98603,2024-06-07 14:07:45 UTC,Substen,Substen subscribed at Tier 3. They've subscrib...,35
137050,2024-06-20 15:45:38 UTC,reflectsiam,reflectsiam subscribed at Tier 3. They've subs...,46
168261,2024-07-08 13:18:25 UTC,Substen,Substen subscribed at Tier 3. They've subscrib...,58
184738,2024-07-15 15:07:18 UTC,yigit_ivan07,yigit_ivan07 subscribed at Tier 3.,64
190456,2024-07-19 12:50:39 UTC,reflectsiam,reflectsiam subscribed at Tier 3. They've subs...,67
209770,2024-07-25 15:59:03 UTC,Juhail,Juhail subscribed at Tier 3. They've subscribe...,73
249027,2024-08-07 12:16:40 UTC,Substen,Substen subscribed at Tier 3. They've subscrib...,85


In [48]:
prime_sub[prime_sub['user'] == 'balintboss']

Unnamed: 0,date,user,message,stream
39336,2024-05-18 15:07:03 UTC,balintboss,balintboss subscribed with Prime. They've subs...,15
135696,2024-06-20 12:58:15 UTC,balintboss,balintboss subscribed with Prime. They've subs...,46
194206,2024-07-20 14:37:00 UTC,balintboss,balintboss subscribed with Prime. They've subs...,68
267472,2024-08-20 14:57:06 UTC,balintboss,balintboss subscribed with Prime. They've subs...,89


In [49]:
raids = data[data['message'].str.contains(' raiders from ', case=False, na=False)].copy()
raids['num'] = raids['message'].str.split().str[0]
raids['num'] = pd.to_numeric(raids['num'], errors='raise')
raids.head(50)

Unnamed: 0,date,user,message,stream,num
100,2024-05-01 15:13:16 UTC,kloakpojken,4 raiders from kloakpojken have joined!,0,4
3627,2024-05-02 17:16:16 UTC,gkey,125 raiders from gkey have joined!,1,125
21089,2024-05-09 19:37:36 UTC,gkey,92 raiders from gkey have joined!,7,92
21323,2024-05-10 15:18:35 UTC,kloakpojken,5 raiders from kloakpojken have joined!,8,5
25548,2024-05-11 20:44:41 UTC,kloakpojken,11 raiders from kloakpojken have joined!,9,11
35708,2024-05-16 17:18:46 UTC,gkey,466 raiders from gkey have joined!,13,466
46077,2024-05-20 17:52:20 UTC,gkey,920 raiders from gkey have joined!,17,920
49925,2024-05-22 13:47:49 UTC,dima_wallhacks,952 raiders from dima_wallhacks have joined!,19,952
58991,2024-05-25 18:05:29 UTC,gkey,613 raiders from gkey have joined!,22,613
64632,2024-05-27 18:35:31 UTC,mbXtreme,115 raiders from mbXtreme have joined!,24,115


In [50]:
raids['num'].describe()

count      50.000000
mean      231.740000
std       351.224785
min         4.000000
25%        13.000000
50%        88.500000
75%       225.250000
max      1289.000000
Name: num, dtype: float64