In [1]:
import pandas as pd
import re
from collections import Counter
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
import pytz

In [2]:
# Read the list of filenames from the configuration file
with open('../file_list.txt', 'r', encoding='utf-8') as config_file:
    file_names = config_file.read().splitlines()

# Regex pattern to match the data format
pattern = r'\[(.*?)\] (.*?): (.*)'

# Initialize an empty list to store parsed data
datalist = []
stream_count = 0
# Iterate over each specified file
for file in file_names:
    full_path = f"../data/{file}"
    with open(full_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            match = re.match(pattern, line)
            if match:
                date, user, message = match.groups()
                datalist.append([date, user, message,stream_count])
    stream_count = stream_count + 1

# Create a DataFrame from the parsed data
data = pd.DataFrame(datalist, columns=["date", "user", "message","stream"])


In [3]:

data['date'] = pd.to_datetime(data['date'])


In [4]:

def convert_utc_to_cet(df, date_column='date'):
    """
    Convert UTC timestamps to Central European Time (CET/CEST) with proper DST handling
    
    Parameters:
    df (pd.DataFrame): DataFrame containing the date column
    date_column (str): Name of the column containing UTC timestamps
    
    Returns:
    pd.DataFrame: DataFrame with converted timestamps
    """
    # Make a copy to avoid modifying the original
    df = df.copy()
    
    # Ensure timestamps are UTC aware
    if df[date_column].dt.tz is None:
        df[date_column] = df[date_column].dt.tz_localize('UTC')
    elif df[date_column].dt.tz != pytz.UTC:
        df[date_column] = df[date_column].dt.tz_convert('UTC')
    
    # Convert to CET/CEST (Europe/Berlin includes proper DST handling)
    df[date_column] = df[date_column].dt.tz_convert('Europe/Berlin')
    
    return df



In [5]:
data = convert_utc_to_cet(data)

In [None]:
data["user"] = data["user"].replace("Banties1g", "banties_x")
data["user"] = data["user"].replace("banties1g", "banties_x")
data["user"] = data["user"].replace("chili_poe", "chili_con_bacon")
data["user"] = data["user"].replace("CHILI_POE", "chili_con_bacon")
data["user"] = data["user"].replace("chili_conbacon", "chili_con_bacon")
data["user"] = data["user"].replace("Wirelesss_", "W1r3lesss")
data["user"] = data["user"].replace("treklul", "trek44_")
data["user"] = data["user"].replace("ttrek_", "trek44_")
data["user"] = data["user"].replace("trek_x", "trek44_")
data["user"] = data["user"].replace("TriplesingleJ", "TripleSingleJames")
data["user"] = data["user"].replace("uuccugr", "uwu_cougar")
data["user"] = data["user"].replace("uuccugr", "uuccugr_")
data["user"] = data["user"].replace("StanIV4_", "stan_iv4")
data["user"] = data["user"].replace("Muuskie2", "Muuskie")
data["user"] = data["user"].replace("nishad_more1311", "nishad13")
data["user"] = data["user"].replace("softarballt", "softarr")
data["user"] = data["user"].replace("softarballtt23", "softarr")
data["user"] = data["user"].replace("bonkwiththefunk", "bonk67")


In [7]:

# Get all unique usernames
unique_users = data['user'].unique()

# Create a mapping from lowercase username to all variants

user_variants = defaultdict(set)
for user in unique_users:
    user_variants[user.lower()].add(user)

# Find usernames with different capitalization
duplicate_users = {k: v for k, v in user_variants.items() if len(v) > 1}


In [8]:
# Create a mapping from all variants to the canonical (sorted first) variant
variant_map = {}
for variants in duplicate_users.values():
    sorted_variants = sorted(variants)
    canonical = sorted_variants[0]
    for v in variants:
        variant_map[v] = canonical

# Replace usernames in 'user' column
data['user'] = data['user'].apply(lambda u: variant_map.get(u, u))

In [9]:
# 7tv list
tv7_emotes = """omeFaded nonono wideSpeedFear wideSpeedDesert SpeedOmg Disappointed ME? ome13 doroPfft CONFUSED doro52 wideSpeedConcerned doroBANGER doroVibe DoroBrainPLug NAHHHH intrigued oda WOOF BANG peepoFirework Gladge omeMoji SobBounce belka doroHips dorochan doroXi DOMEYES DOWAY FRENCH YeahIGetIt doroPls Offline sheMightBeRight 89 dwerk wideGkeyDance3 76 xqcCheer omeCheer Dmile zoro LMAO iguessbro doroScary haii SON omeBru wideSpeedCall plink laracroft ADHD yipe doroWaiting 67 FOWL doroLOL OfCourse tyler67 THYME peepoLost speed61 GOODMOOD Jercula ILOVECS CLOWN thatsCrazy xqcBleh omEING wideSpeedLaugh15 WHATTHEHELI RickyDicky jonkler wideSpeedPumpkin wideSpeedLaugh16 SPIDER chillCat LEEKED soblaugh BANANA wideSpeedLaugh21 Binoculars footstepmenace ome99 doroBanger GhankYou NO YES auntiePls OMEGADANCEBUTGHAST gkeyRide GENERATINGGODSEED perimeter sideeye omePfft MySunshine posture ??? twocatsfightingonacouch dSmile wideSpeedLaugh4 flirtt Suspicion zigzag YeatCat nom omeSad shomonting thinking agafat FirstTimeClanka DORVIS gasp RISING dd uncPLS domgBruh Deafge OMEGADANCEBUTFASTER kim3 Buggin speed8 .... d32 ome54 ohok minionBike Clown Explosion hackingCD JermaSoy MathTime MoneyRain PokiShare TakingNotes :0 :3 :33 :tf: !boost +1 0pixel 1DLove 3Head 3Heading 4House 4Shrug 4Weird 5Head AAAA Acknowledged ACTINUP ADHD agahi AIM AIRBALL AIWITHTHEBRAIDS Alarm ALE Alfred AlienDance AlienPls AlienPls2 AlienPls3 Aloo Alright amongE ANGRE ANOTHERONE Ant AREYOUAGIRL AREYOUAGIRLFtxQcYellingAtYou areyoufr AreYouSeriousRightNeow arnoldHalt arthur Assept AURA AwHellNah Aware AWOO AWOOGA axelF ayo bah BAND Banger BANGER banties Barack barryArrive Barry63 BantiesPaulBeef Based BASED BatChestAbove batman Batman batJAM batPls Beatles Bedge BEG BEGGING Bello BigD bieberDougie BELIEVERS BibleThump BINGO Bleh Bloons BOOBA bog BOOM BOOMIES BOINK BORGIR Borfday brbToilet Broadcaster brb Bruh BRUHMM bruv buh buhbye buhFlipExplode BUSSIN BUSSERS bye CanIHaveADollar cannySilly catAsk catBusiness CatCozy catDespair catEat catJAM catKiss catPls catSigh catSmash CatTime catTwerk CAUGHT Caught CaughtIn4K Celebrating CHADDING characterSelected CHATTERS chatting cheerleaders ChillGuy Chillin Cinema chilling clappi Clap classic CLEAN Clueless CLIPPERS CLOWNDETECTED COCKA cokeBreak COMEHERE Concerned Considering Cooked COPIUM crabPls Crunch CS2 Cuck Cuh D: damily Damn dansi dash Dave deadassFaint Delusional DemonTiming Dentge despair DespairRyan Devious DIESOFCRINGE Dime DinkDonk Dinema doggoSlava dogJAM DogLookingWickedAndCool doid dojaPls dome44 dome32 donowall donoWall doroAunt doroBleh dorobubu doroCD DoroCheer doroFiddy doroFlex doroGHOST doroHEAD doroKick doroL doroMAD doroPray doroRip doroSoy DoroTalkingAgain Dorozea doster DOUBTERS DRAIN Drake DRAMA dreamwastaken drooling drukiDnace drukiDnace2 duaKiss dudWhat EDGE EDM EDITING emo erinNya essaying ewphop eww EZ EZdodge Exerpas Explosion eyeroll fadedthanaho FARMING FeelsBadMan FeelsDankMan FeelsBlackScreen FeelsGladMan FeelsLagMan FeelsLateMan FeelsOkayMan FeelsStrongMan FeelsTiredMan FeelsWeirdMan FeelsWowMan fein FEINFEINFEINFEINFEINFEINFEINFEI FellOff fembajJAM Fiddy FiddyWtf FINALLY firewriting FirstTime FirstTimeBackseating FirstTimeChadder FirstTimeChatter FirstTimeEmoteFail FirstTimeGooner FirstTimePepega FirstTimeTest firsttimebuh FLASHBANG flightnotL Flirt Flushed fnaf footstep forsenCD forsenLaughingAtYou ForsenSingingAtYou forsenPls fortnite fr freakbob freakyfredday freddy Freedom FUNNY g32 GAGAGA gachiGASM gachiHYPER gamily GAMBA GameplayTime GAMING GatieG Gaught GENIUS GetALoadOfThisGuy gg GIGACHAD GIGACHAIR GIGACLAUS GIGAMODS GIGAMOD gigl gkeyFlip gkeyPregnantBounce gkeySMP gkeyUwu gkeywide gkeyWiding gkitten GivenUp girlBoss gkitten glorpaga glorpdetective glorp GlorpMeeting glorprave gmoney goaler goat goblin44 GODDID Gogging GoodBye Gooner gooner GoodTake GOONING GotCaughtTrolling GotEEM gothKiss gPls greetingsladies GREEDY GROOTING GRRR GULP GuitarTime GYAT HABIBI hackingCD HACKERMANS hai HAH HaltEinfachDeineFresseDuHurensohn HandsUp happi HARAM HarryStylesKiss Headbang healed HECOOKING heh HEHE HEHEHEHA Heisenberj HELLO HELP helvete Herewego hesRight heyywithrizz HEYYY hi hiii hiiii Hmm HOBBY HOLY HolyFuck homelessPOV HowDoWeTellHer HowDoWeTellHim hue HUH HUHHHHHHHHHH iAsked ICANT idiot iDrive IFISPEAK IfYouCantSeeThisEmoteUseExclamationMark7tv Ignored IGON imback IMAGINENOTHAVING7TVGETFUCKEDNON7TVUSERSIMAGINENOTHAVING7TVGETFUCKEDNON7TVUSERSIMAGINENOTHAVING7TVGET ImNotOk ImtiredBoss INTENSEGAMING islandboy ISeeYou itsover itstime Jackass jacob1 jacob2 jacob3 jacob4 jah Jammies JARVIS Jay JermaSoy jiggy job JOB Joel joever john Johnporkiscalling JokerHAHA JokerLaugh juh JumpScared JUMPSCARE JustAChillGuy JustAnotherDay JustHowItIs justinbieber KaiCenatOhiogyatwithskibiditoiletwatchingtheWrizzhappeningrightinfrontofhimwithfanumtaxtaxingthegyat KanyeStare KEKW KENOUGH KeyShaker kim3 kittyBANGER kittyBop KKalinka KKonaW KKool kratos Lamonting LastTimeChatter lava lebronArrive lebronJAM lebronTROLL LEBRONNN lemon Lemon LetsBingo LETHERCOOK LETSFUCKINGJOE LETSGO LieMeter life Life Listening LiterallyMe Lithuanian LittleTrolling LiveReaction LL LMAOFREAKY lmao Loading LOCKIN lockedin LOL Looking LookUp lore luh lurkk luton LULE LULW MAJ Madge ManchesterUnited Massive? MarblesTime Martin matSad maxwin MeRN me: MeWhenIBuyEgyptianProperty MEGALUL mee Memories merch mhm MicTime mikuPls mindloud modCheck ModAbuse MODDING Modding mods MODS Mog monakS monday monkeyListening monkeySip MONKA monkaTOS monkaW MONKE MinionHoting MoneyRain muted mutted MUGA MVP MVPOfFarallah MYHEARTILOVEDHER myIQ MYLIFE NAILS NAILSING NAHH NAHHH NAHHHH nananAYAYA NAUR NAvsEU Nerd niceguy NOCHECKMARKS NODDERS NOIDONTTHINKSO NoMaidens NOOPERS NOOOOO NOHORNY noonecares NOSHOT NOTED notListening notxqcL NOW NOWAY NOWAYING np nt nuhuh nyehehehe nyanPls nya o7 Ogre ohhh ohhhhhhhhh OHMYGAWDD ohneFinger ohno ohSHIT oj OK Okei okak OLDWORK OM om omE ome10 ome101 ome104 ome105 ome14 ome15 ome18 ome21 ome29 ome32 ome4 ome41 ome44 ome44444444 ome47 ome5 ome51 ome52 ome55 ome57 ome67 ome69 ome79 ome808 ome83 ome9 ome90 ome96 ome99 OMEGADANCE OMEGALUL OMEGALULiguess omEE omeJAM omeJudging omeOhSHIT omeScrajj omeStare OMEYES omeWiggle OMFG omgBruh ongang OneGuy ONEMORE OnMyWayToDoroMomHouse OOOO oopsie otag OuttaPocket OVERWATCH OVERWORKING OverwhelminglyWholesome owoCheer PagBounce PagChomp PagMan Panam parasocial Parasocial PARASOCIAL paris paul Paul paulNya PauseMan PAUSENEMOGU Peace PEEPEES peepoAds peepoBox peepoBelievers peepoClap peepoComfy peepoDJ peepoDoubters peepoEvil peepoFarmer peepoFat peepoGiggles peepoHappy peepoHey peepoHug peepoKiss peepoLeave peepoLegs peepoLove peepoMarch peepoPls peepoPride peepoRiot peepoSad peepoShy peepoSmile peepoStop peepoTalk pepeAgony pepeGun PepeHands pepeJAM PepeLaugh pepePoint PepePls pepeW Pepega PepegaAim PepegaChat PepegaReading PepoG Petter Pffttt Pffttt2 phew phpk pickle PianoTime Pipege pKitten pL Please pleading plink-182 plinkVibe plonk pmo Plotge PogO PogU pointless pokiFlirt pol POLICE Pondering popipopipipopipo poroPls POVbornbefore2000 ppHop ppL ppOverheat Prayge prePffttt PRIMERS PTSD pulNya PuzzleTime qq ragebait RAGEY RAHH RainTime RAMBOLMG RareParrot ratomilton RaveDance RaveTime ratio Reacting RealForsen ReallyMad RebeccaBlack Reddit RememberTheDays RibertJam RiddleMeThis RIPBOZO RIRI Rizzler RobertJam ROFL RoxyPotato RUNNING rt ryanArrive Sadding Sadge SADge SAJ SAVEME SCATTER saythatagain scawy SCHEISSE SCHIZO SCRAPETHATSHITJOHNNY SCHTOP sdd SERIOUSLY SEXO shogaNya Shits shutup Shruge silliness sisyphus Sippin Sits skip SLAY Sleepo Smile smh Smoge SmurfHey Smurfing SNACKING SNEAK SNIFFA sob SOLARFLARE songbird sotruebestie SOYSCREAM Speechless speed1 speed2 speed21 speed25 speed32 speed4 speed44 speed8 speed88 speedVibe spfLEAN:()wiltee_()tonyhawkproskater4:-:-:_FREEWAVE3-encinoman--:enteringwalmart:-wheezethelean-123 SpeedLaugh SpeedLeft SpeedR spongePls squadHips Stare Staring steve Steve SteerR StreamEnding STREAMER STREAMERSGIVINGTHEWORSTFUCKINGTAKESINEXISTENCE StoryTime Surfing SurE sus susDog Susge SUSSY Swag swagJAM ta tak TakingNotes TeamEDWARD test THATHIT ThatsJustMe ThePaulers TheVoices TheWolfInMe Thinking Thinking2 ThisChat ThisIsMinecraft TIMEOUT Tomfoolery totallylistening TriJam TriKool TriSad TRUEING TRIVSsorry ts Tuckge tuff TWEAK typeshit typhu UGH um UltraMad unibrow unemployment unmod uwu uuh VALORANT veryDoro VeryKey VeryPog VeryPogftxQcInTheShower vibePls VibePls VIDEOGAME VIEWERS vips Voices wade Waddup waga wah waiting Waiting WAHHH WAJAJA WAIT WAITWAITWAIT WakeTheFuckUpSamuraiWeHaveACityToBurn wallE waltuh walterShocked WalterVibe War WasZumPenis WATAFUCKEDUPDAY WatchingStream WAYTOODANK wdym WeAreLive WeDoNotCare WEDIDIT WEEWOO WeGood WePaid WHATAFUCKEDUPDAY WHAT WHATTT wheresmyhug Whenyourinnerwolfreleases WideAlERT WideCatGroove wideDvaAss WideHardo WidelebronJAM widemonkaGIGAftRobertDowneyJr wideprespeedlaugh WideRaveTime wideReacting wideSpeedLaugh3 widetime WidezyzzPls wig WineTime winton Wisdom woah Wokege WOT wot wrapitup WW wowie Xd xar2EDM xdd XDoubt xJAM xqc32 xqcBOZO xqcDespair xqcFuel xqcGoofy xqcL xqcSCHIZO xqcSlam xqcTake xqcTwerk xQcVeryWide YAAAY YamesBond YANITED YAPPING YeahThatsWhatIWouldaDid YEAHHH YEP YESS YIPIEE yonose Yoink YOOLOOKATTHISCATDOINITSLILDANCYDANCEINTOABREAKDANCEMOVE Yooo YOUDIED YouGotMe YouWouldntGetIt ZAMN ZhongXina zyzzBass zyzzJAM"""

# Convert to list and clean 
emote_list = [emote.strip() for emote in tv7_emotes.split() if emote.strip()]

# Remove any remaining duplicates (if any)
unique_emotes = sorted(list(set(emote_list)))

print(f"Total unique emotes: {len(unique_emotes)}")

# Create the final shortened list
final_emote_list = unique_emotes

Total unique emotes: 1040


july_2025:

In [10]:
# Filter only July 2025
july_2025 = data[(data["date"].dt.year == 2025) & (data["date"].dt.month == 7)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in July 2025
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2025) & (first_messages["date"].dt.month == 7)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in July 2025: {num_new_chatters}")
stream_counts = july_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in July 2025: {len(stream_counts)}")
print(f"Number of Messages in July 2025: {july_2025.shape[0]}")
print(f"Number of Users in July 2025: {july_2025['user'].nunique()}")
user_counts = july_2025['user'].value_counts().reset_index()
user_counts.sort_values('count').tail(10)

# Count number of messages per user in July 2025
user_counts = july_2025['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 3 users
top_10_users = user_counts.head(10)

print(top_10_users)

# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in july_2025["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 3 emotes
top_5_emotes = emote_counter.most_common(5)

print("Top 5 emotes in July 2025:")
for emote, count in top_5_emotes:
    print(f"{emote}: {count} times")

# Count all words from July messages
word_counter = Counter()

for message in july_2025["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("Top 3 words in July 2025:")
for emote, count in top_3_words:
    print(f"{emote}: {count} times")

# Make a copy to avoid SettingWithCopyWarning
july_2025 = july_2025.copy()

# Round timestamps to nearest 5-minute interval
july_2025["5min"] = july_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = july_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("Top 5 busiest 5-minute intervals in July 2025:")
print(top_5_fastest)
# Group by stream and compute message counts and time range
stream_stats = july_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("Stream with the highest messages per minute:")
print(fastest_stream)

# Get all unique stream IDs in July
all_streams = set(july_2025["stream"].unique())
# Group by user and get the set of streams each user chatted in
user_streams = july_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("chatters who chatted in every stream in July 2025:")
print(users_in_every_stream)


Number of new chatters in July 2025: 4625
Number of Streams in July 2025: 26
Number of Messages in July 2025: 179825
Number of Users in July 2025: 9840
          user  message_count
0     BenXBari           8624
1     JBIN2036           7848
2      trek44_           6030
3  GEORGIE1471           5324
4   cr7vaibhav           5013
5   balintboss           4639
6    W1r3lesss           4484
7     rautsi__           3970
8    SchiKen44           2976
9      Typhu25           2513
Top 5 emotes in July 2025:
LOL: 4934 times
hai: 2794 times
OMEYES: 2746 times
OOOO: 2502 times
WW: 2063 times
Top 3 words in July 2025:
the: 12611 times
you: 10652 times
i: 8784 times
Top 5 busiest 5-minute intervals in July 2025:
                          5min  message_count
44   2025-07-03 18:05:00+02:00            729
45   2025-07-03 18:10:00+02:00            609
38   2025-07-03 17:35:00+02:00            602
1376 2025-07-31 15:30:00+02:00            567
37   2025-07-03 17:30:00+02:00            551
Stream with

May 2024

In [11]:
# Filter only May 2024
may_2024 = data[(data["date"].dt.year == 2024) & (data["date"].dt.month == 5)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in May 2024
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2024) & (first_messages["date"].dt.month == 5)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in May 2024: {num_new_chatters}")
stream_counts = may_2024['stream'].value_counts().reset_index()
print(f"Number of Streams in May 2024: {len(stream_counts)}")
print(f"Number of Messages in May 2024: {may_2024.shape[0]}")
print(f"Number of Users in May 2024: {may_2024['user'].nunique()}")
user_counts = may_2024['user'].value_counts().reset_index()
user_counts.sort_values('count').tail(10)

# Count number of messages per user in May 2024
user_counts = may_2024['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)

print(top_10_users)

# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in may_2024["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 5 emotes
top_5_emotes = emote_counter.most_common(5)

print("Top 5 emotes in May 2024:")
for emote, count in top_5_emotes:
    print(f"{emote}: {count} times")

# Count all words from May messages
word_counter = Counter()

for message in may_2024["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("Top 3 words in May 2024:")
for emote, count in top_3_words:
    print(f"{emote}: {count} times")

# Make a copy to avoid SettingWithCopyWarning
may_2024 = may_2024.copy()

# Round timestamps to nearest 5-minute interval
may_2024["5min"] = may_2024["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = may_2024.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("Top 5 busiest 5-minute intervals in May 2024:")
print(top_5_fastest)

# Group by stream and compute message counts and time range
stream_stats = may_2024.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("Stream with the highest messages per minute:")
print(fastest_stream)

# Get all unique stream IDs in May
all_streams = set(may_2024["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = may_2024.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("Chatters who chatted in every stream in May 2024:")
print(users_in_every_stream)

Number of new chatters in May 2024: 2266
Number of Streams in May 2024: 29
Number of Messages in May 2024: 83053
Number of Users in May 2024: 2266
             user  message_count
0    Martin_Gales           7506
1       banties_x           3670
2            oJov           3302
3       1206paul_           2506
4  StreamElements           2403
5           roxa0           2309
6           LX212           2226
7     IvanOnMyOwn           2089
8         klimzaa           1990
9         Risc__V           1527
Top 5 emotes in May 2024:
mhm: 1967 times
omE: 1368 times
o7: 625 times
OMEGALUL: 502 times
hi: 474 times
Top 3 words in May 2024:
the: 11231 times
you: 7300 times
i: 7155 times
Top 5 busiest 5-minute intervals in May 2024:
                          5min  message_count
1494 2024-05-27 23:55:00+02:00            339
1149 2024-05-22 15:45:00+02:00            292
1495 2024-05-28 00:00:00+02:00            225
1558 2024-05-28 21:55:00+02:00            186
1643 2024-05-29 19:55:00+02:00      

AUG 25

In [12]:
# Filter only August 2025
aug_2025 = data[(data["date"].dt.year == 2025) & (data["date"].dt.month == 8)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in August 2025
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2025) & (first_messages["date"].dt.month == 8)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in August 2025: {num_new_chatters}")
stream_counts = aug_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in August 2025: {len(stream_counts)}")
print(f"Number of Messages in August 2025: {aug_2025.shape[0]}")
print(f"Number of Users in August 2025: {aug_2025['user'].nunique()}")
user_counts = aug_2025['user'].value_counts().reset_index()
user_counts.sort_values('count').tail(10)

# Count number of messages per user in August 2025
user_counts = aug_2025['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)

print(top_10_users)

# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in aug_2025["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 5 emotes
top_5_emotes = emote_counter.most_common(5)

print("Top 5 emotes in August 2025:")
for emote, count in top_5_emotes:
    print(f"{emote}: {count} times")

# Count all words from August messages
word_counter = Counter()

for message in aug_2025["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("Top 3 words in August 2025:")
for emote, count in top_3_words:
    print(f"{emote}: {count} times")

# Make a copy to avoid SettingWithCopyWarning
aug_2025 = aug_2025.copy()

# Round timestamps to nearest 5-minute interval
aug_2025["5min"] = aug_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = aug_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("Top 5 busiest 5-minute intervals in August 2025:")
print(top_5_fastest)

# Group by stream and compute message counts and time range
stream_stats = aug_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("Stream with the highest messages per minute:")
print(fastest_stream)

# Get all unique stream IDs in August
all_streams = set(aug_2025["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = aug_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("Chatters who chatted in every stream in August 2025:")
print(users_in_every_stream)

Number of new chatters in August 2025: 4210
Number of Streams in August 2025: 25
Number of Messages in August 2025: 151681
Number of Users in August 2025: 9146
              user  message_count
0         JBIN2036           7068
1       cr7vaibhav           4634
2       balintboss           4306
3         BenXBari           4237
4      GEORGIE1471           4139
5         rautsi__           3155
6          Odah_02           3051
7  InverseEntropy_           2718
8         KRIESEAX           2684
9    lajosbarnabas           2661
Top 5 emotes in August 2025:
LOL: 3754 times
ome44: 2672 times
OMEYES: 2367 times
hi: 2216 times
sob: 2098 times
Top 3 words in August 2025:
the: 10251 times
you: 9201 times
is: 7932 times
Top 5 busiest 5-minute intervals in August 2025:
                          5min  message_count
463  2025-08-11 19:20:00+02:00           1360
464  2025-08-11 19:25:00+02:00            965
462  2025-08-11 19:15:00+02:00            961
1285 2025-08-31 19:10:00+02:00            55

Sep 25

In [13]:
# Filter only September 2025 
sep_2025 = data[(data["date"].dt.year == 2025) & (data["date"].dt.month == 9)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in September 2025
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2025) & (first_messages["date"].dt.month == 9)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in Sep 2025: {num_new_chatters}")
stream_counts = sep_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in Sep 2025: {len(stream_counts)}")
print(f"Number of Messages in Sep 2025: {sep_2025.shape[0]}")
print(f"Number of Users in Sep 2025: {sep_2025['user'].nunique()}")
user_counts = sep_2025['user'].value_counts().reset_index()
user_counts.sort_values('count').tail(10)

# Count number of messages per user in September 2025
user_counts = sep_2025['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)

print(top_10_users)

# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in sep_2025["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 5 emotes
top_5_emotes = emote_counter.most_common(10)

print("Top 10 emotes in Sep 2025:")
for emote, count in top_5_emotes:
    print(f"{emote}: {count} times")

# Count all words from May messages
word_counter = Counter()

for message in sep_2025["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("Top 3 words in Sep 2025:")
for emote, count in top_3_words:
    print(f"{emote}: {count} times")

# Make a copy to avoid SettingWithCopyWarning
sep_2025 = sep_2025.copy()

# Round timestamps to nearest 5-minute interval
sep_2025["5min"] = sep_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = sep_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("Top 5 busiest 5-minute intervals in Sep 2025:")
print(top_5_fastest)

# Group by stream and compute message counts and time range
stream_stats = sep_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("Stream with the highest messages per minute:")
print(fastest_stream)

# Get all unique stream IDs in Sep
all_streams = set(sep_2025["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = sep_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("Chatters who chatted in every stream in Sep 2025:")
print(users_in_every_stream)

Number of new chatters in Sep 2025: 3971
Number of Streams in Sep 2025: 26
Number of Messages in Sep 2025: 170299
Number of Users in Sep 2025: 9395
             user  message_count
0        BenXBari          14081
1        JBIN2036           7935
2        HALP____           6030
3      balintboss           4705
4         Odah_02           4609
5        nishad13           4425
6      cr7vaibhav           3581
7   lajosbarnabas           3460
8        KRIESEAX           3393
9  StreamElements           3131
Top 10 emotes in Sep 2025:
WW: 3481 times
LOL: 3422 times
sob: 3216 times
hi: 2306 times
mhm: 1956 times
om: 1745 times
OMEYES: 1702 times
OOOO: 1651 times
ome51: 1584 times
bye: 1508 times
Top 3 words in Sep 2025:
the: 14390 times
to: 13160 times
you: 12413 times
Top 5 busiest 5-minute intervals in Sep 2025:
                         5min  message_count
271 2025-09-07 15:30:00+02:00            457
137 2025-09-03 17:15:00+02:00            403
139 2025-09-03 17:25:00+02:00            38

Oct 25

In [14]:
# Filter only October 2025 
oct_2025 = data[(data["date"].dt.year == 2025) & (data["date"].dt.month == 10)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in October 2025
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2025) & (first_messages["date"].dt.month == 10)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in Oct 2025: {num_new_chatters}")
stream_counts = oct_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in Oct 2025: {len(stream_counts)}")
print(f"Number of Messages in Oct 2025: {oct_2025.shape[0]}")
print(f"Number of Users in Oct 2025: {oct_2025['user'].nunique()}")
user_counts = oct_2025['user'].value_counts().reset_index()
user_counts.sort_values('count').tail(10)

# Count number of messages per user in October 2025
user_counts = oct_2025['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)

print(top_10_users)

# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in oct_2025["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 5 emotes
top_5_emotes = emote_counter.most_common(10)

print("Top 10 emotes in Oct 2025:")
for emote, count in top_5_emotes:
    print(f"{emote}: {count} times")

# Count all words from October messages
word_counter = Counter()

for message in oct_2025["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("Top 3 words in Oct 2025:")
for emote, count in top_3_words:
    print(f"{emote}: {count} times")

# Make a copy to avoid SettingWithCopyWarning
oct_2025 = oct_2025.copy()

# Round timestamps to nearest 5-minute interval
oct_2025["5min"] = oct_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = oct_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("Top 5 busiest 5-minute intervals in Oct 2025:")
print(top_5_fastest)

# Group by stream and compute message counts and time range
stream_stats = oct_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("Stream with the highest messages per minute:")
print(fastest_stream)

# Get all unique stream IDs in Oct
all_streams = set(oct_2025["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = oct_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("Chatters who chatted in every stream in Oct 2025:")
print(users_in_every_stream)

Number of new chatters in Oct 2025: 3458
Number of Streams in Oct 2025: 27
Number of Messages in Oct 2025: 174954
Number of Users in Oct 2025: 8868
            user  message_count
0       JBIN2036           7907
1      rafa30___           7265
2  lajosbarnabas           6545
3       HALP____           6245
4    polimpompis           6164
5       BenXBari           5855
6       nishad13           5094
7     balintboss           4979
8        Odah_02           3519
9       KRIESEAX           3109
Top 10 emotes in Oct 2025:
sob: 4146 times
LOL: 3142 times
OMEYES: 3106 times
WW: 2605 times
mhm: 2340 times
hi: 2305 times
ome44: 1989 times
bye: 1902 times
om: 1801 times
qq: 1697 times
Top 3 words in Oct 2025:
the: 14651 times
you: 11574 times
to: 10568 times
Top 5 busiest 5-minute intervals in Oct 2025:
                          5min  message_count
1313 2025-10-30 16:45:00+01:00            546
769  2025-10-17 19:20:00+02:00            504
662  2025-10-15 19:30:00+02:00            430
46   20

In [15]:
# Filter only November 2025 
nov_2025 = data[(data["date"].dt.year == 2025) & (data["date"].dt.month == 11)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in November 2025
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2025) & (first_messages["date"].dt.month == 11)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in Nov 2025: {num_new_chatters}")

stream_counts = nov_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in Nov 2025: {len(stream_counts)}")
print(f"Number of Messages in Nov 2025: {nov_2025.shape[0]}")
print(f"Number of Users in Nov 2025: {nov_2025['user'].nunique()}")

user_counts = nov_2025['user'].value_counts().reset_index()
user_counts.sort_values('count').tail(10)

# Count number of messages per user in November 2025
user_counts = nov_2025['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)

print(top_10_users)

# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in nov_2025["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 5 emotes
top_5_emotes = emote_counter.most_common(10)

print("Top 10 emotes in Nov 2025:")
for emote, count in top_5_emotes:
    print(f"{emote}: {count} times")

# Count all words from November messages
word_counter = Counter()

for message in nov_2025["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("Top 3 words in Nov 2025:")
for emote, count in top_3_words:
    print(f"{emote}: {count} times")

# Make a copy to avoid SettingWithCopyWarning
nov_2025 = nov_2025.copy()

# Round timestamps to nearest 5-minute interval
nov_2025["5min"] = nov_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = nov_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("Top 5 busiest 5-minute intervals in Nov 2025:")
print(top_5_fastest)

# Group by stream and compute message counts and time range
stream_stats = nov_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("Stream with the highest messages per minute:")
print(fastest_stream)

# Get all unique stream IDs in Nov
all_streams = set(nov_2025["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = nov_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("Chatters who chatted in every stream in Nov 2025:")
print(users_in_every_stream)


Number of new chatters in Nov 2025: 3401
Number of Streams in Nov 2025: 25
Number of Messages in Nov 2025: 218383
Number of Users in Nov 2025: 8926
            user  message_count
0    polimpompis          12967
1      rafa30___           9850
2       nishad13           8825
3       JBIN2036           7948
4        Muuskie           7757
5       erdeedge           6412
6  lajosbarnabas           6394
7       HALP____           6333
8     cr7vaibhav           4946
9     balintboss           4785
Top 10 emotes in Nov 2025:
WW: 5333 times
hi: 5320 times
SON: 3646 times
OMEYES: 3632 times
LOL: 3619 times
sob: 3425 times
mhm: 2982 times
bye: 2855 times
67: 2483 times
OOOO: 2442 times
Top 3 words in Nov 2025:
the: 15777 times
you: 11441 times
to: 11172 times
Top 5 busiest 5-minute intervals in Nov 2025:
                        5min  message_count
42 2025-11-01 18:35:00+01:00           1597
41 2025-11-01 18:30:00+01:00           1527
43 2025-11-01 18:40:00+01:00           1338
44 2025-11-01 1

In [16]:
# Filter only December 2025 
dec_2025 = data[(data["date"].dt.year == 2025) & (data["date"].dt.month == 12)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in December 2025
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2025) & (first_messages["date"].dt.month == 12)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in Dec 2025: {num_new_chatters}")

stream_counts = dec_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in Dec 2025: {len(stream_counts)}")
print(f"Number of Messages in Dec 2025: {dec_2025.shape[0]}")
print(f"Number of Users in Dec 2025: {dec_2025['user'].nunique()}")

# Count number of messages per user in December 2025
user_counts = dec_2025['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)
print("\nTop 10 users in Dec 2025:")
print(top_10_users)

# --- Emote Analysis ---
# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in dec_2025["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 10 emotes
top_10_emotes = emote_counter.most_common(10)

print("\nTop 10 emotes in Dec 2025:")
for emote, count in top_10_emotes:
    print(f"{emote}: {count} times")

# --- Word Analysis ---
# Count all words from December messages
word_counter = Counter()

for message in dec_2025["message"]:
    words = message.lower().split()
    word_counter.update(words)

# Get top 3 most common words
top_3_words = word_counter.most_common(3)

print("\nTop 3 words in Dec 2025:")
for word, count in top_3_words:
    print(f"{word}: {count} times")

# --- Activity Spikes ---
# Make a copy to avoid SettingWithCopyWarning
dec_2025 = dec_2025.copy()

# Round timestamps to nearest 5-minute interval
dec_2025["5min"] = dec_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = dec_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("\nTop 5 busiest 5-minute intervals in Dec 2025:")
print(top_5_fastest)

# --- Stream Velocity ---
# Group by stream and compute message counts and time range
stream_stats = dec_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("\nStream with the highest messages per minute:")
print(fastest_stream)

# --- Loyalty Check ---
# Get all unique stream IDs in Dec
all_streams = set(dec_2025["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = dec_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("\nChatters who chatted in every stream in Dec 2025:")
print(users_in_every_stream)

Number of new chatters in Dec 2025: 4876
Number of Streams in Dec 2025: 25
Number of Messages in Dec 2025: 268661
Number of Users in Dec 2025: 11036

Top 10 users in Dec 2025:
            user  message_count
0    polimpompis          14355
1       erdeedge          13043
2       nishad13          12711
3       JBIN2036          11398
4        Muuskie           9368
5  lajosbarnabas           9067
6       HALP____           7495
7      rafa30___           7430
8        Odah_02           5658
9     balintboss           5212

Top 10 emotes in Dec 2025:
hi: 10350 times
dwerk: 8309 times
WW: 7170 times
mhm: 4469 times
OMEYES: 4424 times
LOL: 4410 times
bye: 3874 times
sob: 3681 times
BOOM: 3470 times
67: 3066 times

Top 3 words in Dec 2025:
the: 18621 times
i: 13794 times
a: 13164 times

Top 5 busiest 5-minute intervals in Dec 2025:
                          5min  message_count
499  2025-12-06 08:00:00+01:00            589
176  2025-12-04 15:55:00+01:00            511
1537 2025-12-28 23:50:

In [17]:
# Filter for the whole year 2025 
year_2025 = data[data["date"].dt.year == 2025]

# Find the first message date for each user (using the full dataset 'data')
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message ever was in 2025
new_chatters = first_messages[first_messages["date"].dt.year == 2025]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"--- 2025 Annual Summary ---")
print(f"Number of new chatters in 2025: {num_new_chatters}")

stream_counts = year_2025['stream'].value_counts().reset_index()
print(f"Number of Streams in 2025: {len(stream_counts)}")
print(f"Number of Messages in 2025: {year_2025.shape[0]}")
print(f"Number of Users in 2025: {year_2025['user'].nunique()}")

# Count number of messages per user in 2025
user_counts = year_2025['user'].value_counts().reset_index()
user_counts.columns = ['user', 'message_count']

# Get the top 10 most active users
top_10_users = user_counts.head(10)
print("\nTop 10 users in 2025:")
print(top_10_users)

# --- Emote Analysis ---
# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes from final_emote_list
for message in year_2025["message"]:
    words = str(message).split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 10 emotes
top_10_emotes = emote_counter.most_common(10)

print("\nTop 10 emotes in 2025:")
for emote, count in top_10_emotes:
    print(f"{emote}: {count} times")


# --- Activity Spikes ---
# Make a copy to avoid SettingWithCopyWarning
year_2025 = year_2025.copy()

# Round timestamps to nearest 5-minute interval
year_2025["5min"] = year_2025["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = year_2025.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals of the year
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("\nTop 5 busiest 5-minute intervals in 2025:")
print(top_5_fastest)

# --- Stream Velocity ---
# Group by stream and compute message counts and time range
stream_stats = year_2025.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("\nFastest stream of 2025 (Messages per Minute):")
print(fastest_stream)

# --- Loyalty Check ---
# Get all unique stream IDs in 2025
all_streams = set(year_2025["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = year_2025.groupby("user")["stream"].apply(set)

# Filter users who chatted in every single stream of the year
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print(f"\nChatters who chatted in every stream in 2025 (Total: {len(users_in_every_stream)}):")
print(users_in_every_stream)

--- 2025 Annual Summary ---
Number of new chatters in 2025: 69240
Number of Streams in 2025: 308
Number of Messages in 2025: 2388045
Number of Users in 2025: 76715

Top 10 users in 2025:
             user  message_count
0        JBIN2036          93140
1      balintboss          54100
2        BenXBari          49702
3       W1r3lesss          44530
4         trek44_          43682
5   lajosbarnabas          42136
6       rafa30___          36184
7     polimpompis          34755
8  StreamElements          32723
9        stan_iv4          31857

Top 10 emotes in 2025:
speed1: 145143 times
ome44: 138181 times
LOL: 48325 times
hi: 41412 times
WW: 38208 times
Smurfing: 37341 times
OMEYES: 34871 times
OOOO: 32434 times
mhm: 29180 times
qq: 25607 times

Top 5 busiest 5-minute intervals in 2025:
                           5min  message_count
2382  2025-02-17 20:25:00+01:00           1791
4621  2025-03-25 18:10:00+01:00           1750
4663  2025-03-26 15:50:00+01:00           1601
14925 2025-1

In [18]:
# 1. Define the bots to exclude
bots_to_exclude = ["Fossabot", "StreamElements","Nightbot",]

# 2. Filter the 2025 data to remove bots
filtered_2025 = year_2025[~year_2025["user"].isin(bots_to_exclude)]

# 3. Group by user and count unique streams
user_stream_participation = (
    filtered_2025.groupby("user")["stream"]
    .nunique()
    .sort_values(ascending=False)
    .reset_index(name="stream_count")
)

# 4. Determine the threshold for the 10th position (including ties)
if len(user_stream_participation) >= 10:
    # Get the stream count value of the person at the 10th spot
    threshold = user_stream_participation.iloc[9]["stream_count"]
else:
    threshold = user_stream_participation["stream_count"].min() if not user_stream_participation.empty else 0

# 5. Filter the list to include everyone who meets or exceeds that threshold
top_users_by_streams = user_stream_participation[user_stream_participation["stream_count"] >= threshold]

print(f"Top users by number of streams in 2025 (Excluding bots, Count >= {threshold}):")
print(top_users_by_streams.to_string(index=False))

print(f"\nTotal users in this list: {len(top_users_by_streams)}")

Top users by number of streams in 2025 (Excluding bots, Count >= 254):
                    user  stream_count
              balintboss           308
                JBIN2036           307
Aluminiumminimumimmunity           305
             Wanderer039           286
                 trek44_           284
                 Zeololz           282
               1206paul_           260
            haHAA_12_btw           258
               W1r3lesss           258
                PiGE0N98           254

Total users in this list: 10


In [19]:
year_2025['speed1_count'] = year_2025['message'].str.count(r'\bspeed1\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_speed1_users = (
    year_2025.groupby('user')['speed1_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_speed1_users)

user
devilbabymamadrama    24077
W1r3lesss             17283
SchiKen44             12398
rodrigo_20771         10155
banties_x              8811
uwu_cougar             7013
HoneyKick              6907
ACEiCLE                6001
Typhu25                5716
klimzaa                5252
Name: speed1_count, dtype: int64


In [20]:
year_2025['ome44_count'] = year_2025['message'].str.count(r'\bome44\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_ome44_users = (
    year_2025.groupby('user')['ome44_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_ome44_users)

user
Typhu25         29089
trek44_         14884
shogalul         7169
elluiti          6442
JBIN2036         6164
banties_x        5876
Martin_Gales     4813
stan_iv4         3791
W1r3lesss        3591
CrazeE420xd      3262
Name: ome44_count, dtype: int64


In [21]:
year_2025['LOL_count'] = year_2025['message'].str.count(r'\bLOL\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_LOL_users = (
    year_2025.groupby('user')['LOL_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_LOL_users)

user
balintboss         3177
Ivana_10           1667
JBIN2036           1425
cr7vaibhav         1285
softarballtt       1227
amirmasoud_2018    1112
BenXBari           1005
tiberiu0s           932
Typhu25             885
StunnerGR           799
Name: LOL_count, dtype: int64


In [22]:
year_2025['hi_count'] = year_2025['message'].str.count(r'\bhi\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_hi_users = (
    year_2025.groupby('user')['hi_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_hi_users)

user
Aluminiumminimumimmunity    1771
Muuskie                     1633
lajosbarnabas               1575
HALP____                    1312
nishad13                    1261
polimpompis                 1190
trek44_                     1133
erdeedge                     985
SchiKen44                    777
rautsi__                     770
Name: hi_count, dtype: int64


In [23]:
year_2025['WW_count'] = year_2025['message'].str.count(r'\bWW\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_WW_users = (
    year_2025.groupby('user')['WW_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_WW_users)

user
JBIN2036         1884
klimzaa          1520
lajosbarnabas    1454
BenXBari         1192
Typhu25          1122
HALP____         1102
nishad13         1084
stan_iv4         1074
balintboss       1006
rafa30___         960
Name: WW_count, dtype: int64


In [24]:
year_2025['Smurfing_count'] = year_2025['message'].str.count(r'\bSmurfing\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_Smurfing_users = (
    year_2025.groupby('user')['Smurfing_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_Smurfing_users)

user
W1r3lesss             8149
SchiKen44             4435
devilbabymamadrama    4159
HoneyKick             4090
sisq                  3943
trek44_               3079
softarballtt          1656
S_Face                1072
elluiti               1023
Typhu25                827
Name: Smurfing_count, dtype: int64


In [25]:
year_2025['OMEYES_count'] = year_2025['message'].str.count(r'\bOMEYES\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_OMEYES_users = (
    year_2025.groupby('user')['OMEYES_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_OMEYES_users)

user
lajosbarnabas    4000
trek44_          2482
W1r3lesss        2330
SchiKen44        1484
polimpompis      1438
CrazeE420xd      1403
HALP____         1365
elluiti          1316
shogalul         1293
BenXBari         1263
Name: OMEYES_count, dtype: int64


In [26]:
year_2025['OOOO_count'] = year_2025['message'].str.count(r'\bOOOO\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_OOOO_users = (
    year_2025.groupby('user')['OOOO_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_OOOO_users)

user
JBIN2036       3249
stan_iv4       1147
balintboss     1138
trek44_        1052
CrazeE420xd     972
klimzaa         939
StunnerGR       858
W1r3lesss       853
SchiKen44       748
elluiti         734
Name: OOOO_count, dtype: int64


In [27]:
year_2025['mhm_count'] = year_2025['message'].str.count(r'\bmhm\b')

# 2. Group by user, sum the counts, and get the top 10
top_10_mhm_users = (
    year_2025.groupby('user')['mhm_count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

print(top_10_mhm_users)

user
JBIN2036       2978
balintboss     1804
nishad13       1173
W1r3lesss       920
CrazeE420xd     882
BenXBari        822
polimpompis     778
KRIESEAX        670
erdeedge        663
elluiti         661
Name: mhm_count, dtype: int64


In [28]:
result = data[data['message'] == '!arc'].head(15)
print(result)

                             date                      user message  stream
3060434 2025-12-16 15:59:44+01:00                  JBIN2036    !arc     496
3060437 2025-12-16 15:59:45+01:00                   klimzaa    !arc     496
3060439 2025-12-16 15:59:46+01:00              thebigdogjay    !arc     496
3060441 2025-12-16 15:59:46+01:00                  KRIESEAX    !arc     496
3060444 2025-12-16 15:59:48+01:00                TheMixtape    !arc     496
3060445 2025-12-16 15:59:48+01:00                  nishad13    !arc     496
3060447 2025-12-16 15:59:50+01:00               Der_Stoppi_    !arc     496
3060448 2025-12-16 15:59:50+01:00  Aluminiumminimumimmunity    !arc     496
3060450 2025-12-16 15:59:53+01:00            kindheadbanger    !arc     496
3060452 2025-12-16 15:59:55+01:00             ayuzawatakumi    !arc     496
3060455 2025-12-16 15:59:56+01:00                   Odah_02    !arc     496
3060466 2025-12-16 16:00:00+01:00                  mikirii_    !arc     496
3060468 2025

In [None]:
from datetime import timedelta

# 2. Filter for Year 2025
data_2025 = data[data['date'].dt.year == 2025].copy()

# Filter for the specific user "JBIN2036"
target_user = "JBIN2036"
df_target = data_2025[data_2025['user'] == target_user].copy()

# --- STATISTICS CALCULATION ---

# 1. Stream Participation: How many streams he chatted in vs total streams
total_streams = data_2025['stream'].nunique()
user_streams = df_target['stream'].nunique()

# 2. Longest Streak (Consecutive Streams)
# Get all unique streams in 2025 sorted by date to establish the timeline
all_streams_ordered = data_2025.sort_values('date')['stream'].unique()

# Identify which of those streams the target user participated in
user_streams_set = set(df_target['stream'].unique())

# Calculate the streak
current_streak = 0
longest_streak = 0

for stream in all_streams_ordered:
    if stream in user_streams_set:
        current_streak += 1
        longest_streak = max(longest_streak, current_streak)
    else:
        current_streak = 0

# 3. Most Chatted Stream (Date and Message Count)
if not df_target.empty:
    # Group by stream identifier to find the one with most messages
    stream_counts = df_target.groupby('stream').size()
    most_active_stream_id = stream_counts.idxmax()
    most_active_stream_msgs = stream_counts.max()
    
    # Find the primary date for this stream (the most frequent date associated with this stream ID)
    stream_date = df_target[df_target['stream'] == most_active_stream_id]['date'].dt.date.mode()[0]
    most_chatted_stream_info = f"{stream_date} (Stream ID: {most_active_stream_id})"
else:
    most_chatted_stream_info = "N/A"
    most_active_stream_msgs = 0

# 4. Total Messages Sent
total_messages = len(df_target)

# 5. Mentions by Others (Count "JBIN2036" or "jbin" in messages from others)
df_others = data_2025[data_2025['user'] != target_user]
# Regex: (?i) for case-insensitive, \b for word boundaries
mention_pattern = r'(?i)\bJBIN2036\b|\bjbin\b|\b@JBIN2036\b|\b@jbin2036\b'
mentions_mask = df_others['message'].str.contains(mention_pattern, regex=True, na=False)
total_mentions_by_others = mentions_mask.sum()

# 6. Who Mentioned Him the Most
if total_mentions_by_others > 0:
    top_mentioner = df_others[mentions_mask]['user'].value_counts().idxmax()
    top_mentioner_count = df_others[mentions_mask]['user'].value_counts().max()
else:
    top_mentioner = "None"
    top_mentioner_count = 0

# 7. Messages Containing "@"
msgs_with_at = df_target['message'].str.contains('@', na=False).sum()

# 8. Who He Mentioned Most (with count)
if not df_target.empty:
    # Extract all words starting with @
    mentions_extracted = df_target['message'].str.extractall(r'(@\w+)')
    if not mentions_extracted.empty:
        counts = mentions_extracted[0].value_counts()
        top_user = counts.idxmax()
        top_count = counts.max()
        most_mentioned_user = f"{top_user} ({top_count} times)"
    else:
        most_mentioned_user = "None"
else:
    most_mentioned_user = "None"

# 9. Top 5 Emotes Used (from final_emote_list)
# Assuming final_emote_list is defined in your environment
emote_counts = {}
if not df_target.empty and 'final_emote_list' in locals():
    for emote in final_emote_list:
        # Count exact word matches for the emote
        # re.escape ensures special characters in emote names don't break regex
        pattern = r'\b' + re.escape(emote) + r'\b'
        count = df_target['message'].str.count(pattern).sum()
        if count > 0:
            emote_counts[emote] = count
    
    # Sort by count descending and take top 5
    top_5_emotes = sorted(emote_counts.items(), key=lambda x: x[1], reverse=True)[:5]
else:
    top_5_emotes = []

# 10. Average Words Per Message
if not df_target.empty:
    avg_words = df_target['message'].str.split().apply(len).mean()
else:
    avg_words = 0

# 11. Most Common 5-minute Interval
if not df_target.empty:
    # Calculate minutes from midnight
    minutes_from_midnight = df_target['date'].dt.hour * 60 + df_target['date'].dt.minute
    # Integer divide by 5 to get the "bin" index
    bin_index = minutes_from_midnight // 5
    most_common_bin = bin_index.value_counts().idxmax()
    
    # Convert back to time string HH:MM
    start_hour = (most_common_bin * 5) // 60
    start_min = (most_common_bin * 5) % 60
    most_active_time = f"{int(start_hour):02d}:{int(start_min):02d}"
else:
    most_active_time = "N/A"

# --- OUTPUT RESULTS ---
print(f"--- Stats for {target_user} in 2025 ---")
print(f"1. Participation: Chatted in {user_streams} out of {total_streams} total streams")
print(f"2. Longest Streak: {longest_streak} consecutive streams")
print(f"3. Most Chatted Stream: {most_chatted_stream_info} with {most_active_stream_msgs} messages")
print(f"4. Total Messages Sent: {total_messages}")
print(f"5. Mentions by Others: {total_mentions_by_others} times")
print(f"6. Top Mentioner: {top_mentioner} ({top_mentioner_count} times)")
print(f"7. Messages with '@': {msgs_with_at}")
print(f"8. Who He Mentioned Most: {most_mentioned_user}")
print(f"9. Top 5 Emotes: {top_5_emotes}")
print(f"10. Avg Words/Message: {avg_words:.2f}")
print(f"11. Most Common Time: {most_active_time} (5-min interval)")

--- Stats for JBIN2036 in 2025 ---
1. Participation: Chatted in 307 out of 308 total streams
2. Longest Streak: 293 consecutive days
3. Most Chatted Stream: 2025-12-05 (Stream ID: 489) with 2025 messages
4. Total Messages Sent: 93140
5. Mentions by Others: 6033 times
6. Top Mentioner: BenXBari (378 times)
7. Messages with '@': 3358
8. Who He Mentioned Most: @1206paul_ (209 times)
9. Top 5 Emotes: [('ome44', 6164), ('OOOO', 3249), ('mhm', 2978), ('doroL', 2410), ('WW', 1884)]
10. Avg Words/Message: 2.55
11. Most Common Time: 18:35 (5-min interval)


In [None]:
# 2. Filter for Year 2025
data_2025 = data[data['date'].dt.year == 2025].copy()

# Filter for the specific user "JBIN2036"
target_user = "balintboss"
df_target = data_2025[data_2025['user'] == target_user].copy()

# --- STATISTICS CALCULATION ---

# 1. Stream Participation: How many streams he chatted in vs total streams
total_streams = data_2025['stream'].nunique()
user_streams = df_target['stream'].nunique()

# 2. Longest Streak (Consecutive Streams)
# Get all unique streams in 2025 sorted by date to establish the timeline
all_streams_ordered = data_2025.sort_values('date')['stream'].unique()

# Identify which of those streams the target user participated in
user_streams_set = set(df_target['stream'].unique())

# Calculate the streak
current_streak = 0
longest_streak = 0

for stream in all_streams_ordered:
    if stream in user_streams_set:
        current_streak += 1
        longest_streak = max(longest_streak, current_streak)
    else:
        current_streak = 0

# 3. Most Chatted Stream (Date and Message Count)
if not df_target.empty:
    # Group by stream identifier to find the one with most messages
    stream_counts = df_target.groupby('stream').size()
    most_active_stream_id = stream_counts.idxmax()
    most_active_stream_msgs = stream_counts.max()
    
    # Find the primary date for this stream (the most frequent date associated with this stream ID)
    stream_date = df_target[df_target['stream'] == most_active_stream_id]['date'].dt.date.mode()[0]
    most_chatted_stream_info = f"{stream_date} (Stream ID: {most_active_stream_id})"
else:
    most_chatted_stream_info = "N/A"
    most_active_stream_msgs = 0

# 4. Total Messages Sent
total_messages = len(df_target)

# 5. Mentions by Others (Count "JBIN2036" or "jbin" in messages from others)
df_others = data_2025[data_2025['user'] != target_user]
# Regex: (?i) for case-insensitive, \b for word boundaries
mention_pattern = r'(?i)\bbalintboss\b|\bbalin\b|\bbalint\b|\bboss\b|\b@balintboss\b'
mentions_mask = df_others['message'].str.contains(mention_pattern, regex=True, na=False)
total_mentions_by_others = mentions_mask.sum()

# 6. Who Mentioned Him the Most
if total_mentions_by_others > 0:
    top_mentioner = df_others[mentions_mask]['user'].value_counts().idxmax()
    top_mentioner_count = df_others[mentions_mask]['user'].value_counts().max()
else:
    top_mentioner = "None"
    top_mentioner_count = 0

# 7. Messages Containing "@"
msgs_with_at = df_target['message'].str.contains('@', na=False).sum()

# 8. Who He Mentioned Most (with count)
if not df_target.empty:
    # Extract all words starting with @
    mentions_extracted = df_target['message'].str.extractall(r'(@\w+)')
    if not mentions_extracted.empty:
        counts = mentions_extracted[0].value_counts()
        top_user = counts.idxmax()
        top_count = counts.max()
        most_mentioned_user = f"{top_user} ({top_count} times)"
    else:
        most_mentioned_user = "None"
else:
    most_mentioned_user = "None"

# 9. Top 5 Emotes Used (from final_emote_list)
# Assuming final_emote_list is defined in your environment
emote_counts = {}
if not df_target.empty and 'final_emote_list' in locals():
    for emote in final_emote_list:
        # Count exact word matches for the emote
        # re.escape ensures special characters in emote names don't break regex
        pattern = r'\b' + re.escape(emote) + r'\b'
        count = df_target['message'].str.count(pattern).sum()
        if count > 0:
            emote_counts[emote] = count
    
    # Sort by count descending and take top 5
    top_5_emotes = sorted(emote_counts.items(), key=lambda x: x[1], reverse=True)[:5]
else:
    top_5_emotes = []

# 10. Average Words Per Message
if not df_target.empty:
    avg_words = df_target['message'].str.split().apply(len).mean()
else:
    avg_words = 0

# 11. Most Common 5-minute Interval
if not df_target.empty:
    # Calculate minutes from midnight
    minutes_from_midnight = df_target['date'].dt.hour * 60 + df_target['date'].dt.minute
    # Integer divide by 5 to get the "bin" index
    bin_index = minutes_from_midnight // 5
    most_common_bin = bin_index.value_counts().idxmax()
    
    # Convert back to time string HH:MM
    start_hour = (most_common_bin * 5) // 60
    start_min = (most_common_bin * 5) % 60
    most_active_time = f"{int(start_hour):02d}:{int(start_min):02d}"
else:
    most_active_time = "N/A"

# --- OUTPUT RESULTS ---
print(f"--- Stats for {target_user} in 2025 ---")
print(f"1. Participation: Chatted in {user_streams} out of {total_streams} total streams")
print(f"2. Longest Streak: {longest_streak} consecutive streams")
print(f"3. Most Chatted Stream: {most_chatted_stream_info} with {most_active_stream_msgs} messages")
print(f"4. Total Messages Sent: {total_messages}")
print(f"5. Mentions by Others: {total_mentions_by_others} times")
print(f"6. Top Mentioner: {top_mentioner} ({top_mentioner_count} times)")
print(f"7. Messages with '@': {msgs_with_at}")
print(f"8. Who He Mentioned Most: {most_mentioned_user}")
print(f"9. Top 5 Emotes: {top_5_emotes}")
print(f"10. Avg Words/Message: {avg_words:.2f}")
print(f"11. Most Common Time: {most_active_time} (5-min interval)")

--- Stats for balintboss in 2025 ---
1. Participation: Chatted in 308 out of 308 total streams
2. Longest Streak: 308 consecutive days
3. Most Chatted Stream: 2025-12-05 (Stream ID: 489) with 615 messages
4. Total Messages Sent: 54100
5. Mentions by Others: 12883 times
6. Top Mentioner: lajosbarnabas (731 times)
7. Messages with '@': 12469
8. Who He Mentioned Most: @Ivana_10 (672 times)
9. Top 5 Emotes: [('LOL', 3177), ('doroL', 2025), ('mhm', 1804), ('OOOO', 1138), ('dome32', 1137)]
10. Avg Words/Message: 3.91
11. Most Common Time: 15:25 (5-min interval)


In [None]:
# 2. Filter for Year 2025
data_2025 = data[data['date'].dt.year == 2025].copy()

# Filter for the specific user "JBIN2036"
target_user = "BenXBari"
df_target = data_2025[data_2025['user'] == target_user].copy()

# --- STATISTICS CALCULATION ---

# 1. Stream Participation: How many streams he chatted in vs total streams
total_streams = data_2025['stream'].nunique()
user_streams = df_target['stream'].nunique()

# 2. Longest Streak (Consecutive Streams)
# Get all unique streams in 2025 sorted by date to establish the timeline
all_streams_ordered = data_2025.sort_values('date')['stream'].unique()

# Identify which of those streams the target user participated in
user_streams_set = set(df_target['stream'].unique())

# Calculate the streak
current_streak = 0
longest_streak = 0

for stream in all_streams_ordered:
    if stream in user_streams_set:
        current_streak += 1
        longest_streak = max(longest_streak, current_streak)
    else:
        current_streak = 0

# 3. Most Chatted Stream (Date and Message Count)
if not df_target.empty:
    # Group by stream identifier to find the one with most messages
    stream_counts = df_target.groupby('stream').size()
    most_active_stream_id = stream_counts.idxmax()
    most_active_stream_msgs = stream_counts.max()
    
    # Find the primary date for this stream (the most frequent date associated with this stream ID)
    stream_date = df_target[df_target['stream'] == most_active_stream_id]['date'].dt.date.mode()[0]
    most_chatted_stream_info = f"{stream_date} (Stream ID: {most_active_stream_id})"
else:
    most_chatted_stream_info = "N/A"
    most_active_stream_msgs = 0

# 4. Total Messages Sent
total_messages = len(df_target)

# 5. Mentions by Others (Count "JBIN2036" or "jbin" in messages from others)
df_others = data_2025[data_2025['user'] != target_user]
# Regex: (?i) for case-insensitive, \b for word boundaries
mention_pattern = r'(?i)\bBenXBari\b|\b@BenXBari\b|\b@benxbari\b|\bbenxbari\b|\bben\b'
mentions_mask = df_others['message'].str.contains(mention_pattern, regex=True, na=False)
total_mentions_by_others = mentions_mask.sum()

# 6. Who Mentioned Him the Most
if total_mentions_by_others > 0:
    top_mentioner = df_others[mentions_mask]['user'].value_counts().idxmax()
    top_mentioner_count = df_others[mentions_mask]['user'].value_counts().max()
else:
    top_mentioner = "None"
    top_mentioner_count = 0

# 7. Messages Containing "@"
msgs_with_at = df_target['message'].str.contains('@', na=False).sum()

# 8. Who He Mentioned Most (with count)
if not df_target.empty:
    # Extract all words starting with @
    mentions_extracted = df_target['message'].str.extractall(r'(@\w+)')
    if not mentions_extracted.empty:
        counts = mentions_extracted[0].value_counts()
        top_user = counts.idxmax()
        top_count = counts.max()
        most_mentioned_user = f"{top_user} ({top_count} times)"
    else:
        most_mentioned_user = "None"
else:
    most_mentioned_user = "None"

# 9. Top 5 Emotes Used (from final_emote_list)
# Assuming final_emote_list is defined in your environment
emote_counts = {}
if not df_target.empty and 'final_emote_list' in locals():
    for emote in final_emote_list:
        # Count exact word matches for the emote
        # re.escape ensures special characters in emote names don't break regex
        pattern = r'\b' + re.escape(emote) + r'\b'
        count = df_target['message'].str.count(pattern).sum()
        if count > 0:
            emote_counts[emote] = count
    
    # Sort by count descending and take top 5
    top_5_emotes = sorted(emote_counts.items(), key=lambda x: x[1], reverse=True)[:5]
else:
    top_5_emotes = []

# 10. Average Words Per Message
if not df_target.empty:
    avg_words = df_target['message'].str.split().apply(len).mean()
else:
    avg_words = 0

# 11. Most Common 5-minute Interval
if not df_target.empty:
    # Calculate minutes from midnight
    minutes_from_midnight = df_target['date'].dt.hour * 60 + df_target['date'].dt.minute
    # Integer divide by 5 to get the "bin" index
    bin_index = minutes_from_midnight // 5
    most_common_bin = bin_index.value_counts().idxmax()
    
    # Convert back to time string HH:MM
    start_hour = (most_common_bin * 5) // 60
    start_min = (most_common_bin * 5) % 60
    most_active_time = f"{int(start_hour):02d}:{int(start_min):02d}"
else:
    most_active_time = "N/A"

# --- OUTPUT RESULTS ---
print(f"--- Stats for {target_user} in 2025 ---")
print(f"1. Participation: Chatted in {user_streams} out of {total_streams} total streams")
print(f"2. Longest Streak: {longest_streak} consecutive streams")
print(f"3. Most Chatted Stream: {most_chatted_stream_info} with {most_active_stream_msgs} messages")
print(f"4. Total Messages Sent: {total_messages}")
print(f"5. Mentions by Others: {total_mentions_by_others} times")
print(f"6. Top Mentioner: {top_mentioner} ({top_mentioner_count} times)")
print(f"7. Messages with '@': {msgs_with_at}")
print(f"8. Who He Mentioned Most: {most_mentioned_user}")
print(f"9. Top 5 Emotes: {top_5_emotes}")
print(f"10. Avg Words/Message: {avg_words:.2f}")
print(f"11. Most Common Time: {most_active_time} (5-min interval)")

--- Stats for BenXBari in 2025 ---
1. Participation: Chatted in 166 out of 308 total streams
2. Longest Streak: 83 consecutive days
3. Most Chatted Stream: 2025-12-05 (Stream ID: 489) with 3334 messages
4. Total Messages Sent: 49702
5. Mentions by Others: 7886 times
6. Top Mentioner: lajosbarnabas (692 times)
7. Messages with '@': 6343
8. Who He Mentioned Most: @lajosbarnabas (478 times)
9. Top 5 Emotes: [('sob', 2467), ('hai', 1696), ('OMEYES', 1263), ('WW', 1192), ('LOL', 1005)]
10. Avg Words/Message: 3.34
11. Most Common Time: 15:25 (5-min interval)


In [None]:
# 2. Filter for Year 2025
data_2025 = data[data['date'].dt.year == 2025].copy()

# Filter for the specific user "JBIN2036"
target_user = "W1r3lesss"
df_target = data_2025[data_2025['user'] == target_user].copy()

# --- STATISTICS CALCULATION ---

# 1. Stream Participation: How many streams he chatted in vs total streams
total_streams = data_2025['stream'].nunique()
user_streams = df_target['stream'].nunique()

# 2. Longest Streak (Consecutive Streams)
# Get all unique streams in 2025 sorted by date to establish the timeline
all_streams_ordered = data_2025.sort_values('date')['stream'].unique()

# Identify which of those streams the target user participated in
user_streams_set = set(df_target['stream'].unique())

# Calculate the streak
current_streak = 0
longest_streak = 0

for stream in all_streams_ordered:
    if stream in user_streams_set:
        current_streak += 1
        longest_streak = max(longest_streak, current_streak)
    else:
        current_streak = 0

# 3. Most Chatted Stream (Date and Message Count)
if not df_target.empty:
    # Group by stream identifier to find the one with most messages
    stream_counts = df_target.groupby('stream').size()
    most_active_stream_id = stream_counts.idxmax()
    most_active_stream_msgs = stream_counts.max()
    
    # Find the primary date for this stream (the most frequent date associated with this stream ID)
    stream_date = df_target[df_target['stream'] == most_active_stream_id]['date'].dt.date.mode()[0]
    most_chatted_stream_info = f"{stream_date} (Stream ID: {most_active_stream_id})"
else:
    most_chatted_stream_info = "N/A"
    most_active_stream_msgs = 0

# 4. Total Messages Sent
total_messages = len(df_target)

# 5. Mentions by Others (Count "JBIN2036" or "jbin" in messages from others)
df_others = data_2025[data_2025['user'] != target_user]
# Regex: (?i) for case-insensitive, \b for word boundaries
mention_pattern = r'(?i)\bW1r3lesss\b|\bw1r3less\b|\bwireless\b|\bwire\b|\b@W1r3lesss\b|\b@w1r3lesss\b'
mentions_mask = df_others['message'].str.contains(mention_pattern, regex=True, na=False)
total_mentions_by_others = mentions_mask.sum()

# 6. Who Mentioned Him the Most
if total_mentions_by_others > 0:
    top_mentioner = df_others[mentions_mask]['user'].value_counts().idxmax()
    top_mentioner_count = df_others[mentions_mask]['user'].value_counts().max()
else:
    top_mentioner = "None"
    top_mentioner_count = 0

# 7. Messages Containing "@"
msgs_with_at = df_target['message'].str.contains('@', na=False).sum()

# 8. Who He Mentioned Most (with count)
if not df_target.empty:
    # Extract all words starting with @
    mentions_extracted = df_target['message'].str.extractall(r'(@\w+)')
    if not mentions_extracted.empty:
        counts = mentions_extracted[0].value_counts()
        top_user = counts.idxmax()
        top_count = counts.max()
        most_mentioned_user = f"{top_user} ({top_count} times)"
    else:
        most_mentioned_user = "None"
else:
    most_mentioned_user = "None"

# 9. Top 5 Emotes Used (from final_emote_list)
# Assuming final_emote_list is defined in your environment
emote_counts = {}
if not df_target.empty and 'final_emote_list' in locals():
    for emote in final_emote_list:
        # Count exact word matches for the emote
        # re.escape ensures special characters in emote names don't break regex
        pattern = r'\b' + re.escape(emote) + r'\b'
        count = df_target['message'].str.count(pattern).sum()
        if count > 0:
            emote_counts[emote] = count
    
    # Sort by count descending and take top 5
    top_5_emotes = sorted(emote_counts.items(), key=lambda x: x[1], reverse=True)[:5]
else:
    top_5_emotes = []

# 10. Average Words Per Message
if not df_target.empty:
    avg_words = df_target['message'].str.split().apply(len).mean()
else:
    avg_words = 0

# 11. Most Common 5-minute Interval
if not df_target.empty:
    # Calculate minutes from midnight
    minutes_from_midnight = df_target['date'].dt.hour * 60 + df_target['date'].dt.minute
    # Integer divide by 5 to get the "bin" index
    bin_index = minutes_from_midnight // 5
    most_common_bin = bin_index.value_counts().idxmax()
    
    # Convert back to time string HH:MM
    start_hour = (most_common_bin * 5) // 60
    start_min = (most_common_bin * 5) % 60
    most_active_time = f"{int(start_hour):02d}:{int(start_min):02d}"
else:
    most_active_time = "N/A"

# --- OUTPUT RESULTS ---
print(f"--- Stats for {target_user} in 2025 ---")
print(f"1. Participation: Chatted in {user_streams} out of {total_streams} total streams")
print(f"2. Longest Streak: {longest_streak} consecutive streams")
print(f"3. Most Chatted Stream: {most_chatted_stream_info} with {most_active_stream_msgs} messages")
print(f"4. Total Messages Sent: {total_messages}")
print(f"5. Mentions by Others: {total_mentions_by_others} times")
print(f"6. Top Mentioner: {top_mentioner} ({top_mentioner_count} times)")
print(f"7. Messages with '@': {msgs_with_at}")
print(f"8. Who He Mentioned Most: {most_mentioned_user}")
print(f"9. Top 5 Emotes: {top_5_emotes}")
print(f"10. Avg Words/Message: {avg_words:.2f}")
print(f"11. Most Common Time: {most_active_time} (5-min interval)")

--- Stats for W1r3lesss in 2025 ---
1. Participation: Chatted in 258 out of 308 total streams
2. Longest Streak: 138 consecutive days
3. Most Chatted Stream: 2025-03-20 (Stream ID: 270) with 1005 messages
4. Total Messages Sent: 44530
5. Mentions by Others: 2525 times
6. Top Mentioner: trek44_ (165 times)
7. Messages with '@': 6440
8. Who He Mentioned Most: @lajosbarnabas (329 times)
9. Top 5 Emotes: [('speed1', 17283), ('Smurfing', 8149), ('ome44', 3591), ('ome32', 2916), ('OMEYES', 2330)]
10. Avg Words/Message: 3.51
11. Most Common Time: 17:30 (5-min interval)


In [None]:
# 2. Filter for Year 2025
data_2025 = data[data['date'].dt.year == 2025].copy()

# Filter for the specific user "JBIN2036"
target_user = "trek44_"
df_target = data_2025[data_2025['user'] == target_user].copy()

# --- STATISTICS CALCULATION ---

# 1. Stream Participation: How many streams he chatted in vs total streams
total_streams = data_2025['stream'].nunique()
user_streams = df_target['stream'].nunique()

# 2. Longest Streak (Consecutive Streams)
# Get all unique streams in 2025 sorted by date to establish the timeline
all_streams_ordered = data_2025.sort_values('date')['stream'].unique()

# Identify which of those streams the target user participated in
user_streams_set = set(df_target['stream'].unique())

# Calculate the streak
current_streak = 0
longest_streak = 0

for stream in all_streams_ordered:
    if stream in user_streams_set:
        current_streak += 1
        longest_streak = max(longest_streak, current_streak)
    else:
        current_streak = 0

# 3. Most Chatted Stream (Date and Message Count)
if not df_target.empty:
    # Group by stream identifier to find the one with most messages
    stream_counts = df_target.groupby('stream').size()
    most_active_stream_id = stream_counts.idxmax()
    most_active_stream_msgs = stream_counts.max()
    
    # Find the primary date for this stream (the most frequent date associated with this stream ID)
    stream_date = df_target[df_target['stream'] == most_active_stream_id]['date'].dt.date.mode()[0]
    most_chatted_stream_info = f"{stream_date} (Stream ID: {most_active_stream_id})"
else:
    most_chatted_stream_info = "N/A"
    most_active_stream_msgs = 0

# 4. Total Messages Sent
total_messages = len(df_target)

# 5. Mentions by Others (Count "JBIN2036" or "jbin" in messages from others)
df_others = data_2025[data_2025['user'] != target_user]
# Regex: (?i) for case-insensitive, \b for word boundaries
mention_pattern = r'(?i)\btreklul\b|\b@treklul\b|\b@trek44\b|\btrek44\b|\bttrek_\b|\b@ttrek_\b|\btrek_x\b|\b@trek_x\b|\btrek\b'
mentions_mask = df_others['message'].str.contains(mention_pattern, regex=True, na=False)
total_mentions_by_others = mentions_mask.sum()

# 6. Who Mentioned Him the Most
if total_mentions_by_others > 0:
    top_mentioner = df_others[mentions_mask]['user'].value_counts().idxmax()
    top_mentioner_count = df_others[mentions_mask]['user'].value_counts().max()
else:
    top_mentioner = "None"
    top_mentioner_count = 0

# 7. Messages Containing "@"
msgs_with_at = df_target['message'].str.contains('@', na=False).sum()

# 8. Who He Mentioned Most (with count)
if not df_target.empty:
    # Extract all words starting with @
    mentions_extracted = df_target['message'].str.extractall(r'(@\w+)')
    if not mentions_extracted.empty:
        counts = mentions_extracted[0].value_counts()
        top_user = counts.idxmax()
        top_count = counts.max()
        most_mentioned_user = f"{top_user} ({top_count} times)"
    else:
        most_mentioned_user = "None"
else:
    most_mentioned_user = "None"

# 9. Top 5 Emotes Used (from final_emote_list)
# Assuming final_emote_list is defined in your environment
emote_counts = {}
if not df_target.empty and 'final_emote_list' in locals():
    for emote in final_emote_list:
        # Count exact word matches for the emote
        # re.escape ensures special characters in emote names don't break regex
        pattern = r'\b' + re.escape(emote) + r'\b'
        count = df_target['message'].str.count(pattern).sum()
        if count > 0:
            emote_counts[emote] = count
    
    # Sort by count descending and take top 5
    top_5_emotes = sorted(emote_counts.items(), key=lambda x: x[1], reverse=True)[:5]
else:
    top_5_emotes = []

# 10. Average Words Per Message
if not df_target.empty:
    avg_words = df_target['message'].str.split().apply(len).mean()
else:
    avg_words = 0

# 11. Most Common 5-minute Interval
if not df_target.empty:
    # Calculate minutes from midnight
    minutes_from_midnight = df_target['date'].dt.hour * 60 + df_target['date'].dt.minute
    # Integer divide by 5 to get the "bin" index
    bin_index = minutes_from_midnight // 5
    most_common_bin = bin_index.value_counts().idxmax()
    
    # Convert back to time string HH:MM
    start_hour = (most_common_bin * 5) // 60
    start_min = (most_common_bin * 5) % 60
    most_active_time = f"{int(start_hour):02d}:{int(start_min):02d}"
else:
    most_active_time = "N/A"

# --- OUTPUT RESULTS ---
print(f"--- Stats for {target_user} in 2025 ---")
print(f"1. Participation: Chatted in {user_streams} out of {total_streams} total streams")
print(f"2. Longest Streak: {longest_streak} consecutive streams")
print(f"3. Most Chatted Stream: {most_chatted_stream_info} with {most_active_stream_msgs} messages")
print(f"4. Total Messages Sent: {total_messages}")
print(f"5. Mentions by Others: {total_mentions_by_others} times")
print(f"6. Top Mentioner: {top_mentioner} ({top_mentioner_count} times)")
print(f"7. Messages with '@': {msgs_with_at}")
print(f"8. Who He Mentioned Most: {most_mentioned_user}")
print(f"9. Top 5 Emotes: {top_5_emotes}")
print(f"10. Avg Words/Message: {avg_words:.2f}")
print(f"11. Most Common Time: {most_active_time} (5-min interval)")

--- Stats for trek44_ in 2025 ---
1. Participation: Chatted in 284 out of 308 total streams
2. Longest Streak: 121 consecutive days
3. Most Chatted Stream: 2025-03-14 (Stream ID: 265) with 841 messages
4. Total Messages Sent: 43682
5. Mentions by Others: 4354 times
6. Top Mentioner: W1r3lesss (330 times)
7. Messages with '@': 3050
8. Who He Mentioned Most: @banties_x (265 times)
9. Top 5 Emotes: [('ome44', 14884), ('speed1', 4848), ('Banger', 4010), ('Smurfing', 3079), ('OMEYES', 2482)]
10. Avg Words/Message: 3.89
11. Most Common Time: 16:00 (5-min interval)


2026 Starts here

In [30]:
new_emotes = [
    'ome62','Staredown','PentagramOfFarallah','Gloving','footgammaRadiation',
    'RAAAAAAAAGH','praise','doroRage','AndreSmithing','omePIECE','Rime',
    'doroPIECE','kaiReading','Deadge','furi','blub','duh','nuh','gopissgirl',
    'plong','Awesome','o','feaky','DoroThinking','evol','doro18','staycalm',
    'omeFOWL','s','fairs','deal','moshimoshi','gn'
]

final_emote_list = list(set(final_emote_list) | set(new_emotes))


In [31]:
# Filter only January 2026
jan_2026 = data[(data["date"].dt.year == 2026) & (data["date"].dt.month == 1)]

# Find the first message date for each user
first_messages = data.groupby("user")["date"].min().reset_index()

# Filter users whose first message was in January 2026
new_chatters = first_messages[
    (first_messages["date"].dt.year == 2026) & (first_messages["date"].dt.month == 1)
]

# Get the number of new chatters
num_new_chatters = new_chatters["user"].nunique()

print(f"Number of new chatters in Jan 2026: {num_new_chatters}")

stream_counts = jan_2026['stream'].value_counts().reset_index()
print(f"Number of Streams in Jan 2026: {len(stream_counts)}")
print(f"Number of Messages in Jan 2026: {jan_2026.shape[0]}")
print(f"Number of Users in Jan 2026: {jan_2026['user'].nunique()}")

# Count number of messages per user in January 2026
user_counts = jan_2026['user'].value_counts().reset_index()

# Rename columns for clarity
user_counts.columns = ['user', 'message_count']

# Get the top 10 users
top_10_users = user_counts.head(10)
print("\nTop 10 users in Jan 2026:")
print(top_10_users)

# --- Emote Analysis ---
# Initialize a Counter to store emote frequencies
emote_counter = Counter()

# Go through each message and count emotes
for message in jan_2026["message"]:
    words = message.split()
    for word in words:
        if word in final_emote_list:
            emote_counter[word] += 1

# Get top 10 emotes
top_10_emotes = emote_counter.most_common(10)

print("\nTop 10 emotes in Jan 2026:")
for emote, count in top_10_emotes:
    print(f"{emote}: {count} times")

# --- Activity Spikes ---
# Make a copy to avoid SettingWithCopyWarning
jan_2026 = jan_2026.copy()

# Round timestamps to nearest 5-minute interval
jan_2026["5min"] = jan_2026["date"].dt.floor("5min")

# Count messages per 5-minute interval
message_counts = jan_2026.groupby("5min").size().reset_index(name="message_count")

# Get top 5 busiest 5-minute intervals
top_5_fastest = message_counts.sort_values("message_count", ascending=False).head(5)
print("\nTop 5 busiest 5-minute intervals in Jan 2026:")
print(top_5_fastest)

# --- Stream Velocity ---
# Group by stream and compute message counts and time range
stream_stats = jan_2026.groupby("stream").agg(
    message_count=("message", "count"),
    start_time=("date", "min"),
    end_time=("date", "max")
).reset_index()

# Compute duration in minutes
stream_stats["duration_min"] = (stream_stats["end_time"] - stream_stats["start_time"]).dt.total_seconds() / 60

# Avoid division by zero
stream_stats = stream_stats[stream_stats["duration_min"] > 0]

# Calculate messages per minute
stream_stats["messages_per_min"] = stream_stats["message_count"] / stream_stats["duration_min"]

# Get the stream with the highest messages per minute
fastest_stream = stream_stats.sort_values("messages_per_min", ascending=False).head(1)
print("\nStream with the highest messages per minute:")
print(fastest_stream)

# --- Loyalty Check ---
# Get all unique stream IDs in Jan
all_streams = set(jan_2026["stream"].unique())

# Group by user and get the set of streams each user chatted in
user_streams = jan_2026.groupby("user")["stream"].apply(set)

# Filter users who chatted in every stream
active_every_stream = user_streams[user_streams == all_streams]

# Get just the user names
users_in_every_stream = active_every_stream.index.tolist()
print("\nChatters who chatted in every stream in Jan 2026:")
print(users_in_every_stream)

Number of new chatters in Jan 2026: 5120
Number of Streams in Jan 2026: 28
Number of Messages in Jan 2026: 245324
Number of Users in Jan 2026: 11642

Top 10 users in Jan 2026:
            user  message_count
0       erdeedge          12388
1      rafa30___          10540
2    polimpompis          10465
3       JBIN2036           9478
4       nishad13           8168
5       HALP____           7328
6        Muuskie           7228
7     balintboss           5970
8        Odah_02           4885
9  lajosbarnabas           4395

Top 10 emotes in Jan 2026:
hi: 9452 times
WW: 6907 times
LOL: 3798 times
omeFaded: 3583 times
ome44: 3560 times
bye: 3328 times
sob: 3252 times
mhm: 3032 times
OMEYES: 2680 times
OOOO: 2627 times

Top 5 busiest 5-minute intervals in Jan 2026:
                          5min  message_count
50   2026-01-03 15:35:00+01:00            618
1409 2026-01-30 16:55:00+01:00            613
1408 2026-01-30 16:50:00+01:00            568
393  2026-01-10 18:30:00+01:00            53