In [1]:

import json
from requests_sse import EventSource
import pandas as pd
import time

# Endpoint the script streams from (the "mediawiki.recentchange" stream on
# stream.wikimedia.org).
url = 'https://stream.wikimedia.org/v2/stream/mediawiki.recentchange'

# Extra HTTP headers sent when opening the stream.
# NOTE(review): this User-Agent imitates a desktop Chrome browser. Wikimedia's
# User-Agent policy appears to ask automated clients for a descriptive value
# that identifies the tool and a contact -- confirm and replace if so.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}

# Function to determine if the change is to a talk page
def is_talk_page(title):
    """Report whether *title* belongs to one of the recognised talk namespaces.

    The match is case-insensitive and anchored at the start of the title.
    Only the namespaces listed below count; any other namespace (for example
    "User talk:") is reported as not being a talk page.
    """
    talk_namespaces = (
        'talk:',
        'wikipedia talk:',
        'file talk:',
        'template talk:',
        'help talk:',
        'category talk:',
        'portal talk:',
        'book talk:',
        'draft talk:',
        'timedtext talk:',
        'module talk:',
    )
    # str.startswith accepts a tuple and is true if any member matches.
    return title.lower().startswith(talk_namespaces)
# Helper function to clean the talk prefix
def clean_talk_prefix(title):
    """Return *title* with its leading talk-namespace prefix removed.

    The prefix is matched case-insensitively. When one is found, the rest of
    the title is returned with surrounding whitespace stripped. When none is
    found, *title* is returned exactly as given (no stripping).
    """
    talk_namespaces = (
        'talk:',
        'wikipedia talk:',
        'file talk:',
        'template talk:',
        'help talk:',
        'category talk:',
        'portal talk:',
        'book talk:',
        'draft talk:',
        'timedtext talk:',
        'module talk:',
    )
    lowered = title.lower()
    # First namespace (in list order) that the title starts with, if any.
    matched = next((ns for ns in talk_namespaces if lowered.startswith(ns)), None)
    if matched is None:
        return title
    return title[len(matched):].strip()

# Collect human (non-bot) edits to English Wikipedia talk pages from the
# recent-changes stream for about 60 seconds, echo each one, and write the
# collected rows to edit.csv.

# Parallel columns: [cleaned titles, edit comments, absolute byte differences].
data_list = [[], [], []]

# Built once, up front, and backed by the same list objects as data_list, so
# `data` exists (with empty columns) even if no matching edit arrives before
# the deadline. Previously that case raised NameError at DataFrame creation.
data = {
    "Title": data_list[0],
    "Comment": data_list[1],
    "Bytes": data_list[2],
}

t_end = time.time() + 60

# Setting up the EventSource connection
with EventSource(url, headers=headers) as stream:
    for event in stream:
        # The deadline is only evaluated when an event arrives.
        if time.time() > t_end:
            break

        if event.type != 'message':
            continue

        # Keep the try body down to the single call that can fail on bad JSON.
        try:
            change = json.loads(event.data)
        except ValueError:
            continue

        # Skip payloads that are not JSON objects instead of crashing on them.
        if not isinstance(change, dict):
            continue

        title = change.get('title')
        if not isinstance(title, str):
            continue

        # Keep only English Wikipedia talk-page edits made by non-bots.
        # An event without a 'bot' flag is treated as a bot edit and skipped.
        if change.get('wiki') != 'enwiki' or change.get('bot', True):
            continue
        if not is_talk_page(title):
            continue

        # 'length' may be absent or null (e.g. non-edit events).
        length = change.get('length') or {}
        old_len = length.get('old')  # size before the edit
        new_len = length.get('new')  # size after the edit
        if old_len is not None and new_len is not None:
            byte_diff = abs(new_len - old_len)
        else:
            byte_diff = None  # one side unknown, e.g. page creation has no old size

        clean_title = clean_talk_prefix(title)
        user = change.get('user', '')
        comment = change.get('comment', '')

        print(f'{user} edited {clean_title}: {comment} with {byte_diff} bytes')

        data_list[0].append(clean_title)
        data_list[1].append(comment)
        data_list[2].append(byte_diff)

df = pd.DataFrame(data)
df.to_csv('edit.csv', index=False)







Shocksingularity edited Vital articles/Level/5/Society: /* Add [[Social stigma]] */ Closing discussion ([[User:DaxServer/DiscussionCloser|DiscussionCloser]] v.1.7.3-8) with 170 bytes
GustavoCza edited Coldplay: /* Lead */ Reply with 528 bytes
Kline edited 2024 British Columbia general election/Cariboo-Chilcotin: [[WP:AES|←]]Created page with '{{WP banner shell| {{WP Canada|bc=yes}} {{WP Elections}} }}' with None bytes
Simeon edited Dimitrios Patrinos: [[Wikipedia:WikiProject|WikiProject]] tagging with 25 bytes
Kline edited 2024 British Columbia general election/Coquitlam-Burke Mountain: [[WP:AES|←]]Created page with '{{WP banner shell| {{WP Canada|bc=yes}} {{WP Elections}} }}' with None bytes
Kline edited 2024 British Columbia general election/Coquitlam-Maillardville: [[WP:AES|←]]Created page with '{{WP banner shell| {{WP Canada|bc=yes}} {{WP Elections}} }}' with None bytes
Kline edited 2024 British Columbia general election/Courtenay-Comox: [[WP:AES|←]]Created page with '{{WP banner s