In [1]:
import pandas as pd

In [None]:
import requests
import csv
import time
import re
from urllib.parse import quote_plus

# === CONFIG ===
# Endpoint that fetch_all_revisions() sends its GET requests to.
WIKI_API = "https://en.wikipedia.org/w/api.php"
# Title handed to fetch_all_revisions() in the run section.
PAGE_TITLE = "Queen (band)"         # change to any page title
# Path of the CSV file opened for writing in the run section.
OUT_CSV = "queen_editors.csv"
# Seconds passed to time.sleep() after each response that carries a "continue" token.
SLEEP_BETWEEN = 0.5                # polite pause between requests
# Sent as the User-Agent header on every request made by fetch_all_revisions().
# NOTE(review): the value below looks like a placeholder — replace with real project/contact info.
USER_AGENT = "YourProjectName/0.1 (your_email@example.com)"

# === Helpers ===
def get_user_type(row):
    """Classify the editor named in ``row`` as an IP address or an account.

    Args:
        row: Mapping-like object (dict, pandas row, ...) with a ``"user"``
            entry holding the editor name.

    Returns:
        ``"Anonymous (IP)"`` when the name parses as a valid IPv4 or IPv6
        address, otherwise ``"Registered"``.

    Raises:
        KeyError: If ``row`` has no ``"user"`` entry.
    """
    # Function-scope import keeps this helper self-contained.
    import ipaddress

    try:
        # str() guards against non-string cells (e.g. ints or NaN), which
        # ip_address() would otherwise interpret as a packed integer address.
        ipaddress.ip_address(str(row["user"]))
    except ValueError:
        # Not a syntactically valid IPv4/IPv6 literal -> treat as an account.
        return "Registered"
    return "Anonymous (IP)"

def fetch_all_revisions(title):
    """Fetch every revision record of a page via the MediaWiki query API.

    Requests ``prop=revisions`` for ``title`` from ``WIKI_API`` in batches and
    follows the ``continue`` token returned by the API until none is left,
    sleeping ``SLEEP_BETWEEN`` seconds between batches.

    Args:
        title: Page title to query.

    Returns:
        A list of the revision dicts as returned by the API (requested
        properties: user, timestamp, ids, comment). Empty if the response
        contains no pages or the page has no ``revisions`` entry.

    Raises:
        requests.HTTPError: If a response has an HTTP error status.
        RuntimeError: If the API returns an ``error`` payload; without this
            check such a response would silently end the loop and yield
            empty or truncated results.
    """
    params = {
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": title,
        "rvprop": "user|timestamp|ids|comment",
        "rvlimit": "500",
        "formatversion": "2",
        "rvslots": "main",
    }

    all_revs = []
    # Context manager ensures the session's pooled connections are released.
    with requests.Session() as session:
        session.headers.update({"User-Agent": USER_AGENT})

        while True:
            resp = session.get(WIKI_API, params=params, timeout=30)
            resp.raise_for_status()
            data = resp.json()

            # API-level failures arrive in the JSON body, not the HTTP status.
            if "error" in data:
                raise RuntimeError(f"MediaWiki API error: {data['error']!r}")

            pages = data.get("query", {}).get("pages", [])
            if not pages:
                break
            # Only one title is requested, so the first page is the target.
            all_revs.extend(pages[0].get("revisions") or [])

            # continuation handling: merge the token into the next request
            cont = data.get("continue")
            if not cont:
                break
            params.update(cont)
            time.sleep(SLEEP_BETWEEN)

    return all_revs

# === Run ===
# Download the full revision history of the configured page.
revisions = fetch_all_revisions(PAGE_TITLE)
print(f"Fetched {len(revisions)} revisions")

# collect unique users and counts
# Revisions without a "user" field are grouped under "(unknown)".
user_counts = {}
for r in revisions:
    user = r.get("user", "(unknown)")
    user_counts[user] = user_counts.get(user, 0) + 1

# write CSV, most active editors first
with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["username", "is_ip", "edit_count"])
    for user, count in sorted(user_counts.items(), key=lambda x: -x[1]):
        # No is_ip() helper exists in this file; derive the flag from
        # get_user_type(), which expects a mapping with a "user" key.
        is_ip = get_user_type({"user": user}) == "Anonymous (IP)"
        writer.writerow([user, is_ip, count])

print(f"Wrote {len(user_counts)} unique users to {OUT_CSV}")
