In [1]:
import mwclient
import time

# Connect to English Wikipedia and look up the article whose revision
# history is fetched in the next cell.
wiki_host = 'en.wikipedia.org'
article_title = 'Bitcoin'
site = mwclient.Site(wiki_host)
page = site.pages[article_title]

In [2]:
# Collect every revision of the page into a list so it can be indexed and sorted.
revs = [revision for revision in page.revisions()]

In [3]:
# Inspect the first revision in the order the API returned it (before sorting).
revs[0]

OrderedDict([('revid', 1299150551),
             ('parentid', 1297288956),
             ('user', 'Gjb0zWxOb'),
             ('timestamp',
              time.struct_time(tm_year=2025, tm_mon=7, tm_mday=6, tm_hour=20, tm_min=51, tm_sec=5, tm_wday=6, tm_yday=187, tm_isdst=-1)),
             ('comment', 'state strategic reserves and cleanup/links')])

In [4]:
# Order the revisions chronologically (oldest first) by their timestamp.
revs = sorted(
    revs,
    key=lambda revision: revision["timestamp"],
)

In [5]:
# After sorting by timestamp, index 0 is the earliest revision.
revs[0]

OrderedDict([('revid', 275832581),
             ('parentid', 0),
             ('user', 'Pratyeka'),
             ('timestamp',
              time.struct_time(tm_year=2009, tm_mon=3, tm_mday=8, tm_hour=16, tm_min=41, tm_sec=7, tm_wday=6, tm_yday=67, tm_isdst=-1)),
             ('comment', 'creation (stub)')])

In [21]:
# First, you need to install the required packages
# Run these commands in a cell before your code:
!pip install torch
!pip install transformers

# Then your original code will work:
import torch
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

def find_sentiment(text):
    """Score ``text`` with ``sentiment_pipeline`` as a signed confidence.

    Only the first 250 characters of ``text`` are sent to the pipeline.

    Returns:
        The pipeline's ``score`` for the prediction, negated when the
        predicted label is ``"NEGATIVE"``.
    """
    prediction = sentiment_pipeline([text[:250]])[0]
    confidence = prediction["score"]
    return -confidence if prediction["label"] == "NEGATIVE" else confidence

Collecting torch
  Downloading torch-2.7.1-cp313-cp313-win_amd64.whl.metadata (28 kB)
Downloading torch-2.7.1-cp313-cp313-win_amd64.whl (216.1 MB)
   ---------------------------------------- 0.0/216.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/216.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/216.1 MB ? eta -:--:--
   ---------------------------------------- 0.3/216.1 MB ? eta -:--:--
   ---------------------------------------- 0.3/216.1 MB ? eta -:--:--
   ---------------------------------------- 0.5/216.1 MB 553.3 kB/s eta 0:06:30
   ---------------------------------------- 0.5/216.1 MB 553.3 kB/s eta 0:06:30
   ---------------------------------------- 0.5/216.1 MB 553.3 kB/s eta 0:06:30
   ---------------------------------------- 0.5/216.1 MB 553.3 kB/s eta 0:06:30
   ---------------------------------------- 0.5/216.1 MB 553.3 kB/s eta 0:06:30
   ---------------------------------------- 0.8/216.1 MB 341.8 kB/s eta 0:10:30
   ---------------

ERROR: Could not install packages due to an OSError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\js573\\AppData\\Local\\Temp\\pip-unpack-mqrgd7x0\\torch-2.7.1-cp313-cp313-win_amd64.whl'
Consider using the `--user` option or check the permissions.





ModuleNotFoundError: No module named 'torch'

In [8]:
# Fallback only: if the kernel already has a find_sentiment (for example the
# transformer-backed one defined in an earlier cell), keep it. Unconditionally
# redefining it here would silently replace the real scorer with a stub and
# make every sentiment value 0.
try:
    find_sentiment
except NameError:
    import warnings

    warnings.warn(
        "find_sentiment is not defined; falling back to a neutral stub, "
        "so every sentiment score will be 0.",
        stacklevel=2,
    )

    def find_sentiment(text):
        """Placeholder scorer: return 0 (neutral) for any comment text."""
        return 0

# Group revisions by calendar day: for each "YYYY-MM-DD" key keep the list of
# per-edit sentiment scores and a running count of edits made that day.
edits = {}

for revision in revs:
    day = time.strftime("%Y-%m-%d", revision["timestamp"])
    # setdefault creates the per-day bucket the first time a date is seen.
    bucket = edits.setdefault(day, {"sentiments": [], "edit_count": 0})
    bucket["edit_count"] += 1
    # A revision may carry no comment; score an empty string in that case.
    bucket["sentiments"].append(find_sentiment(revision.get("comment", "")))

In [9]:
from statistics import mean

# Collapse each day's list of per-edit scores into two summary numbers:
#   sentiment     - mean score of that day's edit comments
#   neg_sentiment - fraction of that day's comments with a negative score
for day_stats in edits.values():
    # pop() instead of index + del: if this cell is run again after the lists
    # have already been collapsed, the day is skipped instead of raising
    # KeyError on the missing "sentiments" key.
    scores = day_stats.pop("sentiments", None)
    if scores is None:
        continue

    if scores:
        day_stats["sentiment"] = mean(scores)
        day_stats["neg_sentiment"] = sum(1 for s in scores if s < 0) / len(scores)
    else:
        # No scored comments for this day: treat it as neutral.
        day_stats["sentiment"] = 0
        day_stats["neg_sentiment"] = 0

In [10]:
import pandas as pd

# One row per date key; the inner dict keys become the columns.
edits_df = pd.DataFrame.from_dict(data=edits, orient="index")

In [11]:
# Display the per-day summary table built from the edits dict.
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,0,0.0
2009-08-05,1,0,0.0
2009-08-06,2,0,0.0
2009-08-14,1,0,0.0
2009-10-13,2,0,0.0
...,...,...,...
2025-05-25,6,0,0.0
2025-06-09,1,0,0.0
2025-06-13,1,0,0.0
2025-06-25,1,0,0.0


In [12]:
# Parse the "YYYY-MM-DD" string labels into timestamps so the frame can be
# reindexed against a date range.
parsed_index = pd.to_datetime(edits_df.index)
edits_df.index = parsed_index

In [13]:
from datetime import datetime

# One entry per calendar day from the first day that has an edit up to today.
# The start is taken from the data rather than hard-coded, so the range stays
# correct if a different page is analysed.
dates = pd.date_range(start=edits_df.index.min(), end=datetime.today())

In [14]:
# Expand to one row per calendar day; days without any edit get 0 in every column.
edits_df = edits_df.reindex(index=dates, fill_value=0)

In [15]:
# Display the frame after reindexing to one row per calendar day.
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,0,0.0
2009-03-09,0,0,0.0
2009-03-10,0,0,0.0
2009-03-11,0,0,0.0
2009-03-12,0,0,0.0
...,...,...,...
2025-07-03,0,0,0.0
2025-07-04,0,0,0.0
2025-07-05,0,0,0.0
2025-07-06,1,0,0.0


In [16]:
# 30-row (30-day) trailing average of every column; rows without a full
# 30-row window are left as NaN.
rolling_edits = edits_df.rolling(window=30, min_periods=30).mean()

In [17]:
# Drop every row that contains a NaN (the leading rows without a full window).
rolling_edits = rolling_edits.dropna(axis=0, how="any")

In [18]:
# Display the rolling averages that are written to CSV in the next cell.
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-04-06,0.133333,0.0,0.0
2009-04-07,0.000000,0.0,0.0
2009-04-08,0.000000,0.0,0.0
2009-04-09,0.000000,0.0,0.0
2009-04-10,0.000000,0.0,0.0
...,...,...,...
2025-07-03,0.100000,0.0,0.0
2025-07-04,0.100000,0.0,0.0
2025-07-05,0.100000,0.0,0.0
2025-07-06,0.133333,0.0,0.0


In [19]:
# Write the rolling averages to a CSV file in the current working directory.
rolling_edits.to_csv(path_or_buf="wikipedia_edits.csv")