<a href="https://colab.research.google.com/github/jamesodukoya/BitcoinPricePrediction/blob/main/Bitcoin_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bitcoin Sentiment Analysis Based on Wikipedia Edits

## Installing the mwclient Package

In [None]:
!pip install mwclient

## Downloading Bitcoin Wikipedia Page Edits

In [None]:
import mwclient
import time

# Open a client for English Wikipedia and get a handle on the "Bitcoin" article.
site = mwclient.Site("en.wikipedia.org")
page = site.pages["Bitcoin"]

In [None]:
# Collect every revision record of the page into a list
# (this walks the whole edit history, so it may take a while).
revs = list(page.revisions())

In [52]:
# Inspect one revision record to see which fields it carries
# (revid, parentid, user, timestamp, comment).
revs[0]

  and should_run_async(code)


OrderedDict([('revid', 1186191035),
             ('parentid', 1186190249),
             ('user', 'A455bcd9'),
             ('timestamp',
              time.struct_time(tm_year=2023, tm_mon=11, tm_mday=21, tm_hour=14, tm_min=15, tm_sec=2, tm_wday=1, tm_yday=325, tm_isdst=-1)),
             ('comment', '/* 2017–2019 */ cutting clutter')])

In [None]:
# Order the revisions chronologically, oldest first, by their timestamp field.
revs = sorted(
    revs,
    key=lambda rev: rev["timestamp"],
)

## Determining the Sentiment of Each Edit Using the Transformers Package

In [None]:
from transformers import pipeline

In [None]:
# Name the checkpoint explicitly instead of relying on the library's implicit
# default, so the scores do not silently change if that default changes.
# This is the checkpoint the "sentiment-analysis" task has used as its default.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

def find_sentiment(text, max_chars=250):
    """Return a signed sentiment score for ``text``.

    The text is truncated to its first ``max_chars`` characters and passed to
    the module-level ``sentiment_pipeline``. The score reported by the
    pipeline is negated when the predicted label is ``'NEGATIVE'`` and
    returned unchanged otherwise.

    Args:
        text: The string to score (here, a revision's edit comment).
        max_chars: Maximum number of leading characters sent to the model.
            Defaults to 250.

    Returns:
        ``0.0`` when ``text`` is empty or only whitespace; otherwise the
        pipeline score, negative for ``'NEGATIVE'`` predictions.
    """
    # A blank comment carries no sentiment signal. Scoring it would only
    # record the model's response to empty input, so treat it as neutral.
    if not text or not text.strip():
        return 0.0

    sent = sentiment_pipeline([text[:max_chars]])[0]
    score = sent["score"]
    if sent["label"] == "NEGATIVE":
        score *= -1
    return score

### Calculate the Total Number of Edits and Collect Sentiment Scores Per Day

In [None]:
# Group revisions by calendar day: for each "YYYY-MM-DD" key keep a running
# edit count and the list of sentiment scores of that day's edit comments.
edits = {}

for rev in revs:
    date = time.strftime("%Y-%m-%d", rev["timestamp"])

    # Create the day's record on first sight, otherwise reuse the existing one.
    day_stats = edits.setdefault(date, {"sentiments": [], "edit_count": 0})
    day_stats["edit_count"] += 1

    # Revisions without a "comment" field are scored as an empty string.
    comment = rev.get("comment", "")
    day_stats["sentiments"].append(find_sentiment(comment))

### Determine the Average Sentiment Score Per Day

In [None]:
from statistics import mean

# Replace each day's raw score list with two summary values:
#   sentiment     - mean of the day's scores
#   neg_sentiment - fraction of the day's scores that are below zero
for key in edits:
    # pop() rather than del: if this cell is run a second time the list is
    # already gone, and the day is skipped instead of raising KeyError.
    scores = edits[key].pop("sentiments", None)
    if scores is None:
        continue

    if len(scores) > 0:
        edits[key]["sentiment"] = mean(scores)
        edits[key]["neg_sentiment"] = sum(s < 0 for s in scores) / len(scores)
    else:
        # No scores recorded for this day: fall back to neutral values.
        edits[key]["sentiment"] = 0
        edits[key]["neg_sentiment"] = 0

### Pass Results into a Pandas DataFrame

In [58]:
import pandas as pd

# Build a table with one row per date key of `edits`; the keys of each
# per-day dict become the columns.
edits_df = pd.DataFrame.from_dict(edits, orient="index")

edits_df

  and should_run_async(code)


Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,-0.550525,0.750000
2009-08-05,1,0.748121,0.000000
2009-08-06,2,0.995746,0.000000
2009-08-14,1,0.930021,0.000000
2009-10-13,2,-0.227501,0.500000
...,...,...,...
2023-11-17,3,0.203373,0.333333
2023-11-18,2,-0.996377,1.000000
2023-11-19,17,-0.977597,1.000000
2023-11-20,120,-0.597189,0.791667


In [None]:
# The index labels are "YYYY-MM-DD" strings; convert them to datetimes so the
# frame can later be reindexed against a date range.
edits_df.index = pd.to_datetime(edits_df.index)

### Filling in Zero Values for Days Without Edits

In [None]:
from datetime import datetime

# Cover every calendar day from the earliest day present in the data through
# today. Taking the start from the index (instead of a hard-coded date) means
# no existing row can fall outside the range and be dropped by reindex().
dates = pd.date_range(start=edits_df.index.min(), end=datetime.today())

# Days with no edits get a row of zeros in every column.
edits_df = edits_df.reindex(dates, fill_value=0)

In [61]:
# Display the reindexed frame: days without edits now appear as all-zero rows.
edits_df

  and should_run_async(code)


Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-08,4,-0.550525,0.750000
2009-03-09,0,0.000000,0.000000
2009-03-10,0,0.000000,0.000000
2009-03-11,0,0.000000,0.000000
2009-03-12,0,0.000000,0.000000
...,...,...,...
2023-11-17,3,0.203373,0.333333
2023-11-18,2,-0.996377,1.000000
2023-11-19,17,-0.977597,1.000000
2023-11-20,120,-0.597189,0.791667


In [62]:
# Mean of each column over a sliding window of 30 rows (one row per day after
# the reindex above). min_periods=30 yields NaN until a full window is
# available, and dropna() removes those leading rows.
rolling_edits = (
    edits_df
    .rolling(30, min_periods=30)
    .mean()
    .dropna()
)
rolling_edits

  and should_run_async(code)


Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-04-06,0.133333,-0.018351,0.025000
2009-04-07,0.000000,0.000000,0.000000
2009-04-08,0.000000,0.000000,0.000000
2009-04-09,0.000000,0.000000,0.000000
2009-04-10,0.000000,0.000000,0.000000
...,...,...,...
2023-11-17,0.566667,-0.024696,0.161111
2023-11-18,0.633333,-0.057909,0.194444
2023-11-19,1.200000,-0.090495,0.227778
2023-11-20,5.200000,-0.110402,0.254167


In [None]:
# Save the rolling averages to wikipedia_edits.csv (path is relative to the
# current working directory); the date index is written as the first column.
rolling_edits.to_csv("wikipedia_edits.csv")