After cleaning the tweets, we score the sentiment of each tweet (positive, negative, or neutral) using several popular sentiment analysis tools: VADER, TextBlob, Flair, Transformers (BERT and FinBERT), Pattern, and spaCy.

In [None]:
import pandas as pd
import re
import datetime
import numpy as np

# Load tweets

In [None]:
# Read the cleaned tweets; the 'datetime' and 'date' columns are parsed as
# dates and '\n' is used as the line terminator.
csv_options = dict(lineterminator='\n', parse_dates=['datetime', 'date'])
tweets = pd.read_csv('02-tweets_cleaned.csv', **csv_options)
# Report how many rows were loaded.
print(len(tweets))

  interactivity=interactivity, compiler=compiler, result=result)


16865848


In [None]:
%time
tweets.sort_values(by=['datetime'], ignore_index=True, inplace=True)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 8.11 µs


# Sentiment Analysis

### VADER

In [None]:
%%capture
!pip install vaderSentiment

In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
%%capture
nltk.download('vader_lexicon')

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
# One shared analyzer instance, reused for every call to `vader`.
# NOTE: `SentimentIntensityAnalyzer` is imported twice earlier in this notebook
# (from nltk, then from vaderSentiment); in a top-to-bottom run the later
# import is the one bound here.
sid_obj = SentimentIntensityAnalyzer()


def vader(sentence):
    """Return the 'compound' entry of the analyzer's polarity scores for `sentence`."""
    return sid_obj.polarity_scores(sentence)['compound']

In [None]:
%%time
#Polarity by vader
tweets['polarity_vader'] = tweets['text_glove'].map(vader)

CPU times: user 14min 18s, sys: 1.29 s, total: 14min 19s
Wall time: 14min 21s


### TextBlob

In [None]:
from textblob import TextBlob

In [None]:
def get_tweet_sentiment(tweet):
    """Return the TextBlob polarity of the passed tweet text.

    A TextBlob object is built from `tweet` and its `sentiment.polarity`
    attribute is returned unchanged.
    """
    return TextBlob(tweet).sentiment.polarity

In [None]:
%%time
#Polarity by textblob
tweets['polarity_textblob'] = tweets['text_glove'].map(get_tweet_sentiment)

### Flair

In [None]:
%%capture
!pip install flair

In [None]:
from flair.models import TextClassifier
from flair.data import Sentence

In [None]:
# Load Flair's 'en-sentiment' text classifier; `predict_flair` reads it through
# this module-level name.
# NOTE: the Transformers section further down assigns a different object to
# `classifier`, so this cell must be re-run before using `predict_flair` again
# after that section has executed.
classifier = TextClassifier.load('en-sentiment')

2022-07-05 06:18:20,108 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpjmvc6g_8


100%|██████████| 265512723/265512723 [00:12<00:00, 20789457.94B/s]

2022-07-05 06:18:33,296 copying /tmp/tmpjmvc6g_8 to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2022-07-05 06:18:34,019 removing temp file /tmp/tmpjmvc6g_8
2022-07-05 06:18:34,093 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

In [None]:
from tqdm import tqdm
tqdm.pandas(desc="Progress: ")

In [None]:
def predict_flair(sentence):
    """Score the sentiment of one sentence with the Flair classifier.

    Args:
        sentence: Text to classify.

    Returns:
        The confidence of the first predicted label, rounded to 3 decimals;
        kept positive when that label is 'POSITIVE' and negated otherwise.
        Returns 0.0 when the classifier attached no label to the sentence
        (previously this raised IndexError).
    """
    # NOTE: relies on the module-level `classifier` being the Flair model; the
    # Transformers section later rebinds that name to a different object.
    text = Sentence(sentence)
    classifier.predict(text)

    # Guard against an unlabeled sentence (presumably empty text - TODO confirm).
    if not text.labels:
        return 0.0

    # Read the label directly instead of round-tripping through to_dict();
    # `value` / `score` are the fields that to_dict() exposed as
    # 'value' / 'confidence'.
    top_label = text.labels[0]
    signed_score = top_label.score if top_label.value == 'POSITIVE' else -top_label.score
    return round(signed_score, 3)

In [None]:
%%time
tweets['text_glove'][:1000].map(predict_flair)

CPU times: user 1min 10s, sys: 248 ms, total: 1min 10s
Wall time: 1min 14s


0      0.980
1      0.999
2      0.998
3     -0.916
4     -0.999
       ...  
995    0.998
996   -1.000
997    0.567
998   -0.808
999   -0.992
Name: text_glove, Length: 1000, dtype: float64

### Transformers

In [None]:
%%capture
!pip install transformers
from transformers import pipeline

#### BERT model (default `sentiment-analysis` pipeline)

In [None]:
# Name the checkpoint explicitly instead of relying on the library's implicit
# default for the 'sentiment-analysis' task, so results stay reproducible if
# that default ever changes.
# NOTE: this rebinds `classifier`, which until this cell referred to the Flair
# model used by `predict_flair`.
classifier = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [None]:
def bert_sentiment(tweet):
    """Return a signed score for `tweet` from the Transformers pipeline.

    The first prediction's 'score' is kept positive when its 'label' is
    'POSITIVE' and negated otherwise, then rounded to 3 decimals.
    """
    prediction = classifier(tweet)[0]
    sign = 1 if prediction['label'] == 'POSITIVE' else -1
    return round(sign * prediction['score'], 3)

In [None]:
%%time
tweets['text_glove'][:1000].map(bert_sentiment)

CPU times: user 1min 14s, sys: 269 ms, total: 1min 14s
Wall time: 1min 17s


0      0.996
1      0.998
2      1.000
3     -0.969
4     -0.997
       ...  
995    0.986
996   -0.995
997   -0.978
998   -0.987
999   -0.998
Name: text_glove, Length: 1000, dtype: float64

#### FinBERT model

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification

In [None]:
# The same checkpoint id provides both the tokenizer and the 3-label
# sequence-classification model.
finbert_checkpoint = 'yiyanghkust/finbert-tone'
tokenizer = BertTokenizer.from_pretrained(finbert_checkpoint)
finbert = BertForSequenceClassification.from_pretrained(finbert_checkpoint, num_labels=3)

In [None]:
# Wrap the FinBERT model and its tokenizer in a sentiment-analysis pipeline.
# NOTE: the spaCy section further down reuses the name `nlp` for a different object.
nlp = pipeline(
    "sentiment-analysis",
    model=finbert,
    tokenizer=tokenizer,
)

In [None]:
def finbert_sentiment(text):
    """Map the pipeline's label for `text` to an integer class.

    'Negative' -> -1, 'Positive' -> 1, any other label -> 0.
    """
    # NOTE: `nlp` is looked up at call time; the spaCy section later rebinds it.
    label_to_class = {'Negative': -1, 'Positive': 1}
    top_label = nlp(text)[0]['label']
    return label_to_class.get(top_label, 0)

In [None]:
%%time
tweets['text_glove'][:1000].map(finbert_sentiment)

CPU times: user 2min 25s, sys: 526 ms, total: 2min 25s
Wall time: 2min 27s


0      1
1      1
2      1
3     -1
4      0
      ..
995    0
996    0
997    0
998    0
999    0
Name: text_glove, Length: 1000, dtype: int64

### Pattern

In [None]:
%%capture
!pip install pattern
nltk.download('omw-1.4')

In [None]:
from pattern.en import sentiment

(0.75, 0.8)


In [None]:
def pattern_sentiment(text):
    """Return the first element of the result of pattern.en's `sentiment(text)`."""
    # presumably element 0 is the polarity - verify against the pattern.en docs
    scores = sentiment(text)
    return scores[0]

In [None]:
%%time
tweets['text_glove'][:1000].map(pattern_sentiment)

CPU times: user 280 ms, sys: 2.09 ms, total: 282 ms
Wall time: 283 ms


0      0.617273
1      0.213068
2      0.506534
3      0.112000
4      0.666667
         ...   
995    0.000000
996    0.000000
997    0.125000
998    0.866667
999    0.000000
Name: text_glove, Length: 1000, dtype: float64

### SpaCy

In [None]:
%%capture
!pip install spacy
!pip install spacytextblob

In [None]:
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob

In [None]:
# Load the 'en_core_web_sm' spaCy pipeline and add the 'spacytextblob'
# component to it; `spacy_sentiment` reads the result through `nlp`.
# NOTE: this rebinds `nlp`, which until this cell referred to the FinBERT
# pipeline used by `finbert_sentiment`.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('spacytextblob')

<spacytextblob.spacytextblob.SpacyTextBlob at 0x7fa11cce2550>

In [None]:
def spacy_sentiment(text):
    """Return the spacytextblob polarity of `text`, rounded to 3 decimals."""
    polarity = nlp(text)._.blob.polarity
    return round(polarity, 3)

In [None]:
%%time
tweets['text_glove'][:1000].map(spacy_sentiment)

CPU times: user 9.92 s, sys: 34.6 ms, total: 9.95 s
Wall time: 9.95 s


0      0.522
1      0.213
2      0.507
3      0.112
4      0.000
       ...  
995    0.000
996    0.000
997    0.125
998    0.600
999    0.000
Name: text_glove, Length: 1000, dtype: float64

# Results

In [None]:
# Preview the first five rows, including the polarity columns added above.
tweets.head(n=5)

Unnamed: 0,datetime,timestamp,username,text,text_clean,text_glove,polarity_textblob,polarity_vader
0,2021-01-01 00:00:02+00:00,2021-01-01 00:00:00+00:00,BTCMEXGlobal,"Now is 0:00 UTC on Jan 1, 2021🎇\n\n#BTCMEX her...",now is utc on jan btcmex hereby wishes every u...,now is utc on jan 🎇 btcmex hereby wishes every...,0.521591,0.9694
1,2021-01-01 00:00:03+00:00,2021-01-01 00:00:00+00:00,K_9Girl,#Bitcoin let's start this NEW Year! @CashApp l...,bitcoin let s start this new year let s go hap...,bitcoin let's start this new year! let's go! h...,0.213068,0.0
2,2021-01-01 00:00:03+00:00,2021-01-01 00:00:00+00:00,Bitstamp,Happy new year everybody! What an explosive ye...,happy new year everybody what an explosive yea...,happy new year everybody! what an explosive ye...,0.506534,0.4184
3,2021-01-01 00:00:04+00:00,2021-01-01 00:00:00+00:00,WSJ,High fees and volatility present obstacles to ...,high fees and volatility present obstacles to ...,high fees and volatility present obstacles to ...,0.112,0.1406
5,2021-01-01 00:00:09+00:00,2021-01-01 00:00:00+00:00,abubakarhayus,@WAZIRIHBS @whitenigerian Because I tell you s...,because i tell you say i get bitcoin guy na li...,because i tell you say i get bitcoin? guy na ...,0.0,0.6377


# Save Results

In [None]:
# Keep only the columns listed here (in this order) and give them their
# capitalised output names; one mapping drives both the selection and the rename.
# NOTE(review): the `tweets.head()` preview recorded in this notebook shows no
# 'hashtags' column - confirm it exists in the loaded CSV.
output_columns = {
    'text': 'Text',
    'timestamp': 'Timestamp',
    'polarity_textblob': 'Polarity_Textblob',
    'polarity_vader': 'Polarity_Vader',
    'hashtags': 'Hashtags',
}
tweets = tweets[list(output_columns)].rename(columns=output_columns)