## Pre-trained LSTM sentiment analyzer

In [1]:
# pip3 install flair
# Note: requires ~1 GB of disk space

In [3]:
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set()
np.random.seed(0)  # fixed seed so the randomly picked example tweet is reproducible

FNAME = "./data/btc_year_full.csv"

# Derive "<name>_cleaned.csv" from FNAME. The dot is escaped and the pattern is
# anchored to the end of the string so only the literal ".csv" extension is
# rewritten (an unescaped '.' matches any character, anywhere in the path).
dataset = pd.read_csv(re.sub(r'\.csv$', '_cleaned.csv', FNAME))
dataset.head()

Unnamed: 0,date,text,likes,retweets,cleaned_text
0,2018-12-01 15:13:48,Mainstream media is screaming that \xe2\x80\x9...,373,106,mainstream media scream bitcoin dead twitter t...
1,2018-12-01 22:59:36,My friends had no problem buying #bitcoin at $...,684,101,friend problem buy bitcoin $7k way $20k panic ...
2,2018-12-01 15:19:30,If you pay your Ohio taxes using #bitcoin then...,518,52,pay ohio tax use bitcoin owe even tax btc incr...
3,2018-12-01 18:57:06,Bearableguy123 screen shot on #Reddit \n\nBina...,100,39,bearableguy123 screen shot reddit binari code ...
4,2018-12-01 13:57:04,$BTC $BTCUSD #Bitcoin \n\nBulls taking profit ...,66,8,$btc $btcusd bitcoin bull take profit everi sm...


In [4]:
import flair
# Load flair's pre-trained English sentiment classifier ('en-sentiment').
# Downloading the model can take about 10 minutes.
# Note: needs roughly 2 GB of memory.
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

2019-12-08 06:13:20,570 loading file /home/hans/.flair/models/imdb-v0.4.pt


*Example*

In [5]:
# Sentence with both negative and positive wording, used as a quick check of the classifier.
sentence = 'I hate you very much, but sometimes I love lollipops'
s = flair.data.Sentence(sentence)
flair_sentiment.predict(s)  # the prediction is read back from `s.labels` below
total_sentiment = s.labels
total_sentiment

[POSITIVE (0.9937134385108948)]

In [6]:
def findpolarity(sentence):
    """Classify the sentiment of one text with the pre-trained flair model.

    Parameters
    ----------
    sentence : str
        Text to classify. Missing values (``None``, ``NaN`` or any other
        non-string) and empty / whitespace-only strings are treated as neutral.

    Returns
    -------
    int
        ``1`` if the model predicts POSITIVE, ``-1`` if it predicts NEGATIVE,
        ``0`` otherwise (no usable text or no recognised label).
    """
    # Empty cells in a CSV are read back by pandas as float NaN; there is
    # nothing to classify in that case, so report neutral instead of failing.
    if not isinstance(sentence, str) or not sentence.strip():
        return 0

    s = flair.data.Sentence(sentence)
    flair_sentiment.predict(s)

    # Compare the predicted label values themselves rather than searching the
    # string representation of the label list, which may contain more than the
    # label name (e.g. the tweet text) and produce false matches.
    predicted = {label.value for label in s.labels}
    if 'POSITIVE' in predicted:
        return 1
    if 'NEGATIVE' in predicted:
        return -1
    return 0  # the model emitted no POSITIVE/NEGATIVE label

*Example*

In [8]:
raw_tweets = dataset['text']
cleaned_tweets = dataset['cleaned_text']
idx = np.random.randint(0, len(raw_tweets))

# Show the same randomly chosen tweet in its raw and cleaned form, each with
# the polarity the model assigns to it; a blank line separates the two reports.
for position, (heading, tweets) in enumerate((("Raw: ", raw_tweets), ("Cleaned: ", cleaned_tweets))):
    if position:
        print()
    print(heading)
    print("Tweet: ", tweets[idx])
    print("Polarity: ", findpolarity(tweets[idx]))

Raw: 
Tweet:  I\'m really, really excited for this release! Go @Get_Scatter ! #eos #blockchain #wallet #bitcoin #btc #scatterhttps://twitter.com/Get_Scatter/status/1142947586350993408 \xe2\x80\xa6
Polarity:  1

Cleaned: 
Tweet:  realli realli excit releas go scatter eo blockchain wallet bitcoin btc scatter scatter statu 1142947586350993408
Polarity:  1


In [None]:
from tqdm import tqdm

sentiment_cleaned = []
sentiment_raw = []
# Walk both columns in lockstep by value. Looking rows up with `series[i]`
# treats the positional counter as an index label, which only works while the
# frame still has its default 0..n-1 index.
# `total` is passed explicitly because zip() has no length for the progress bar.
for cleaned_tweet, raw_tweet in tqdm(zip(cleaned_tweets, raw_tweets), total=len(cleaned_tweets)):
    sentiment_cleaned.append(findpolarity(cleaned_tweet))
    sentiment_raw.append(findpolarity(raw_tweet))

  0%|          | 8/38266 [00:35<50:57:46,  4.80s/it]

## Extremely long execution time -> run on Google Colab

In [35]:
assert len(cleaned_tweets) == len(dataset['text'])
_n = len(raw_tweets)
# Guard against a partially completed scoring run: both score lists must cover
# every tweet, otherwise the comparison below would be incomplete.
assert len(sentiment_cleaned) == len(sentiment_raw) == _n, \
    "sentiment lists do not cover every tweet - re-run the scoring cell"
# Number of tweets whose raw and cleaned text were given different polarities.
_s = sum(cl != raw for cl, raw in zip(sentiment_cleaned, sentiment_raw))
# Avoid a ZeroDivisionError on an empty dataset.
_ratio = round(_s/_n, 3) if _n else 0.0
print("Total {} sentiments ain't equal from {}. The ratio is: {}".format(_s, _n, _ratio))

Total 7 sentiments ain't equal from 20. The ratio is: 0.35


In [36]:
# Attach the per-tweet polarities (cleaned text / raw text) to the frame.
dataset['sentiment_cl'] = sentiment_cleaned
dataset['sentiment_raw'] = sentiment_raw
# Write to "<name>_sentiments_LSTM.csv". The dot is escaped and the pattern is
# anchored so only the literal ".csv" extension at the end of FNAME is replaced.
dataset.to_csv(re.sub(r'\.csv$', '_sentiments_LSTM.csv', FNAME), header=True, index=False, encoding='utf-8')
dataset.head()

Unnamed: 0,date,text,likes,retweets,cleaned_text,sentiment_cl,sentiment_raw
0,2019-11-29 03:12:48,I will #giveaway $30 in #Bitcoin to someone r...,430,297,giveaway $30 bitcoin someon random follow like...,-1,1
1,2019-11-29 08:46:47,$100 #Bitcoin #Crypto #Giveaway\nONE PERSON GE...,1282,1208,$100 bitcoin crypto giveaway one person get $1...,-1,-1
2,2019-11-30 17:38:14,Announcement CODEO TOKEN for Bounty Program Pa...,57,15,announc codeo token bounti program particip co...,1,-1
3,2019-11-30 09:35:44,I will #Giveaway $100 #bitcoin to 2 people ran...,513,470,giveaway $100 bitcoin 2 peopl randomli $50 fol...,-1,-1
4,2019-11-30 03:02:32,“Bitcoin is shining now when we are worried ab...,13,7,bitcoin shine worri world amal amanpuri btc,1,1
