In [1]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
df = pd.read_csv('data/trumptweets.csv')

In [3]:
df.head()

Unnamed: 0,id,link,content,date,retweets,favorites,mentions,hashtags,geo
0,1698308935,https://twitter.com/realDonaldTrump/status/169...,Be sure to tune in and watch Donald Trump on L...,2009-05-04 20:54:25,500,868,,,
1,1701461182,https://twitter.com/realDonaldTrump/status/170...,Donald Trump will be appearing on The View tom...,2009-05-05 03:00:10,33,273,,,
2,1737479987,https://twitter.com/realDonaldTrump/status/173...,Donald Trump reads Top Ten Financial Tips on L...,2009-05-08 15:38:08,12,18,,,
3,1741160716,https://twitter.com/realDonaldTrump/status/174...,New Blog Post: Celebrity Apprentice Finale and...,2009-05-08 22:40:15,11,24,,,
4,1773561338,https://twitter.com/realDonaldTrump/status/177...,"""My persona will never be that of a wallflower...",2009-05-12 16:07:28,1399,1965,,,


In [4]:
model = SentimentIntensityAnalyzer()

In [5]:
def get_sentiment(text):
    scores = model.polarity_scores(text)
    return scores.get('compound')

In [6]:
%time df['sentiment'] = np.vectorize(get_sentiment)(df.content)

Wall time: 6.86 s


In [7]:
df.head()

Unnamed: 0,id,link,content,date,retweets,favorites,mentions,hashtags,geo,sentiment
0,1698308935,https://twitter.com/realDonaldTrump/status/169...,Be sure to tune in and watch Donald Trump on L...,2009-05-04 20:54:25,500,868,,,,0.5255
1,1701461182,https://twitter.com/realDonaldTrump/status/170...,Donald Trump will be appearing on The View tom...,2009-05-05 03:00:10,33,273,,,,0.7712
2,1737479987,https://twitter.com/realDonaldTrump/status/173...,Donald Trump reads Top Ten Financial Tips on L...,2009-05-08 15:38:08,12,18,,,,0.6468
3,1741160716,https://twitter.com/realDonaldTrump/status/174...,New Blog Post: Celebrity Apprentice Finale and...,2009-05-08 22:40:15,11,24,,,,0.0
4,1773561338,https://twitter.com/realDonaldTrump/status/177...,"""My persona will never be that of a wallflower...",2009-05-12 16:07:28,1399,1965,,,,0.0


In [21]:
df['retweets'] = df['retweets'].replace(np.nan, df['retweets'].mean)

In [26]:
df.to_csv('trump_sentiment.csv')

In [27]:
df['is_positive'] = df.sentiment>0

In [32]:
import spacy
import re

In [36]:
nlp = spacy.load('en_core_web_sm')

def get_entities(text):
    doc = nlp(text)
    list_ = []
    for ent in doc.ents:
        if ent.label_.lower() not in ('time', 'money', 'date'):
            temp_  = re.sub('[^a-z0-9]+', '_', ent.text.lower())
            list_.append('__'.join([temp_, ent.label_.upper()]))
    return ''.join(list_)

In [37]:
%time df['entities'] = np.vectorize(get_entities)(df.content)

Wall time: 5min 57s


In [39]:
df[df['sentiment']<0]

Unnamed: 0,id,link,content,date,retweets,favorites,mentions,hashtags,geo,sentiment,is_positive,entities
26,2222067805,https://twitter.com/realDonaldTrump/status/222...,"RE: FB Vanity URLs: SF Chronicle - ""David Beck...",2009-06-18 15:26:53,18,17,,,,-0.2263,False,sf_chronicle__ORGdavid_beckham__PERSONone__CAR...
34,2650952843,https://twitter.com/realDonaldTrump/status/265...,Donald Trump backs 'Apprentice' Randal Pinkett...,2009-07-15 15:40:35,6,6,,,,-0.0516,False,donald_trump__PERSONrandal_pinkett__PERSONn_j_...
41,3450626731,https://twitter.com/realDonaldTrump/status/345...,Watch the Miss Universe competition LIVE from ...,2009-08-21 16:32:45,15,6,@9pm,,,-0.1531,False,bahamas__GPE8_23__CARDINALnbc__ORG
42,3498743628,https://twitter.com/realDonaldTrump/status/349...,Reminder: The Miss Universe competition will b...,2009-08-23 23:12:37,10,12,@9pm,,,-0.1531,False,universe__PERSONnbc__ORG
44,3688564134,https://twitter.com/realDonaldTrump/status/368...,- More hysterical DSRL videos featuring Donald...,2009-09-01 15:55:34,56,58,,,,-0.1010,False,dsrl__PERSONdonald_trump__PERSONdouble_trump__...
...,...,...,...,...,...,...,...,...,...,...,...,...
41111,1218674016942219265,https://twitter.com/realDonaldTrump/status/121...,"A massive 200 Billion Dollar Sea Wall, built a...",2020-01-19 00:18:22,25758,125103,,,,-0.5562,False,new_york__GPE
41113,1218697758728769536,https://twitter.com/realDonaldTrump/status/121...,If you listened to the flawed advice of @ paul...,2020-01-19 01:52:42,16643,69632,@ @,,,-0.8268,False,the_record_breaking_stock_market__ORGfacts__ORG
41114,1218717833552711680,https://twitter.com/realDonaldTrump/status/121...,"“Nancy Pelosi said, it’s not a question of pro...",2020-01-19 03:12:28,35475,149694,@ @,,,-0.5242,False,nancy_pelosi__PERSON
41118,1219004689716412416,https://twitter.com/realDonaldTrump/status/121...,Now Mini Mike Bloomberg is critical of Jack Wi...,2020-01-19 22:12:20,36239,149571,,,,-0.8070,False,mini_mike_bloomberg__PERSONjack_wilson__PERSON...


In [42]:
df[['sentiment','entities']]

Unnamed: 0,sentiment,entities
0,0.5255,donald_trump__PERSONdavid_letterman__PERSONthe...
1,0.7712,donald_trump__PERSONcelebrity_apprentice__PERS...
2,0.6468,donald_trump__PERSONtop_ten_financial_tips__OR...
3,0.0000,new_blog_post__ORG
4,0.0000,_donald_j_trump__PERSON
...,...,...
41117,0.8858,the_republican_party__ORGstrong__GPE
41118,-0.8070,mini_mike_bloomberg__PERSONjack_wilson__PERSON...
41119,0.9402,the_great_state_of__FACtexas__GPEamerica__GPEf...
41120,-0.6588,house__ORGmark_levin___PERSONfoxnews__PRODUCT
