In [401]:
import re

import pandas as pd
import textblob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [402]:
# Token -> polarity lookup table built from the tab-separated VADER lexicon file.
# Only the first two columns are read; column names are supplied explicitly,
# so the first row of the file is treated as data rather than as a header.
vader_lex = (
    pd.read_csv(
        'vader_lexicon.txt',
        sep='\t',
        usecols=[0, 1],
        names=['token', 'polarity'],
    )
    .set_index('token')
)

In [400]:
# Input CSV of tweets and the name of the column that holds the raw tweet text.
csv_file = 'hashtagcoronavirus_covid19.csv'
column_name = 'tweet_text'

# Read the whole file, then pull the raw-text column out as a Series.
tweets_df = pd.read_csv(csv_file)
tweet_text = tweets_df[column_name]

In [395]:
def clean_tweet(tweet):
    '''
    Normalize a raw tweet string so its words can be looked up in a lexicon.

    Steps:
    1. Lowercase the text.
    2. Replace @mentions (including handles that contain underscores), links of
       the form scheme://..., and every character that is not an ASCII letter,
       digit, space or tab with a single space.
    3. Collapse runs of whitespace and strip both ends.

    Note: escaped byte sequences (a literal backslash followed by x and two hex
    digits) lose their backslash in step 2 and survive as tokens such as "xe2".
    Presumably none of these tokens appear in the VADER lexicon, so they add 0
    to a polarity computed as a sum of lexicon ratings -- verify if the lexicon
    changes.

    Parameters
    ----------
    tweet : str
        Raw tweet text. Non-string values (e.g. NaN/None for a missing tweet)
        are treated as empty text.

    Returns
    -------
    str
        Lowercased, space-separated words; "" when nothing is left.
    '''
    # Missing values read by pandas arrive as float NaN / None, which have no .lower().
    if not isinstance(tweet, str):
        return ''

    low = tweet.lower()
    # Raw string: \w and \S are regex escapes, not (invalid) Python string escapes.
    # "@\w+" also consumes underscores, so "@some_user" leaves no stray "user" token.
    no_punct = ' '.join(
        re.sub(r"(@\w+)|([^0-9A-Za-z \t])|(\w+://\S+)", " ", low).split()
    )
    return no_punct
    

In [403]:
# Not used below. Alternative to summing token polarities from vader_lexicon.txt by hand: use VADER's own compound score.

def sentiment_scores(sentence):
    """Return the VADER compound sentiment score for ``sentence``.

    The SentimentIntensityAnalyzer is created on first use and cached on the
    function object, so scoring many sentences does not rebuild the analyzer
    for every call.

    Parameters
    ----------
    sentence : str
        Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns
    -------
    The ``'compound'`` entry of the sentiment dictionary (which also contains
    pos, neg and neu scores).
    """
    analyzer = getattr(sentiment_scores, "_analyzer", None)
    if analyzer is None:
        # First call: build the analyzer once and remember it for later calls.
        analyzer = SentimentIntensityAnalyzer()
        sentiment_scores._analyzer = analyzer

    sentiment_dict = analyzer.polarity_scores(sentence)
    return sentiment_dict['compound']

In [396]:
# Build the clean_text column in one vectorized pass instead of writing one
# cell at a time with .at (which also assumed the index labels are 0..n-1).
# The [1:] slice drops the first character of each raw value before cleaning;
# in the displayed data that character is the leading "b" of a b'...' literal.
tweets_df["clean_text"] = tweet_text.str[1:].map(clean_tweet)

In [397]:
# Display the frame to inspect the clean_text column.
# NOTE(review): the saved output also lists a polarity column, which is only
# assigned in a later cell -- presumably left over from an earlier run; confirm
# with Restart & Run All.
tweets_df

Unnamed: 0,timestamp,tweet_text,username,all_hashtags,followers_count,location,clean_text,polarity
0,2020-04-14 02:31:35,b'He gets up every day? Hasn\xe2\x80\x99t caug...,b'bigmusicfan71',"['coronavirus', 'COVID19', '25thAmendmentNow']",354,b'',he gets up every day hasn xe2 x80 x99t caught ...,0.0
1,2020-04-14 02:31:35,b'@Welptheresthat @NorthmanTrader Not when the...,b'GKeeto',"['china', 'ChinaMustExplain', 'ChinaVirus', 'C...",6,b'',not when the product is complete chinese made ...,0.0
2,2020-04-14 02:31:34,b'Holy \xf0\x9f\x92\xa9 \xe2\x81\xa6@VanityFai...,b'DrumpfsLies',"['INCOMPETENCEKILLS', 'COVID19', 'coronavirus'...",230,"b'California, USA'",holy xf0 x9f x92 xa9 xe2 x81 xa6 xe2 x81 xa9 t...,2.8
3,2020-04-14 02:31:32,b'This could be the end of the line for cruise...,b'1petermartin',"['COVID19', 'coronavirus', 'ausecon', 'auspol']",32382,b'',this could be the end of the line for cruise s...,0.0
4,2020-04-14 02:31:22,"b'Dear @CNN, Can you sincerely claim @realDon...",b'WarmMonkey',"['coronavirus', 'COVID19']",651,b'Out of The Closet',dear can you sincerely claim handling the coro...,1.3
...,...,...,...,...,...,...,...,...
995,2020-04-14 01:34:17,b'@JimWTAE @WTAE And here is a third Smithfiel...,b'realhumanrights',"['Wisconsin', 'Cudahy', 'COVID19', 'coronaviru...",2853,b'Washington DC',and here is a third smithfield food plant wisc...,2.6
996,2020-04-14 01:34:15,b'#TruthBeTold Abso-fucking-lutly!!! #Heartb...,b'AltBadDude',"['TruthBeTold', 'Heartbreaking', 'trumpsameric...",1308,"b'Maryland, USA'",truthbetold abso fucking lutly heartbreaking t...,-2.5
997,2020-04-14 01:34:15,b'Trump assembles his crew of X-Men to save us...,b'BoneKnightmare',"['EndOfDays', 'apocalypse2020', 'COVID19', 'co...",3760,b'Parts Unknown',trump assembles his crew of x men to save us w...,-1.0
998,2020-04-14 01:34:14,"b'A shameful incident of Jaunpur, this is how ...",b'itemads',"['CoronaVirus', 'CoronaVirusPandemic', 'Corona...",63,b'Jeddah',a shameful incident of jaunpur this is how the...,-2.2


In [398]:
# One row per (tweet, word): the index is the tweet's label in tweets_df,
# 'num' is the word's position within the tweet and 'word' is the token itself.
tidy_format = (
    tweets_df["clean_text"]
    .str.split(expand=True)
    .stack()
    .reset_index(level=1)
    .rename(columns={'level_1': 'num', 0: 'word'})
)

# Look up each word in the lexicon (words without an entry get NaN), then sum
# per tweet; the sum skips NaN, so tweets with no lexicon words total 0.
# reindex() restores tweets that produced no tokens at all -- they are missing
# from the grouped result and would otherwise end up with NaN polarity.
tweets_df['polarity'] = (
    tidy_format
    .merge(vader_lex, how='left', left_on='word', right_index=True)['polarity']
    .groupby(level=0)
    .sum()
    .reindex(tweets_df.index, fill_value=0.0)
)

In [399]:
# Display the frame again, now including the per-tweet polarity column.
tweets_df

Unnamed: 0,timestamp,tweet_text,username,all_hashtags,followers_count,location,clean_text,polarity
0,2020-04-14 02:31:35,b'He gets up every day? Hasn\xe2\x80\x99t caug...,b'bigmusicfan71',"['coronavirus', 'COVID19', '25thAmendmentNow']",354,b'',he gets up every day hasn xe2 x80 x99t caught ...,0.0
1,2020-04-14 02:31:35,b'@Welptheresthat @NorthmanTrader Not when the...,b'GKeeto',"['china', 'ChinaMustExplain', 'ChinaVirus', 'C...",6,b'',not when the product is complete chinese made ...,0.0
2,2020-04-14 02:31:34,b'Holy \xf0\x9f\x92\xa9 \xe2\x81\xa6@VanityFai...,b'DrumpfsLies',"['INCOMPETENCEKILLS', 'COVID19', 'coronavirus'...",230,"b'California, USA'",holy xf0 x9f x92 xa9 xe2 x81 xa6 xe2 x81 xa9 t...,2.8
3,2020-04-14 02:31:32,b'This could be the end of the line for cruise...,b'1petermartin',"['COVID19', 'coronavirus', 'ausecon', 'auspol']",32382,b'',this could be the end of the line for cruise s...,0.0
4,2020-04-14 02:31:22,"b'Dear @CNN, Can you sincerely claim @realDon...",b'WarmMonkey',"['coronavirus', 'COVID19']",651,b'Out of The Closet',dear can you sincerely claim handling the coro...,1.3
...,...,...,...,...,...,...,...,...
995,2020-04-14 01:34:17,b'@JimWTAE @WTAE And here is a third Smithfiel...,b'realhumanrights',"['Wisconsin', 'Cudahy', 'COVID19', 'coronaviru...",2853,b'Washington DC',and here is a third smithfield food plant wisc...,2.6
996,2020-04-14 01:34:15,b'#TruthBeTold Abso-fucking-lutly!!! #Heartb...,b'AltBadDude',"['TruthBeTold', 'Heartbreaking', 'trumpsameric...",1308,"b'Maryland, USA'",truthbetold abso fucking lutly heartbreaking t...,-2.5
997,2020-04-14 01:34:15,b'Trump assembles his crew of X-Men to save us...,b'BoneKnightmare',"['EndOfDays', 'apocalypse2020', 'COVID19', 'co...",3760,b'Parts Unknown',trump assembles his crew of x men to save us w...,-1.0
998,2020-04-14 01:34:14,"b'A shameful incident of Jaunpur, this is how ...",b'itemads',"['CoronaVirus', 'CoronaVirusPandemic', 'Corona...",63,b'Jeddah',a shameful incident of jaunpur this is how the...,-2.2
