In [4]:
pip install pycld2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import pycld2 as cld2
import pandas as pd 

#Data Clean


In [None]:
df_trump = pd.read_csv('hashtag_donaldtrump.csv',lineterminator='\n')
df_biden = pd.read_csv('hashtag_joebiden.csv',lineterminator='\n')

In [None]:
df_trump.shape[0],df_biden.shape[0]

In [8]:
import pycld2 as cld2
def detect_lang(tweets):
    try:
        isReliable, textBytesFound, details = cld2.detect(tweets)
        return details[0][0]
    except:
        return "not found"

In [9]:
trump_eng=df_trump['tweet'].apply(detect_lang)
biden_eng=df_biden['tweet'].apply(detect_lang)

In [10]:
trump_eng.shape[0]

970919

In [11]:
language_trump = [trump_eng[i] for i in range(len(trump_eng))]
language_biden = [biden_eng[i] for i in range(len(biden_eng))]
df_trump['language'] = language_trump
df_biden['language'] = language_biden

In [12]:
df_trump_eng = df_trump[df_trump['language'] == 'ENGLISH']
df_biden_eng = df_biden[df_biden['language'] == 'ENGLISH']

In [13]:
df_trump_eng_us = df_trump_eng[(df_trump_eng['country']=='United States of America')].reset_index().drop('index',axis=1)
df_biden_eng_us = df_biden_eng[(df_biden_eng['country']=='United States of America')].reset_index().drop('index',axis=1)
df_biden_eng_us.shape[0],df_trump_eng_us.shape[0]

(138367, 166328)

#Topic Modeling 

In [None]:
#the module 'sys' allows istalling module from inside Jupyter
import sys

!{sys.executable} -m pip install numpy
import numpy as np

!{sys.executable} -m pip install pandas
import pandas as pd

#Natrual Language ToolKit (NLTK)
!{sys.executable} -m pip install nltk
import nltk

!{sys.executable} -m pip install sklearn
from sklearn import metrics
#from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import  CountVectorizer #bag-of-words vectorizer 
from sklearn.decomposition import LatentDirichletAllocation #package for LDA

# Plotting tools

from pprint import pprint
!{sys.executable} -m pip install pyLDAvis #visualizing LDA
import pyLDAvis
import pyLDAvis.sklearn

import matplotlib.pyplot as plt
%matplotlib inline

#define text normalization function
%run ./Text_Normalization_Function.ipynb #defining text normalization function

#ignore warnings about future changes in functions as they take too much space
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Normalization Trump

In [15]:
# Trump normalization
df_trump_eng_us.tweet=normalize_corpus(df_trump_eng_us.tweet)
df_trump_eng_us.tweet.head()

0    trump student use hear year ten year hear chin...
1          tie tie trump rally iowa http co jjaluumh5d
2    clady62 minute long time ago omarosa never rep...
3    deeviousdenise realdonaldtrump nypost wont man...
4    single effective remedy eradicate another roun...
Name: tweet, dtype: object

## Normalization Biden

In [16]:
# Biden normalization
df_biden_eng_us.tweet=normalize_corpus(df_biden_eng_us.tweet)
df_biden_eng_us.tweet.head()

0    islandgirlprv bradbeauregardj meidastouch bide...
1    censorship hunterbiden biden bidenemails biden...
2    nypost censorship censored twitter manipulate ...
3    fbi allegedly obtain hunter biden computer dat...
4    comment democrat understand ruthless china htt...
Name: tweet, dtype: object

In [17]:
#define the bag-of-words vectorizer:
bow_vectorizer_trump = CountVectorizer()
bow_vectorizer_biden = CountVectorizer()
#vectorize the normalized data:
trump=bow_vectorizer_trump.fit_transform(df_trump_eng_us.tweet)
biden=bow_vectorizer_biden.fit_transform(df_biden_eng_us.tweet)

## LDA Trump

In [18]:
lda_trump=LatentDirichletAllocation(n_components=10, max_iter=150, ##change the number of topics and iteration here
                                           doc_topic_prior = 0.9,
                                           topic_word_prior = 0.1).fit(trump)

## LDA Biden

In [19]:
lda_biden=LatentDirichletAllocation(n_components=10, max_iter=150, ##change the number of topics and iteration here
                                           doc_topic_prior = 0.9,
                                           topic_word_prior = 0.1).fit(biden)

In [20]:
def display_topics(model, feature_names, no_top_words):
    for topic_idx, topic in enumerate(model.components_):
        print("Topic %d:" % (topic_idx))
        print(" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
        
def get_topic_words(vectorizer, lda_model, n_words):
    keywords = np.array(vectorizer.get_feature_names())
    topic_words = []
    for topic_weights in lda_model.components_:
        top_word_locs = (-topic_weights).argsort()[:n_words]
        topic_words.append(keywords.take(top_word_locs).tolist())
    return topic_words

## Topic Modeling Trump

In [21]:
# Trump Topic
no_top_words = 30
display_topics(lda_trump, bow_vectorizer_trump.get_feature_names(), no_top_words)

Topic 0:
trump covid19 covid via news new rally donald coronavirus live day case hold video pandemic watch https president youtube week politics death virus record penny whitehouse include since police number
Topic 1:
trump know think take good lie even debate give well last keep really debates2020 mean much talk never hear question start cant show person anything watch real na tonight thats
Topic 2:
president trump america gop american time us year country care white every great million tax world house pay blue first life votehimout usa could stand next remember history hope kill
Topic 3:
trump need campaign big fact use work medium twitter tweet believe everyone court may sure call claim put russia move fire post maybe fake story attack least face without hard
Topic 4:
biden trump election2020 election joebiden win bidenharris2020 joe electionday elections2020 poll lead electionresults2020 2020election politics fight kamalaharris call electionnight vote2020 biden2020 presidential vic

## Topic Modeling Biden

In [22]:
# Biden Topic
display_topics(lda_biden, bow_vectorizer_biden.get_feature_names(), no_top_words)

Topic 0:
like people america american year good country right need look love work im man life help much job care world great hope party feel hear put god thank ever speak
Topic 1:
trump biden trump2020 republican news voter cnn usa maga co black politics foxnews happen http read presidency donald fraud https breaking follow nbc endorse police ha tv nyc fox trump2020landslide
Topic 2:
obama biden campaign lie hunter family white hunterbiden bidens house run take corruption fact son office nothing never na another corrupt senate joebidens gon fbi email claim administration even crime
Topic 3:
co http biden election2020 bidenharris2020 vote biden2020 electionday elections2020 2020election vote2020 electionnight florida michigan texas wisconsin bidenharristosaveamerica trumpmeltdown pennsylvania votehimout electionresults2020 blacklivesmatter maga2020 counteveryvote bidenharrislandslide2020 2020elections bidenharris2020tosaveamerica votebidenharris2020 voteearly ohio
Topic 4:
biden democra

In [26]:
#prepare to display result in the Jupyter notebook
pyLDAvis.enable_notebook()

#run the visualization [mds is a function to use for visualizing the "distance" between topics]
pyLDAvis.sklearn.prepare(lda_trump, trump, bow_vectorizer_trump, mds='tsne')

In [45]:
#prepare to display result in the Jupyter notebook
pyLDAvis.enable_notebook()

#run the visualization [mds is a function to use for visualizing the "distance" between topics]
pyLDAvis.sklearn.prepare(lda_biden, biden, bow_vectorizer_biden, mds='tsne')

# Sentiment Analysis 



In [27]:
#packages needed

#ignore warnings about future changes in functions as they take too much space
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

#the module 'sys' allows istalling module from inside Jupyter
import sys

!{sys.executable} -m pip install numpy
import numpy as np 

!{sys.executable} -m pip install pandas
import pandas as pd

#Natrual Language ToolKit (NLTK)
!{sys.executable} -m pip install nltk
import nltk

#text normalization function
%run ./Text_Normalization_Function.ipynb

#ignore warnings about future changes in functions as they take too much space
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Original:   <p>The circus dog in a plissé skirt jumped over Python who wasn't that large, just 3 feet long.</p>
Processed:  ['<', 'p', '>', 'The', 'circus', 'dog', 'in', 'a', 'plissé', 'skirt', 'jumped', 'over', 'Python', 'who', 'was', "n't", 'that', 'large', ',', 'just', '3', 'feet', 'long.', '<', '/p', '>']
Original:   <p>The circus dog in a plissé skirt jumped over Python who wasn't that large, just 3 feet long.</p>
Processed:  <p>The circus dog in a plissé skirt jumped over Python who was not that large, just 3 feet long.</p>
Original:   <p>The circus dog in a plissé skirt jumped over Python who wasn't that large, just 3 feet long.</p>
Processed:  [('<', 'a'), ('p', 'n'), ('>', 'v'), ('the', None), ('circus', 'n'), ('dog', 'n'), ('in', None), ('a', None), ('plissé', 'n'), ('skirt', 'n'),

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [28]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [29]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [30]:
def analyze_sentiment_vader_lexicon(review, threshold = 0.1, verbose = False):
    scores = analyzer.polarity_scores(review)  
    binary_sentiment = 'positive' if scores['compound'] >= threshold else 'negative'
    if verbose:                             
        print('VADER Polarity (Binary):', binary_sentiment)
        print('VADER Score:', round(scores['compound'], 2))
    return binary_sentiment,scores['compound']  

## Trump

In [31]:
Trump_VADER_polarity_test = [analyze_sentiment_vader_lexicon(review, threshold=0.1) for review in df_trump_eng_us.tweet]
Trump_VADER_polarity_test_df = pd.DataFrame(Trump_VADER_polarity_test, columns = ['VADER Polarity','VADER Score'])
pd.DataFrame({"count":Trump_VADER_polarity_test_df['VADER Polarity'].value_counts(),"precentage":Trump_VADER_polarity_test_df['VADER Polarity'].value_counts(normalize=True)})


Unnamed: 0,count,precentage
negative,106954,0.643031
positive,59374,0.356969


In [32]:
df_trump_eng_us_polarity = pd.merge(df_trump_eng_us,Trump_VADER_polarity_test_df['VADER Polarity'],left_index=True, right_index=True)
df_trump_eng_us_polarity

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_description,...,lat,long,city,country,continent,state,state_code,collected_at,language,VADER Polarity
0,2020-10-15 00:00:02,1.316529e+18,trump student use hear year ten year hear chin...,2.0,1.0,Twitter Web App,8.436472e+06,snarke,snarke,"Will mock for food! Freelance writer, blogger,...",...,45.520247,-122.674195,Portland,United States of America,North America,Oregon,OR,2020-10-21 00:00:00.746433060,ENGLISH,positive
1,2020-10-15 00:00:08,1.316529e+18,tie tie trump rally iowa http co jjaluumh5d,4.0,3.0,Twitter for iPhone,4.741380e+07,Rana Abtar - رنا أبتر,Ranaabtar,"Washington Correspondent, Lebanese-American ,c...",...,38.894992,-77.036558,Washington,United States of America,North America,District of Columbia,DC,2020-10-21 00:00:01.492866121,ENGLISH,negative
2,2020-10-15 00:00:17,1.316529e+18,clady62 minute long time ago omarosa never rep...,2.0,0.0,Twitter for Android,1.138416e+09,Farris Flagg,FarrisFlagg,#BidenHarris2020 #JoeBiden2020 #KamalaHarrisFo...,...,33.782519,-117.228648,,United States of America,North America,California,CA,2020-10-21 00:00:01.866082651,ENGLISH,negative
3,2020-10-15 00:00:18,1.316529e+18,deeviousdenise realdonaldtrump nypost wont man...,0.0,0.0,Twitter for iPhone,9.007611e+17,Stacey Gulledge 🇺🇸 Patriot ♥️ KAG 🙏 👮‍♀️♥️,sm_gulledge,"Patriot, Wife, “Shaken not Stirred” Mom of two...",...,40.225357,-82.688140,,United States of America,North America,Ohio,OH,2020-10-21 00:00:02.612515712,ENGLISH,positive
4,2020-10-15 00:00:20,1.316529e+18,single effective remedy eradicate another roun...,0.0,0.0,Twitter Web App,5.404769e+08,Jamieo,jamieo33,"Don't know what I am. Can lean left and right,...",...,40.969989,-77.727883,,United States of America,North America,Pennsylvania,PA,2020-10-21 00:00:02.985732243,ENGLISH,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166323,2020-11-08 23:58:46,1.325589e+18,donaldjtrumpjr trump intimately familiar disgu...,0.0,0.0,Twitter for iPad,1.237758e+18,U N Known,pirkka2,Truth will save the world - not tRump. 🌎 VOTE ...,...,40.075738,-74.404162,,United States of America,North America,New Jersey,NJ,2020-11-09 17:47:55.744839,ENGLISH,negative
166324,2020-11-08 23:58:51,1.325589e+18,know decency trump gop know like child die bil...,3.0,0.0,Twitter Web App,4.835231e+07,"Linda ""I Voted for Biden"" Kenney Baden",KenneyBaden,"TrialLawyer https://t.co/1C0kKESSUC, Played by...",...,40.712728,-74.006015,New York,United States of America,North America,New York,NY,2020-11-09 17:47:55.833725,ENGLISH,negative
166325,2020-11-08 23:58:56,1.325589e+18,lionz den present white obama ready live faceb...,1.0,0.0,Twitter for Android,9.868225e+17,IUIC Minnesota,IUICMinnesota,,...,45.989659,-94.611329,,United States of America,North America,Minnesota,MN,2020-11-09 17:47:55.847439,ENGLISH,positive
166326,2020-11-08 23:59:05,1.325589e+18,trump patriot maga2020 http co 2a8fnm86ux,0.0,0.0,Twitter for Android,1.296581e+18,🍿🍷••HellWorld••🍷🍿,HellWor09724785,,...,34.233137,-102.410749,,United States of America,North America,Texas,TX,2020-11-09 17:47:55.939901,ENGLISH,negative


## Biden

In [33]:
Biden_VADER_polarity_test = [analyze_sentiment_vader_lexicon(review, threshold=0.1) for review in df_biden_eng_us.tweet]
Biden_VADER_polarity_test_df = pd.DataFrame(Biden_VADER_polarity_test, columns = ['VADER Polarity','VADER Score'])
Biden_VADER_polarity_test_df
pd.DataFrame({"count":Biden_VADER_polarity_test_df['VADER Polarity'].value_counts(),"precentage":Biden_VADER_polarity_test_df['VADER Polarity'].value_counts(normalize=True)})


Unnamed: 0,count,precentage
negative,79529,0.574769
positive,58838,0.425231


In [34]:
df_biden_eng_us_polarity = pd.merge(df_biden_eng_us,Biden_VADER_polarity_test_df['VADER Polarity'],left_index=True, right_index=True)
df_biden_eng_us_polarity

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_description,...,lat,long,city,country,continent,state,state_code,collected_at,language,VADER Polarity
0,2020-10-15 00:00:20,1.316529e+18,islandgirlprv bradbeauregardj meidastouch bide...,0.0,0.0,Twitter Web App,3.494182e+09,Flag Waver,Flag_Wavers,,...,46.304036,-109.171431,,United States of America,North America,Montana,MT,2020-10-21 00:00:01.035654566,ENGLISH,negative
1,2020-10-15 00:00:22,1.316529e+18,censorship hunterbiden biden bidenemails biden...,1.0,0.0,Twitter Web App,1.032807e+18,the Gold State,theegoldstate,A Silicon Valley #independent #News #Media #St...,...,36.701463,-118.755997,,United States of America,North America,California,CA,2020-10-21 00:00:02.071309132,ENGLISH,negative
2,2020-10-15 00:00:25,1.316529e+18,nypost censorship censored twitter manipulate ...,0.0,0.0,Twitter for iPhone,1.994033e+07,Change Illinois | Biden will increase taxes by...,changeillinois,"Illinois, home of Lincoln and Reagan, used to ...",...,41.875562,-87.624421,Chicago,United States of America,North America,Illinois,IL,2020-10-21 00:00:03.106963698,ENGLISH,negative
3,2020-10-15 00:00:57,1.316529e+18,fbi allegedly obtain hunter biden computer dat...,0.0,0.0,Twitter for Android,9.607387e+17,RLCompton,NewfoundStudio,"""There are things known and there are things u...",...,37.572603,-85.155141,,United States of America,North America,Kentucky,KY,2020-10-21 00:00:05.696100113,ENGLISH,negative
4,2020-10-15 00:01:23,1.316530e+18,comment democrat understand ruthless china htt...,0.0,0.0,Twitter Web App,1.016593e+08,John Ubaldi,ubaldireports,Just Facts... No Fiction on domestic and globa...,...,27.947760,-82.458444,Tampa,United States of America,North America,Florida,FL,2020-10-21 00:00:08.803063811,ENGLISH,negative
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138362,2020-11-08 23:58:09,1.325588e+18,election 2020elections trump biden http co cvk...,0.0,0.0,Twitter for iPhone,9.538012e+17,NY Gay and MAGA,NYMike912,Gay proud. not a liberal. I was always a Democ...,...,40.712728,-74.006015,New York,United States of America,North America,New York,NY,2020-11-09 18:32:45.608982,ENGLISH,negative
138363,2020-11-08 23:58:10,1.325588e+18,gop gopleader senatemajldr lindseygrahamsc tak...,0.0,0.0,Twitter for Android,2.975783e+09,OnCapeCodTime,CapeCodBluesArt,"Indigenous American w/Scottish, Irish, German ...",...,41.798807,-69.996014,,United States of America,North America,Massachusetts,MA,2020-11-09 18:32:45.973973,ENGLISH,negative
138364,2020-11-08 23:58:24,1.325589e+18,flotus im excite flotus whose vagina havent tw...,0.0,0.0,Twitter for iPhone,5.545625e+07,Caroline Billinson,cbillinson,my love language is dismantling the patriarchy.,...,38.894992,-77.036558,Washington,United States of America,North America,District of Columbia,DC,2020-11-09 18:32:45.841439,ENGLISH,positive
138365,2020-11-08 23:58:48,1.325589e+18,man need help usa biden http co f5dj8syt80,0.0,0.0,Twitter for Android,1.248047e+18,Dr J,DrJoeMcCarthy,Human. Free Thinker. Met Mandela. Personal. Fa...,...,43.519630,-114.315320,,United States of America,North America,Idaho,ID,2020-11-09 18:32:45.641087,ENGLISH,positive


#Sentiment by States

## Trump

In [35]:
df_trump_state = df_trump_eng_us_polarity[['state','VADER Polarity']].groupby(['state']).apply(lambda x:x['VADER Polarity'].value_counts(normalize=True))
df_trump_state

state                  
Alabama        negative    0.577640
               positive    0.422360
Alaska         negative    0.542763
               positive    0.457237
Arizona        negative    0.600457
                             ...   
West Virginia  positive    0.391635
Wisconsin      negative    0.659012
               positive    0.340988
Wyoming        negative    0.622568
               positive    0.377432
Name: VADER Polarity, Length: 105, dtype: float64

## Biden

In [36]:
df_biden_state = df_biden_eng_us_polarity[['state','VADER Polarity']].groupby(['state']).apply(lambda x:x['VADER Polarity'].value_counts(normalize=True))
df_biden_state

state                  
Alabama        negative    0.558897
               positive    0.441103
Alaska         negative    0.592593
               positive    0.407407
Arizona        negative    0.578512
                             ...   
West Virginia  positive    0.391026
Wisconsin      negative    0.571240
               positive    0.428760
Wyoming        negative    0.601770
               positive    0.398230
Name: VADER Polarity, Length: 106, dtype: float64

# Sentiment By Topics

## Trump

In [37]:
lda_trump_weights=lda_trump.transform(trump)

In [38]:
trump_by_topic=df_trump_eng_us
lda_trump_weights_df=pd.DataFrame(lda_trump_weights) ##Convert array of topic weights into a dataframe
lda_trump_weights_df.head()
dominant_topic = np.argmax(lda_trump_weights_df.values, axis=1)
lda_trump_weights_df['dominant_topic'] = dominant_topic ##Add dominant as a new column
lda_trump_weights_df.head()
trump_by_topic=pd.concat([trump_by_topic, lda_trump_weights_df], axis=1) ##Concat two dataframe
trump_by_topic.head() ##probability of each topic has been assigned to each tweet and a dominant topic is chosen

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_description,...,1,2,3,4,5,6,7,8,9,dominant_topic
0,2020-10-15 00:00:02,1.316529e+18,trump student use hear year ten year hear chin...,2.0,1.0,Twitter Web App,8436472.0,snarke,snarke,"Will mock for food! Freelance writer, blogger,...",...,0.199622,0.213811,0.110279,0.034179,0.033747,0.074031,0.034645,0.144276,0.074933,2
1,2020-10-15 00:00:08,1.316529e+18,tie tie trump rally iowa http co jjaluumh5d,4.0,3.0,Twitter for iPhone,47413800.0,Rana Abtar - رنا أبتر,Ranaabtar,"Washington Correspondent, Lebanese-American ,c...",...,0.055044,0.054314,0.054431,0.18242,0.237402,0.056226,0.056546,0.054873,0.057077,5
2,2020-10-15 00:00:17,1.316529e+18,clady62 minute long time ago omarosa never rep...,2.0,0.0,Twitter for Android,1138416000.0,Farris Flagg,FarrisFlagg,#BidenHarris2020 #JoeBiden2020 #KamalaHarrisFo...,...,0.098209,0.123463,0.038097,0.038406,0.037944,0.083086,0.038903,0.238485,0.264221,9
3,2020-10-15 00:00:18,1.316529e+18,deeviousdenise realdonaldtrump nypost wont man...,0.0,0.0,Twitter for iPhone,9.007611e+17,Stacey Gulledge 🇺🇸 Patriot ♥️ KAG 🙏 👮‍♀️♥️,sm_gulledge,"Patriot, Wife, “Shaken not Stirred” Mom of two...",...,0.171466,0.06463,0.117673,0.067476,0.031351,0.068288,0.08252,0.209754,0.15254,8
4,2020-10-15 00:00:20,1.316529e+18,single effective remedy eradicate another roun...,0.0,0.0,Twitter Web App,540476900.0,Jamieo,jamieo33,"Don't know what I am. Can lean left and right,...",...,0.047866,0.093292,0.143268,0.056864,0.240296,0.055571,0.045431,0.094431,0.098867,5


In [39]:
VADER_polarity_test = [analyze_sentiment_vader_lexicon(tweet, threshold=0.1) for tweet in trump_by_topic.tweet]
VADER_polarity_test_df = pd.DataFrame(VADER_polarity_test, columns = ['VADER Polarity','VADER Score'])
trump_by_topic=pd.concat([trump_by_topic, VADER_polarity_test_df], axis=1)
trump_by_topic.head()

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_description,...,3,4,5,6,7,8,9,dominant_topic,VADER Polarity,VADER Score
0,2020-10-15 00:00:02,1.316529e+18,trump student use hear year ten year hear chin...,2.0,1.0,Twitter Web App,8436472.0,snarke,snarke,"Will mock for food! Freelance writer, blogger,...",...,0.110279,0.034179,0.033747,0.074031,0.034645,0.144276,0.074933,2,positive,0.3612
1,2020-10-15 00:00:08,1.316529e+18,tie tie trump rally iowa http co jjaluumh5d,4.0,3.0,Twitter for iPhone,47413800.0,Rana Abtar - رنا أبتر,Ranaabtar,"Washington Correspondent, Lebanese-American ,c...",...,0.054431,0.18242,0.237402,0.056226,0.056546,0.054873,0.057077,5,negative,0.0
2,2020-10-15 00:00:17,1.316529e+18,clady62 minute long time ago omarosa never rep...,2.0,0.0,Twitter for Android,1138416000.0,Farris Flagg,FarrisFlagg,#BidenHarris2020 #JoeBiden2020 #KamalaHarrisFo...,...,0.038097,0.038406,0.037944,0.083086,0.038903,0.238485,0.264221,9,negative,-0.4767
3,2020-10-15 00:00:18,1.316529e+18,deeviousdenise realdonaldtrump nypost wont man...,0.0,0.0,Twitter for iPhone,9.007611e+17,Stacey Gulledge 🇺🇸 Patriot ♥️ KAG 🙏 👮‍♀️♥️,sm_gulledge,"Patriot, Wife, “Shaken not Stirred” Mom of two...",...,0.117673,0.067476,0.031351,0.068288,0.08252,0.209754,0.15254,8,positive,0.2732
4,2020-10-15 00:00:20,1.316529e+18,single effective remedy eradicate another roun...,0.0,0.0,Twitter Web App,540476900.0,Jamieo,jamieo33,"Don't know what I am. Can lean left and right,...",...,0.143268,0.056864,0.240296,0.055571,0.045431,0.094431,0.098867,5,positive,0.4767


In [40]:
grouped=trump_by_topic.groupby("dominant_topic")
result = grouped.apply(lambda x: x['VADER Polarity'].value_counts(normalize=True))
print(result) ##Result of VADER sentiment analysis on trump tweets grouped by each of the topic

VADER Polarity  negative  positive
dominant_topic                    
0               0.692155  0.307845
1               0.610124  0.389876
2               0.602238  0.397762
3               0.657921  0.342079
4               0.590526  0.409474
5               0.739008  0.260992
6               0.546771  0.453229
7               0.635976  0.364024
8               0.695245  0.304755
9               0.651227  0.348773


## Biden

In [41]:
lda_biden_weights=lda_biden.transform(biden)

In [42]:
biden_by_topic=df_biden_eng_us
lda_biden_weights_df=pd.DataFrame(lda_biden_weights) ##Convert array of topic weights into a dataframe
lda_biden_weights_df.head()
dominant_topic2 = np.argmax(lda_biden_weights_df.values, axis=1)
lda_biden_weights_df['dominant_topic'] = dominant_topic2 ##Add dominant as a new column
lda_biden_weights_df.head()
biden_by_topic=pd.concat([biden_by_topic, lda_biden_weights_df], axis=1) ##Concat two dataframe
biden_by_topic.head() ##probability of each topic has been assigned to each tweet and a dominant topic is chosen

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_description,...,1,2,3,4,5,6,7,8,9,dominant_topic
0,2020-10-15 00:00:20,1.316529e+18,islandgirlprv bradbeauregardj meidastouch bide...,0.0,0.0,Twitter Web App,3494182000.0,Flag Waver,Flag_Wavers,,...,0.064775,0.061374,0.188237,0.122523,0.068203,0.168768,0.097626,0.055953,0.10604,3
1,2020-10-15 00:00:22,1.316529e+18,censorship hunterbiden biden bidenemails biden...,1.0,0.0,Twitter Web App,1.032807e+18,the Gold State,theegoldstate,A Silicon Valley #independent #News #Media #St...,...,0.060544,0.133866,0.108659,0.273761,0.058779,0.102988,0.10732,0.05254,0.051521,4
2,2020-10-15 00:00:25,1.316529e+18,nypost censorship censored twitter manipulate ...,0.0,0.0,Twitter for iPhone,19940330.0,Change Illinois | Biden will increase taxes by...,changeillinois,"Illinois, home of Lincoln and Reagan, used to ...",...,0.057952,0.028835,0.028125,0.433661,0.121217,0.079897,0.038854,0.092049,0.060034,4
3,2020-10-15 00:00:57,1.316529e+18,fbi allegedly obtain hunter biden computer dat...,0.0,0.0,Twitter for Android,9.607387e+17,RLCompton,NewfoundStudio,"""There are things known and there are things u...",...,0.089438,0.356947,0.057126,0.081374,0.135983,0.116713,0.048532,0.038558,0.03932,2
4,2020-10-15 00:01:23,1.31653e+18,comment democrat understand ruthless china htt...,0.0,0.0,Twitter Web App,101659300.0,John Ubaldi,ubaldireports,Just Facts... No Fiction on domestic and globa...,...,0.127987,0.039581,0.131648,0.115156,0.13052,0.238598,0.032519,0.029151,0.117518,6


In [43]:
VADER_polarity_test = [analyze_sentiment_vader_lexicon(tweet, threshold=0.1) for tweet in biden_by_topic.tweet]
VADER_polarity_test_df = pd.DataFrame(VADER_polarity_test, columns = ['VADER Polarity','VADER Score'])
biden_by_topic=pd.concat([biden_by_topic, VADER_polarity_test_df], axis=1)
biden_by_topic.head()

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_description,...,3,4,5,6,7,8,9,dominant_topic,VADER Polarity,VADER Score
0,2020-10-15 00:00:20,1.316529e+18,islandgirlprv bradbeauregardj meidastouch bide...,0.0,0.0,Twitter Web App,3494182000.0,Flag Waver,Flag_Wavers,,...,0.188237,0.122523,0.068203,0.168768,0.097626,0.055953,0.10604,3,negative,0.0
1,2020-10-15 00:00:22,1.316529e+18,censorship hunterbiden biden bidenemails biden...,1.0,0.0,Twitter Web App,1.032807e+18,the Gold State,theegoldstate,A Silicon Valley #independent #News #Media #St...,...,0.108659,0.273761,0.058779,0.102988,0.10732,0.05254,0.051521,4,negative,0.0
2,2020-10-15 00:00:25,1.316529e+18,nypost censorship censored twitter manipulate ...,0.0,0.0,Twitter for iPhone,19940330.0,Change Illinois | Biden will increase taxes by...,changeillinois,"Illinois, home of Lincoln and Reagan, used to ...",...,0.028125,0.433661,0.121217,0.079897,0.038854,0.092049,0.060034,4,negative,-0.1027
3,2020-10-15 00:00:57,1.316529e+18,fbi allegedly obtain hunter biden computer dat...,0.0,0.0,Twitter for Android,9.607387e+17,RLCompton,NewfoundStudio,"""There are things known and there are things u...",...,0.057126,0.081374,0.135983,0.116713,0.048532,0.038558,0.03932,2,negative,0.0
4,2020-10-15 00:01:23,1.31653e+18,comment democrat understand ruthless china htt...,0.0,0.0,Twitter Web App,101659300.0,John Ubaldi,ubaldireports,Just Facts... No Fiction on domestic and globa...,...,0.131648,0.115156,0.13052,0.238598,0.032519,0.029151,0.117518,6,negative,0.0


In [44]:
grouped2=biden_by_topic.groupby("dominant_topic")
result2 = grouped2.apply(lambda x: x['VADER Polarity'].value_counts(normalize=True))
print(result2) ##Result of VADER sentiment analysis on biden tweets grouped by each of the topic

dominant_topic          
0               positive    0.617560
                negative    0.382440
1               negative    0.623912
                positive    0.376088
2               negative    0.651131
                positive    0.348869
3               negative    0.675412
                positive    0.324588
4               negative    0.648943
                positive    0.351057
5               negative    0.631809
                positive    0.368191
6               negative    0.685034
                positive    0.314966
7               positive    0.557612
                negative    0.442388
8               positive    0.527825
                negative    0.472175
9               negative    0.572921
                positive    0.427079
Name: VADER Polarity, dtype: float64
