<a href="https://colab.research.google.com/github/fawazshah/Reddit-Analysis/blob/main/3_sentiment_reddit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import nltk
import pandas as pd

# Fetch the VADER lexicon into the local NLTK data directory; NLTK reports
# "already up-to-date" and does nothing further when it is present.
nltk.download('vader_lexicon')

from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


### Loading data

In [69]:
# Locations of the two tab-separated source files in the project's GitHub repository.
submissions_lib_dem_con_rep_url = 'https://raw.githubusercontent.com/fawazshah/Reddit-Analysis/master/data/assembled-data/submissions_top300_year_liberal_democrats_conservative_republicans.tsv'
comments_lib_dem_con_rep_url = 'https://raw.githubusercontent.com/fawazshah/Reddit-Analysis/master/data/assembled-data/comments_top300_year_liberal_democrats_conservative_republicans.tsv'

# read_table uses a tab as its default field separator, which matches the .tsv files.
submissions_lib_dem_con_rep_df = pd.read_table(submissions_lib_dem_con_rep_url)
comments_lib_dem_con_rep_df = pd.read_table(comments_lib_dem_con_rep_url)

In [70]:
# Preview the submissions frame as loaded (one row per submission, before any cleaning).
submissions_lib_dem_con_rep_df

Unnamed: 0,submission id,subreddit,article headline,article body,bias
0,l6a0q7,liberal,"Republicans now 'shocked, shocked' that there'...","© Greg Nash Republicans now 'shocked, shocked'...",left
1,jxxs8b,liberal,Georgia certifies election results confirming ...,Georgia Secretary of State Ben Raffensperger h...,left
2,kuscob,liberal,Report: QAnon Congresswoman Was Live-Tweeting ...,"Domestic Terrorist: Rep. Lauren Boebert, a new...",left
3,j2lufw,liberal,"More than 175 current, former law enforcement ...",EXCLUSIVE: More than 175 current and former la...,left
4,l8m3a8,liberal,GOP group launches billboards demanding Cruz a...,GOP campaigners have called on senators Ted Cr...,left
...,...,...,...,...,...
1129,kd595w,republicans,41 days after election day in the United State...,a Google company\n\nDeliver and maintain servi...,right
1130,hz72fr,republicans,98 more days until liberal tears flow...AGAIN!...,,right
1131,hpjde4,republicans,Don Lemon accidentally tells the truth about B...,a Google company\n\nDeliver and maintain servi...,right
1132,hgah1j,republicans,Pelosi: “I Do Not” Apologize For Accusing Repu...,a Google company\n\nDeliver and maintain servi...,right


In [71]:
# Preview the comments frame as loaded; each comment carries the id of its parent submission.
comments_lib_dem_con_rep_df

Unnamed: 0,comment id,submission id,subreddit,comment body,bias
0,gkzccbm,l6a0q7,liberal,"Hey Republican geniuses, I'll bet you were als...",left
1,gkzg91o,l6a0q7,liberal,The deficit exploded after the republican tax ...,left
2,gkzfown,l6a0q7,liberal,The Republican Party is a fucking cancer on ou...,left
3,gkz73xz,l6a0q7,liberal,"I wish I had gold to give you, just for the ti...",left
4,gkzhm11,l6a0q7,liberal,"it's not these politicians that really bug me,...",left
...,...,...,...,...,...
46168,gj8n6cn,kx5uz2,republicans,I don't understand this way of thinking. Sure ...,right
46169,gi4pnio,kqfpzu,republicans,"Come on guys, when Democrats were pretending t...",right
46170,gi4a4w8,kqfpzu,republicans,"If you believe the results are a sham, and you...",right
46171,gfv271m,kd595w,republicans,"My dudes... It's over. Keep hearing ""Just wa...",right


### Data preprocessing

In [72]:
# How many submissions have no article body text at all?
n_missing_article_bodies = submissions_lib_dem_con_rep_df['article body'].isna().sum()
print(n_missing_article_bodies)

609


In [73]:
# Discard submissions without an article body, then renumber the rows from 0
# so the index stays contiguous after the drop.
submissions_lib_dem_con_rep_df = (
    submissions_lib_dem_con_rep_df
    .dropna(subset=['article body'])
    .reset_index(drop=True)
)

In [74]:
# Preview the submissions frame after rows with a missing article body were dropped
# and the index was reset.
submissions_lib_dem_con_rep_df

Unnamed: 0,submission id,subreddit,article headline,article body,bias
0,l6a0q7,liberal,"Republicans now 'shocked, shocked' that there'...","© Greg Nash Republicans now 'shocked, shocked'...",left
1,jxxs8b,liberal,Georgia certifies election results confirming ...,Georgia Secretary of State Ben Raffensperger h...,left
2,kuscob,liberal,Report: QAnon Congresswoman Was Live-Tweeting ...,"Domestic Terrorist: Rep. Lauren Boebert, a new...",left
3,j2lufw,liberal,"More than 175 current, former law enforcement ...",EXCLUSIVE: More than 175 current and former la...,left
4,l8m3a8,liberal,GOP group launches billboards demanding Cruz a...,GOP campaigners have called on senators Ted Cr...,left
...,...,...,...,...,...
520,kuk5xm,republicans,Arnold on Twitter: My message to my fellow Ame...,JavaScript is not available.\n\nWe’ve detected...,right
521,kqfpzu,republicans,Worse Than Treason: No amount of rationalizing...,"Today, the “sedition caucus” includes at least...",right
522,kd595w,republicans,41 days after election day in the United State...,a Google company\n\nDeliver and maintain servi...,right
523,hpjde4,republicans,Don Lemon accidentally tells the truth about B...,a Google company\n\nDeliver and maintain servi...,right


### Simple text preprocessing

In [75]:
def preprocess(sentence):
    """Replace newline and tab characters in ``sentence`` with spaces.

    Case and punctuation are deliberately left untouched: upper-case words
    and characters such as ``!`` and ``?`` can indicate sentiment (anger or
    joy).

    Parameters
    ----------
    sentence : str
        Text to clean. Any non-string value (for example the float NaN that
        pandas stores for a missing cell, or ``None``) is returned unchanged
        instead of raising ``TypeError``.

    Returns
    -------
    str
        ``sentence`` with every newline and tab replaced by a single space.
        Replacement is one-for-one, so runs of whitespace are not collapsed.
    """
    # A missing text cell is not iterable; pass it through rather than crash
    # the whole .apply() call.
    if not isinstance(sentence, str):
        return sentence

    # One replace per target character, instead of re-scanning the full
    # string once for every whitespace occurrence found in it.
    for ch in '\n\t':
        sentence = sentence.replace(ch, " ")

    return sentence

In [78]:
# Clean every free-text column with preprocess, overwriting the column in its frame.
for _frame, _column in (
    (submissions_lib_dem_con_rep_df, 'article headline'),
    (submissions_lib_dem_con_rep_df, 'article body'),
    (comments_lib_dem_con_rep_df, 'comment body'),
):
    _frame[_column] = _frame[_column].apply(preprocess)

In [79]:
# Preview the submissions frame after text preprocessing.
# NOTE(review): the saved output for this cell shows lower-cased text, but
# preprocess as defined in this notebook does not lowercase. The output looks
# stale (left over from an earlier version) -- restart the kernel and re-run to confirm.
submissions_lib_dem_con_rep_df

Unnamed: 0,submission id,subreddit,article headline,article body,bias
0,l6a0q7,liberal,"republicans now 'shocked, shocked' that there'...","© greg nash republicans now 'shocked, shocked'...",left
1,jxxs8b,liberal,georgia certifies election results confirming ...,georgia secretary of state ben raffensperger h...,left
2,kuscob,liberal,report: qanon congresswoman was live-tweeting ...,"domestic terrorist: rep. lauren boebert, a new...",left
3,j2lufw,liberal,"more than 175 current, former law enforcement ...",exclusive: more than 175 current and former la...,left
4,l8m3a8,liberal,gop group launches billboards demanding cruz a...,gop campaigners have called on senators ted cr...,left
...,...,...,...,...,...
520,kuk5xm,republicans,arnold on twitter: my message to my fellow ame...,javascript is not available. we’ve detected t...,right
521,kqfpzu,republicans,worse than treason: no amount of rationalizing...,"today, the “sedition caucus” includes at least...",right
522,kd595w,republicans,41 days after election day in the united state...,a google company deliver and maintain service...,right
523,hpjde4,republicans,don lemon accidentally tells the truth about b...,a google company deliver and maintain service...,right


In [80]:
# Preview the comments frame after text preprocessing.
# NOTE(review): the saved output for this cell shows lower-cased text, but
# preprocess as defined in this notebook does not lowercase. The output looks
# stale (left over from an earlier version) -- restart the kernel and re-run to confirm.
comments_lib_dem_con_rep_df

Unnamed: 0,comment id,submission id,subreddit,comment body,bias
0,gkzccbm,l6a0q7,liberal,"hey republican geniuses, i'll bet you were als...",left
1,gkzg91o,l6a0q7,liberal,the deficit exploded after the republican tax ...,left
2,gkzfown,l6a0q7,liberal,the republican party is a fucking cancer on ou...,left
3,gkz73xz,l6a0q7,liberal,"i wish i had gold to give you, just for the ti...",left
4,gkzhm11,l6a0q7,liberal,"it's not these politicians that really bug me,...",left
...,...,...,...,...,...
46168,gj8n6cn,kx5uz2,republicans,i don't understand this way of thinking. sure ...,right
46169,gi4pnio,kqfpzu,republicans,"come on guys, when democrats were pretending t...",right
46170,gi4a4w8,kqfpzu,republicans,"if you believe the results are a sham, and you...",right
46171,gfv271m,kd595w,republicans,"my dudes... it's over. keep hearing ""just wa...",right


### Sentiment analysis

In [None]:
sia = SentimentIntensityAnalyzer()

for i, row in submissions_lib_dem_con_rep_df.iterrows():
    