In [246]:
import datetime as dt
import os
import re

import chart_studio.plotly as py
import matplotlib
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import praw
import requests
from praw.models import MoreComments
from psaw import PushshiftAPI
from textblob import TextBlob

from plots import barplot

pd.options.plotting.backend = "plotly"

In [4]:
 # Reddit API credentials are read from the environment so secrets never live in the notebook.
# Required variables: REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT.
# A missing variable raises KeyError right here instead of an opaque auth failure later.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent=os.environ["REDDIT_USER_AGENT"],
)

# Pushshift client used for the submission/comment searches.
api = PushshiftAPI()

In [5]:
# Political subreddits sampled throughout the notebook (names as passed to the Pushshift queries).
subreddits = [
    'donaldtrump',
    'joebiden',
    'conservative',
    'neoliberal',
    'democrats',
    'politics',
    'republican',
]

In [None]:
# Collect sentiment for top-level comments on the most-commented submissions of each
# subreddit, one day at a time (2020-11-01 .. 2020-11-06).
#
# Result: `data`, one row per comment, with columns
#   Subreddit - subreddit display name (str)
#   Sentiment - TextBlob polarity of the comment body
#   Time      - comment creation time
#
# NOTE: the day windows use naive datetimes, so `.timestamp()` / `fromtimestamp()` both work
# in the machine's local timezone rather than UTC.
MAX_COMMENTS_PER_SUBMISSION = 50

records = []
for d in [1, 2, 3, 4, 5, 6]:
    print('On day {}'.format(d))
    window_start = int(dt.datetime(2020, 11, d).timestamp())
    window_end = int(dt.datetime(2020, 11, d + 1).timestamp())
    for x in subreddits:
        submissions = list(api.search_submissions(
            after=window_start,
            before=window_end,
            subreddit=x,
            filter=['id'],
            sort_type='num_comments',
            limit=100,
        ))
        for y in submissions:
            # Configure ONE Submission object and iterate that same object: every call to
            # reddit.submission() builds a fresh instance, so settings applied to a
            # throwaway instance are silently lost.
            submission = reddit.submission(y.id)
            submission.comment_limit = 100
            submission.comment_sort = 'top'

            kept = 0
            for top_level_comment in submission.comments:
                if isinstance(top_level_comment, MoreComments):
                    continue
                if top_level_comment.author != 'AutoModerator':
                    records.append({
                        'Subreddit': str(top_level_comment.subreddit),
                        'Sentiment': TextBlob(top_level_comment.body).sentiment.polarity,
                        'Time': dt.datetime.fromtimestamp(top_level_comment.created_utc),
                    })
                    kept += 1
                    # `break` (not `continue`) so the per-submission cap is actually enforced.
                    if kept >= MAX_COMMENTS_PER_SUBMISSION:
                        break

# Build the frame once: each comment appears exactly once, and no DataFrame.append
# (quadratic, and removed in pandas 2.0) is needed.
data = pd.DataFrame(records, columns=['Subreddit', 'Sentiment', 'Time'])

In [7]:
# Preview the comment-level sentiment table.
# NOTE(review): the saved output has 'Unnamed: 0' / 'Unnamed: 0.1' columns that the scrape
# cell never creates - presumably `data` was reloaded from a CSV in a cell that is not
# shown here; confirm and add that load step so Restart & Run All reproduces this output.
data

Unnamed: 0.1,Unnamed: 0,Subreddit,Sentiment,Time
2,2,JoeBiden,0.000000,2020-11-01 03:39:40
4,4,JoeBiden,0.000000,2020-11-01 06:04:27
3,3,JoeBiden,0.010000,2020-11-01 06:23:36
5,5,JoeBiden,0.266667,2020-11-01 07:00:07
10,10,neoliberal,0.000000,2020-11-01 07:07:01
...,...,...,...,...
165,165,donaldtrump,0.600000,2020-11-08 06:52:39
156,156,donaldtrump,0.000000,2020-11-08 07:40:31
175,175,Conservative,0.000000,2020-11-08 16:34:15
157,157,donaldtrump,0.600000,2020-11-12 15:49:24


In [None]:
# Raw comment sentiment over time, all subreddits pooled into a single line.
# The placeholder annotation ("Text annotation with arrow", anchored at x=5) was removed:
# on a datetime axis a bare number is read as milliseconds since the epoch, which puts it
# far outside range_x, and its text was never replaced with real content.
fig = px.line(
    data,
    x='Time',
    y='Sentiment',
    range_x=['2020-11-01', '2020-11-08'],
    title='Sentiment over Election Week in Political Subreddits<br><i>(higher is more positive)',
    labels={'Time': 'Date'},
    hover_data={'Sentiment': False, 'Time': False},
)
# Hover interaction is switched off entirely for this chart.
fig.update_layout(hovermode=False)
fig.show()

In [9]:
# Wide layout: one row per comment timestamp, one column per subreddit.
# pivot_table's default aggregation (mean) applies when several comments share a timestamp.
pivot = data.pivot_table(
    values='Sentiment',
    index="Time",
    columns='Subreddit',
)

In [10]:
# Make sure the row labels are real datetimes (they may be strings after a CSV round trip).
parsed_times = pd.to_datetime(pivot.index)
pivot.index = parsed_times

In [None]:
# Preview only: fill the gaps in each subreddit column with an order-1 spline.
# The result is displayed, not stored - the plotting cell recomputes the same interpolation.
# (pandas delegates method='spline' to SciPy, so SciPy must be installed.)
pivot.interpolate(method='spline',order=1)

In [None]:
# Interpolated sentiment per subreddit, one line each, published as 'sentiment_2'.
#
# Line colours are looked up by (lower-cased) trace name instead of by trace position, so
# a missing or additional subreddit can neither raise IndexError nor shift every colour by one.
# NOTE(review): the name -> colour pairs were reconstructed from the original positional
# order (pivot columns sort as Conservative, JoeBiden, democrats, donaldtrump, neoliberal,
# politics) - confirm they match the intended palette.
SUBREDDIT_LINE_COLORS = {
    'conservative': "#B00000",
    'joebiden': "#33CCFF",
    'democrats': "#3300FF",
    'donaldtrump': "#FF0000",
    'neoliberal': "#0066CC",
    'politics': "gray",
}

fig = px.line(
    pivot.interpolate(method='spline', order=1),
    range_x=['2020-11-01', '2020-11-08'],
    title='Sentiment over Election Week in Political Subreddits<br><i>(higher is more positive)</i>',
    labels={'Time': 'Date', 'value': 'Sentiment', 'Subreddit': 'Subreddit<br><i>click any subreddit to remove it'},
    hover_name='Subreddit',
)

for trace in fig.data:
    line_color = SUBREDDIT_LINE_COLORS.get(str(trace.name).lower())
    if line_color is not None:  # unknown subreddits keep plotly's default colour
        trace.line.color = line_color

# Blank hover template on every trace (the original only covered the first six).
fig.update_traces(hovertemplate="<b></b>")

fig.show()
py.plot(fig, filename='sentiment_2', auto_open=True)

In [13]:
# Promote the first column of `data` to the index.
# NOTE(review): this rebinds `data` and is not idempotent - every re-run moves the *next*
# column into the index. It presumably targets a leftover 'Unnamed: 0' column from a CSV
# reload that is not shown in this notebook; confirm.
data = data.set_index(data.columns[0])

In [14]:
# Work on an independent copy: a plain `data2 = data` only aliases the same frame, so
# relabelling subreddits in data2 would silently rewrite `data` as well (and change the
# result of re-running any earlier cell that reads it). Mirrors `percent2 = percent.copy()`.
data2 = data.copy()

In [15]:
# Collapse individual subreddits into affiliation labels; names not listed are left unchanged.
affiliation_by_subreddit = {
    'JoeBiden': 'Democrat',
    'neoliberal': 'Democrat',
    'donaldtrump': 'Republican',
    'democrats': 'Democrat',
    'politics': 'r/politics',
    'Conservative': 'Republican',
}
data2['Subreddit'] = data['Subreddit'].replace(to_replace=affiliation_by_subreddit)

In [308]:
# Preview the affiliation-labelled comment table.
data2

Unnamed: 0_level_0,Subreddit,Sentiment,Time
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,Democrat,0.000000,2020-11-01 03:39:40
4,Democrat,0.000000,2020-11-01 06:04:27
3,Democrat,0.010000,2020-11-01 06:23:36
5,Democrat,0.266667,2020-11-01 07:00:07
10,Democrat,0.000000,2020-11-01 07:07:01
...,...,...,...
165,Republican,0.600000,2020-11-08 06:52:39
156,Republican,0.000000,2020-11-08 07:40:31
175,Republican,0.000000,2020-11-08 16:34:15
157,Republican,0.600000,2020-11-12 15:49:24


In [27]:
# Same wide layout as `pivot`, but with one column per affiliation label.
pivot2 = data2.pivot_table(
    values='Sentiment',
    index="Time",
    columns='Subreddit',
)

# Ensure the row labels are datetimes before plotting/interpolating.
parsed_times2 = pd.to_datetime(pivot2.index)
pivot2.index = parsed_times2

In [None]:
# Interpolated sentiment per affiliation, published as 'sentiment_3'.
smoothed_affiliation = pivot2.interpolate(method='spline', order=1)
fig = px.line(
    smoothed_affiliation,
    title='Sentiment by Subreddit Affiliation<br><i>(higher is more positive)</i>',
    labels={'Time': 'Date', 'value': 'Sentiment'},
)

# Colours are applied by trace position (trace 0, 1, 2 in figure order).
affiliation_line_colors = [
    "rgb(0, 21, 188)",
    "rgb(255, 0, 0)",
    "rgb(161, 161, 165)",
]
for position, rgb in enumerate(affiliation_line_colors):
    fig['data'][position]['line']['color'] = rgb

fig.update_layout(hovermode=False)
fig.show()
py.plot(fig, filename='sentiment_3', auto_open=True)

In [None]:
# Daily submission counts per subreddit: election week (November 2020) versus the mean of
# the same calendar day in October and September 2020.
subreddits = ['donaldtrump', 'joebiden', 'conservative', 'neoliberal', 'democrats', 'politics', 'republican']


def _count_submissions(subreddit, year, month, day):
    """Return how many submissions Pushshift lists for `subreddit` on one calendar day.

    The day window is built from naive datetimes, so it is interpreted in the machine's
    local timezone.
    """
    day_start = dt.datetime(year, month, day)
    day_end = day_start + dt.timedelta(days=1)
    return len(list(api.search_submissions(
        after=int(day_start.timestamp()),
        before=int(day_end.timestamp()),
        subreddit=subreddit,
    )))


rows = []
for d in [1, 2, 3, 4, 5, 6]:
    print('On day {}'.format(d))
    for x in subreddits:
        elect = _count_submissions(x, 2020, 11, d)
        hist = np.mean([
            _count_submissions(x, 2020, 10, d),
            _count_submissions(x, 2020, 9, d),
        ])
        rows.append([d, x, elect, hist])

# Build the frame once instead of DataFrame.append per row (quadratic, removed in
# pandas 2.0, and it left every row with index label 0). Columns stay unnamed (0..3)
# because a later cell assigns the column names; rows are now labelled 0..n-1.
percent = pd.DataFrame(rows)
        


In [32]:
# Drop the first column of `percent`.
# NOTE(review): on the frame built by the counting loop the first column is the day number,
# which the next cell still expects ('Day'). The saved outputs show an 'Unnamed: 0' column,
# so this presumably ran on a frame reloaded from CSV in a cell that is not shown - confirm.
# Also not idempotent: each re-run removes one more column.
percent = percent.drop(percent.columns[0],axis=1)

In [264]:
# Attach readable column names (Current = election-week count, Historical = Sep/Oct mean),
# then preview the table.
percent.columns = [
    'Day',
    'Subreddit',
    'Current',
    'Historical',
]
percent

Unnamed: 0,Day,Subreddit,Current,Historical
0,1,donaldtrump,287,302.0
1,1,joebiden,515,288.0
2,1,conservative,926,822.0
3,1,neoliberal,220,230.5
4,1,democrats,205,145.0
5,1,politics,1479,1970.5
6,1,republican,176,153.0
7,2,donaldtrump,366,297.5
8,2,joebiden,767,295.5
9,2,conservative,1278,777.0


In [212]:
# Re-count submissions through the Pushshift beta endpoint, walking consecutive
# submission-id (SID) windows and summing the hits per subreddit.
# Result: `percentmiss`, one row per subreddit (in `subreddits` order), single column 0.
# NOTE(review): each request is capped at size=1000, so a window holding more submissions
# than that would be under-counted - confirm the windows are narrow enough.
PUSHSHIFT_BETA_URL = 'https://beta.pushshift.io/search/reddit/submissions'

times = [365150000, 366150000, 367150000, 368150000, 369150000, 369585001]
subreddits = ['donaldtrump', 'joebiden', 'conservative', 'neoliberal', 'democrats', 'politics', 'republican']

# Consecutive (min_sid, next_min_sid) pairs; derived from `times` rather than a hard-coded 5.
sid_windows = list(zip(times[:-1], times[1:]))

totals = []
for x in subreddits:
    print('On subreddit r/{}'.format(x))
    e = []
    for y, (min_sid, next_min_sid) in enumerate(sid_windows):
        print('On SID {}'.format(y))
        response = requests.get(
            PUSHSHIFT_BETA_URL,
            params={'subreddit': x, 'min_sid': min_sid, 'max_sid': next_min_sid - 1, 'size': 1000},
            timeout=60,  # never hang the kernel on a stalled connection
        )
        response.raise_for_status()  # fail loudly on HTTP errors instead of on a missing 'data' key
        e.append(len(response.json()['data']))
    totals.append([sum(e)])

# Single construction instead of DataFrame.append per subreddit (removed in pandas 2.0).
percentmiss = pd.DataFrame(totals)
        


On subreddit r/donaldtrump
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4
On subreddit r/joebiden
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4
On subreddit r/conservative
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4
On subreddit r/neoliberal
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4
On subreddit r/democrats
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4
On subreddit r/politics
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4
On subreddit r/republican
On SID 0
On SID 1
On SID 2
On SID 3
On SID 4


In [None]:
# Overwrite the 'Current' counts for index labels 28-34 with manually supplied values,
# one per subreddit.
# NOTE(review): labels 28-34 are presumably the day-5 rows (7 subreddits per day) and the
# numbers presumably come from the `percentmiss` re-count - confirm both.
day5_counts = [574, 1253, 2003, 813, 357, 2437, 374]
for offset, count in enumerate(day5_counts):
    # .loc writes into `percent` itself. The chained form percent['Current'][i] = v assigns
    # through an intermediate Series, which triggers SettingWithCopyWarning and is a no-op
    # under pandas Copy-on-Write.
    percent.loc[28 + offset, 'Current'] = count

In [None]:
# Per-subreddit means of the numeric columns, reshaped to one row per subreddit and
# relabelled for the chart legend.
wide_df = (
    percent
    .pivot_table(columns=['Subreddit'])
    .transpose()
    .reset_index()
    .rename({'Current': 'Election Week', 'Historical': 'Historical Weekly Avg'}, axis=1)
)

# Bar chart comparing the two series per subreddit, published as 'submissions_1'.
fig = px.bar(
    wide_df,
    x='Subreddit',
    y=["Historical Weekly Avg", "Election Week"],
    title="Increase in Submissions During Election Week",
    labels={'value': 'Submissions', 'variable': ''},
    color_discrete_sequence=px.colors.qualitative.D3,
    hover_data={'variable': False, 'Subreddit': False, 'value': ':.0f'},
)

fig.show()

py.plot(fig, filename='submissions_1', auto_open=True)

In [74]:
# Independent copy of `percent` with subreddits collapsed into affiliation labels.
percent2 = percent.copy()
affiliation_labels = {
    'joebiden': 'Democrat',
    'neoliberal': 'Democrat',
    'donaldtrump': 'Republican',
    'democrats': 'Democrat',
    'politics': 'r/politics',
    'conservative': 'Republican',
    'republican': 'Republican',
}
percent2['Subreddit'] = percent2['Subreddit'].replace(to_replace=affiliation_labels)

In [None]:
# Per-affiliation means of the numeric columns, one row per affiliation label.
wide_df = (
    percent2
    .pivot_table(columns=['Subreddit'])
    .transpose()
    .reset_index()
    .rename({'Current': 'Election Week', 'Historical': 'Historical Weekly Avg'}, axis=1)
)

# Bar chart comparing the two series per affiliation, published as 'submissions_2'.
fig = px.bar(
    wide_df,
    x='Subreddit',
    y=["Historical Weekly Avg", "Election Week"],
    title="Increase in Submissions During Election Week",
    labels={'value': 'Submissions', 'variable': ''},
    color_discrete_sequence=px.colors.qualitative.D3,
    hover_data={'variable': False, 'Subreddit': False, 'value': ':.0f'},
)

fig.show()

py.plot(fig, filename='submissions_2', auto_open=True)

In [78]:
# Relative change of the election-week count versus the historical mean, per (Day, Subreddit).
norm = (
    percent
    .pivot_table(columns=['Day', 'Subreddit'])   # rows: Current / Historical
    .reindex(['Historical', 'Current'])          # Historical first so it is the baseline
    .pct_change()                                # Current row becomes (Current - Historical) / Historical
    .fillna(.01)                                 # the Historical row has no predecessor -> NaN -> 0.01
    .transpose()
    .reset_index()
)

In [79]:
# Preview the relative-change table ('Historical' is the 0.01 fill value, 'Current' the change).
norm

Unnamed: 0,Day,Subreddit,Historical,Current
0,1,conservative,0.01,0.126521
1,1,democrats,0.01,0.413793
2,1,donaldtrump,0.01,-0.049669
3,1,joebiden,0.01,0.788194
4,1,neoliberal,0.01,-0.045553
5,1,politics,0.01,-0.249429
6,1,republican,0.01,0.150327
7,2,conservative,0.01,0.644788
8,2,democrats,0.01,0.852843
9,2,donaldtrump,0.01,0.230252


In [None]:
# Mean relative increase per subreddit, coloured by affiliation, published as 'submissions_3'.
wide_df = (
    norm
    .pivot_table(columns=['Subreddit'])
    .transpose()
    .reset_index()
    .rename({'Current': 'Increase Over Average'}, axis=1)
)

# Derive each bar's affiliation from the subreddit name rather than from a hand-written
# list that only lines up while the rows happen to be in one particular order.
SUBREDDIT_AFFILIATION = {
    'joebiden': 'Democrat',
    'neoliberal': 'Democrat',
    'democrats': 'Democrat',
    'donaldtrump': 'Republican',
    'conservative': 'Republican',
    'republican': 'Republican',
    'politics': 'r/politics',
}
wide_df['Color'] = wide_df['Subreddit'].map(SUBREDDIT_AFFILIATION)

fig = px.bar(
    wide_df,
    x='Subreddit',
    y=["Increase Over Average"],
    color='Color',
    title="Increase in Submissions During Election Week<br><i>change from historical weekly average",
    labels={'value': 'Increase in Submissions', 'variable': '', 'Color': 'Subreddit'},
    # Pin each colour to its label so it no longer depends on which label appears first.
    color_discrete_map={
        'Republican': 'rgb(255, 0, 0)',
        'Democrat': 'rgb(0, 21, 188)',
        'r/politics': 'rgb(161, 161, 165)',
    },
    hover_data={'Color': False},
)
fig.update_layout(yaxis_tickformat='%')
fig.show()
py.plot(fig, filename='submissions_3', auto_open=True)

In [None]:
# Affiliation-level copy of the relative-change table.
norm2 = norm.copy()
affiliation_labels = {
    'joebiden': 'Democrat',
    'neoliberal': 'Democrat',
    'donaldtrump': 'Republican',
    'democrats': 'Democrat',
    'politics': 'r/politics',
    'conservative': 'Republican',
    'republican': 'Republican',
}
norm2['Subreddit'] = norm2['Subreddit'].replace(to_replace=affiliation_labels)

# Mean relative increase per affiliation, one row per label.
wide_df = (
    norm2
    .pivot_table(columns=['Subreddit'])
    .transpose()
    .reset_index()
    .rename({'Current': 'Increase Over Average'}, axis=1)
)

# One bar per affiliation, published as 'submissions_4'.
fig = px.bar(
    wide_df,
    x='Subreddit',
    y=["Increase Over Average"],
    color='Subreddit',
    title="Increase in Submissions During Election Week<br><i>change from historical weekly average",
    labels={'value': 'Increase in Submissions', 'variable': ''},
    color_discrete_sequence=['rgb(0, 21, 188)', 'rgb(255, 0, 0)', 'rgb(161, 161, 165)'],
    hover_data={'Subreddit': False},
)
fig.update_layout(yaxis_tickformat='%')
fig.show()
py.plot(fig, filename='submissions_4', auto_open=True)

In [267]:
# Build the animated chart object from the project-local `plots.barplot` helper:
# items are subreddits, values come from 'Current', frames are keyed by 'Day'.
# NOTE(review): the execution counts show this cell ran *after* the cell that relabels
# norm['Day'] as date ranges; in top-to-bottom order it sees the numeric days instead.
# Presumably the relabelling cell should come first - confirm and reorder.
fig = barplot(norm, item_column='Subreddit', value_column='Current',time_column='Day', top_entries=7)

In [259]:
# Replace the numeric day (1..6) with a readable date-range label, e.g. 1 -> '11/1—11/2'.
day_labels = {day: '11/{}—11/{}'.format(day, day + 1) for day in range(1, 7)}
norm['Day'] = norm['Day'].replace(day_labels)

In [None]:
# Render the animated chart built by `barplot` (vertical bars, 1500 per frame_duration)
# under the name 'animated_1'.
# NOTE(review): `fig` must be the barplot object here, not one of the plotly figures that
# earlier cells also bind to `fig` - this depends on cell execution order.
fig.plot(item_label = 'Increase in Submissions During Election Week<br><i>change from historical weekly average', value_label = 'Increase in Submissions', frame_duration = 1500,orientation='vertical',filename='animated_1')

In [269]:
# Replace the numeric day (1..6) with a readable date-range label, e.g. 1 -> '11/1—11/2'.
day_labels = {day: '11/{}—11/{}'.format(day, day + 1) for day in range(1, 7)}
norm2['Day'] = norm2['Day'].replace(day_labels)

In [None]:
# Average the affiliation-level table per (Subreddit, Day) so each label has one value per frame.
affiliation_by_day = (
    norm2
    .pivot_table(columns=['Subreddit', 'Day'])
    .transpose()
    .reset_index()
)

# Animated chart of the per-affiliation change, rendered as "animated_2".
fig = barplot(
    affiliation_by_day,
    item_column='Subreddit',
    value_column='Current',
    time_column='Day',
    top_entries=7,
)
fig.plot(
    item_label='Increase in Submissions During Election Week<br><i>change from historical weekly average',
    value_label='Increase in Submissions',
    frame_duration=1000,
    orientation='vertical',
    filename="animated_2",
)

In [452]:
# For every state subreddit, take up to 1000 comments from 2020-11-01 .. 2020-11-07
# (requested with sort_type='score') and count how many lean towards Trump-related terms
# versus Biden-related terms.
# Result: `states`, one row per subreddit with unnamed columns 0 (name), 1 (Trump-leaning
# comments), 2 (Biden-leaning comments); a later cell assigns the column names.
# NOTE(review): `state_names` is not defined in any visible cell - confirm where it comes from.

# Each alternative is anchored at a word start so that e.g. 'dem' no longer matches inside
# 'pandemic', and overlapping phrases ('donald trump' / 'trump', 'democrat' / 'dem',
# 'kamala harris' / 'kamala') are counted once instead of twice.
TRUMP_TERMS = re.compile(r"\b(?:trump|republican|conservative|gop\b|pence\b)")
BIDEN_TERMS = re.compile(r"\b(?:biden|democrat|dems?\b|libs\b|liberal|kamala)")

window_before = int(dt.datetime(2020, 11, 7).timestamp())
window_after = int(dt.datetime(2020, 11, 1).timestamp())
max_response_cache = 1000

rows = []
for x in state_names:
    print('On {}'.format(x))
    gen = api.search_comments(subreddit=x, sort_type='score', before=window_before, after=window_after)

    # Pull at most `max_response_cache` comments from the lazy generator.
    cache = []
    for c in gen:
        cache.append(c)
        if len(cache) >= max_response_cache:
            break

    trump = 0
    biden = 0
    for comment in cache:
        text = comment.body.lower()
        trumpcount = len(TRUMP_TERMS.findall(text))
        bidencount = len(BIDEN_TERMS.findall(text))
        # A comment is attributed to whichever side it mentions more; ties (including
        # "mentions neither") count for nobody. The original guard
        # `if trumpcount and bidencount == 0` parsed as `trumpcount and (bidencount == 0)`
        # and therefore skipped exactly the comments that mentioned only Trump terms.
        if trumpcount > bidencount:
            trump += 1
        elif bidencount > trumpcount:
            biden += 1
    rows.append([x, trump, biden])

# Single construction instead of DataFrame.append per state (removed in pandas 2.0).
states = pd.DataFrame(rows)
                
                         

On Alaska
On Alabama
On Arkansas
On Arizona
On California
On Colorado
On Connecticut
On WashingtonDC
On Delaware
On Florida
On Georgia
On Hawaii
On Iowa
On Idaho
On Illinois
On Indiana
On Kansas
On Kentucky
On Louisiana
On Massachusetts
On Maryland
On Maine
On Michigan
On Minnesota
On Missouri
On Mississippi
On Montana
On NorthCarolina
On NorthDakota
On Nebraska
On NewHampshire
On NewJersey
On NewMexico
On Nevada
On NewYork
On Ohio
On Oklahoma
On Oregon
On Pennsylvania
On RhodeIsland
On SouthCarolina
On SouthDakota
On Tennessee
On Texas
On Utah
On Virginia
On Vermont
On Washington
On Wisconsin
On WestVirginia
On Wyoming


In [454]:
# Name the three columns: subreddit/state name, Trump-leaning count, Biden-leaning count.
states.columns = [
    'State',
    'Trump',
    'Biden',
]

In [462]:
# The choropleth cell uses hover_name='Full', but no visible cell creates that column.
# Keep the original name in 'Full' before 'State' is overwritten, so the map can show it.
# NOTE(review): `us_state_abbrev` is not defined in any visible cell; presumably it maps
# these names to the two-letter codes that locationmode='USA-states' expects - confirm.
if 'Full' not in states.columns:
    states['Full'] = states['State']
states['State'] = states['State'].map(us_state_abbrev)

In [507]:
# Biden-to-Trump ratio of attributed comments; a Trump count of 0 is treated as 1 so the
# division never hits zero.
trump_denominator = states['Trump'].replace({0: 1})
states['Ratio'] = states['Biden'] / trump_denominator

In [None]:
# US choropleth shaded by the Biden/Trump ratio, published as 'map_1'.
# NOTE(review): hover_name='Full' requires a 'Full' column on `states` - confirm it exists
# before this cell runs.
fig = px.choropleth(
    states,
    locations='State',
    locationmode='USA-states',
    scope="usa",
    projection="albers usa",
    color='Ratio',
    color_continuous_scale='blues',
    labels={'Ratio': 'More Biden Mentions'},
    hover_name='Full',
    hover_data={'Trump': True, 'Biden': True, 'State': False, 'Ratio': False},
    title='Popularity of Candidate in State Subreddits',
)

# Methodology footnote placed outside the plot area (no arrow is drawn).
fig.add_annotation(
    x=1.15,
    y=-.15,
    text="Mentions of candidate and related terms from top 1000 comments in subreddit, 11/1-11/7",
    showarrow=False,
    arrowhead=1,
)
fig.show()
py.plot(fig, filename='map_1', auto_open=True)