In [277]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.colors import n_colors
import datetime
from dateutil import tz
import math
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
import numpy as np



# METHOD 1: Hardcode zones:
from_zone = tz.gettz('UTC')
to_zone = tz.gettz('America/New_York')


flip_df = pd.read_csv('battle_state_flips.csv')
flip_df.time_flipped = pd.to_datetime(flip_df.time_flipped)
flip_df.time_flipped = flip_df.time_flipped.apply(lambda x: x.replace(tzinfo=from_zone))
flip_df.time_flipped = flip_df.time_flipped.apply(lambda x: x.astimezone(to_zone))

electoral_df = pd.read_csv('AP_call_times.csv')
electoral_df.call_time = pd.to_datetime(electoral_df.call_time)
electoral_df.call_time = electoral_df.call_time.apply(lambda x: x.astimezone(to_zone))

tweet_df2 = pd.read_csv('trump_tweets_all.txt',sep='\t')
tweet_df2.timestamp = pd.to_datetime(tweet_df2.timestamp)
tweet_df2.timestamp = tweet_df2.timestamp.apply(lambda x: x.replace(tzinfo=from_zone))
tweet_df2.timestamp = tweet_df2.timestamp.apply(lambda x: x.astimezone(to_zone))

In [2]:
def candidate_total_votes(df,date):
    temp_df = df[df.timestamp<=date]
    
    #get latest report from states
    latest_state = temp_df.sort_values('timestamp').groupby('state').tail(1)
    states_reporting = len(latest_state)
    
    #Trump votes
    t_votes_w = latest_state[latest_state.leading_candidate_name=='Trump']['leading_candidate_votes'].sum()
    t_votes_l = latest_state[latest_state.trailing_candidate_name=='Trump']['trailing_candidate_votes'].sum()
    t_states = len(latest_state[latest_state.leading_candidate_name=='Trump'])
 
    #Biden votes
    b_votes_w = latest_state[latest_state.leading_candidate_name=='Biden']['leading_candidate_votes'].sum()
    b_votes_l = latest_state[latest_state.trailing_candidate_name=='Biden']['trailing_candidate_votes'].sum()
    b_states = len(latest_state[latest_state.leading_candidate_name=='Biden'])
    
    #combine
    t_votes = t_votes_w + t_votes_l
    b_votes = b_votes_w + b_votes_l
    
    return t_votes,b_votes
        
    

In [3]:
def date_range(start,end,increment,increment_size):
    date_range=[]
    if increment not in ['hour','day']:
        return "increment must be in ['hour','day']"
    
    else:
        contained=True
        while start<=end:           
            date_range.append(start)
            if increment=='hour':
                start+=datetime.timedelta(hours=increment_size)
            else:
                start+=datetime.timedelta(days=increment_size)
    return date_range
        

In [4]:
def rolling_average(df,date_range,window_size,column_name):
    end_idx = window_size-1
    averages=[]
    centers=[]
    for i in range(len(date_range)-(window_size-1)):
        date_from = date_range[i]
        date_to = date_range[end_idx]
        temp_df=df[(df.timestamp>=date_from) & (df.timestamp<date_to)]
        if len(temp_df)<3:
            end_idx+=1
            continue
        avg = temp_df[column_name].mean()
        averages.append(avg)
        centers.append(date_range[end_idx]) 
        
        end_idx+=1
        
    return averages, centers

In [5]:
def find_lead(df,timestamp):
    df=df[df.call_time<=timestamp]
    biden_score = df[df.winner=='Biden']['votes'].sum()
    trump_score = df[df.winner=='Trump']['votes'].sum()
    
    return trump_score - biden_score

# Sentiment analysis on tweets




In [12]:
tweet_df = pd.read_csv('trump_tweets.txt',sep='\t')
tweet_df.timestamp = pd.to_datetime(tweet_df['timestamp'])

tweet_df.timestamp = tweet_df.timestamp.apply(lambda x: x.replace(tzinfo=from_zone)) 
tweet_df.timestamp = tweet_df.timestamp.apply(lambda x: x.astimezone(to_zone))

#remove URLs which are either video/picture
tweet_df.tweet = tweet_df.tweet.str.replace('((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*','')

#sentiment analysis
vader = SentimentIntensityAnalyzer()
basic = SentimentAnalyzer()
tweet_df['sentiment'] = tweet_df.tweet.apply(lambda x: vader.polarity_scores(x)['compound'])
#tweet_df['sentiment2'] = tweet_df.tweet.apply(lambda x: basic.evaluate(x)['compound'])

tweet_df=tweet_df[tweet_df.tweet!='']

# Split our tweets into 12 hour buckets, average those

In [41]:
start=datetime.datetime(2020,11,1,tzinfo=to_zone)
end=datetime.datetime(2020,11,12,tzinfo=to_zone)

date_range_list = date_range(start,end,'hour',12)
averages, centers = rolling_average(tweet_df,date_range_list,2,'sentiment')


# Calculate Trump defecits/leads

In [374]:

call_times=list(electoral_df.call_time)
states=list(electoral_df.state)
states=states[::-1]
call_times = call_times[::-1]
states=[state for state,time in zip(states,call_times) if time>=datetime.datetime(2020,11,4,1,tzinfo=to_zone) and time<max(centers)]
call_times = [time for time in call_times if time>=datetime.datetime(2020,11,4,1,tzinfo=to_zone) and time<max(centers)]

trump_leads = [find_lead(electoral_df,time) for time in call_times]

# Calculate number of tweets for each day

Include tweets that just include media (picture, video)

In [279]:
day_range= date_range(start,end,'day',1)
number_tweets=[]

for i in range(len(day_range)-1):
    num_tweets = len(tweet_df[(tweet_df2.timestamp>=day_range[i]) & (tweet_df2.timestamp<day_range[i+1])])
    number_tweets.append(num_tweets)
    
day_range=day_range[:-1]


Boolean Series key will be reindexed to match DataFrame index.



# Make scatter

In [415]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=[center-datetime.timedelta(hours=6) for center in centers], y=averages, name="Avg Trump Sentiment",
               line=dict(dash='dot',color='black'),mode='lines+markers',
              hovertemplate='Midpoint of 12 hr window: %{x}<br>Average Sentiment: %{y}'),
    secondary_y=False,
)

fig.add_trace(go.Scatter(x=['2020-11-03','2020-11-04'], y=[10,10],mode='none',opacity=0.1,name='Election Day',
                    fill='tozeroy',showlegend=False, fillcolor='rgba(255, 0, 0, 0.1)'))

fig.add_trace(go.Scatter(x=call_times,y=trump_leads
    , name="Trump Electoral Lead",line=dict(dash='dot',color='red'),mode='lines+markers',customdata=states,
                        hovertemplate='Lead: %{y} votes <br>State Called: %{customdata}<br>Time: %{x}'),
    secondary_y=True,
)

fig.add_trace(go.Scatter(x=['2020-11-03','2020-11-04'], y=[-10,-10],mode='none',opacity=0.1,name='Election Day',
                    fill='tozeroy', fillcolor='rgba(255, 0, 0, 0.1)'))




fig.add_shape(
        # Vertical Line
        go.layout.Shape(
            type="line",
            x0=flip_df['time_flipped'][0],
            y0=-0.5,
            x1=flip_df['time_flipped'][0],
            y1=0.8,
            opacity=0.8,
            name='woo',
            line=dict(
                color="RoyalBlue",
                width=1
            )))

fig.add_shape(
        # Vertical Line
        go.layout.Shape(
            type="line",
            x0=flip_df['time_flipped'][1],
            y0=-0.5,
            x1=flip_df['time_flipped'][1],
            y1=0.8,
            opacity=0.8,
            name='woo',
            line=dict(
                color="RoyalBlue",
                width=1
            )))

fig.add_shape(
        # Vertical Line
        go.layout.Shape(
            type="line",
            x0=flip_df['time_flipped'][2],
            y0=-0.5,
            x1=flip_df['time_flipped'][2],
            y1=0.8,
            opacity=0.8,
            name='woo',
            line=dict(
                color="RoyalBlue",
                width=1
            )))


fig.add_annotation(x=flip_df['time_flipped'][0], y=0.8,
            text="Georgia Flips",
            font=dict(size=13),
            showarrow=False,
            textangle=-45,
            xshift=20,
            yshift=33)

fig.add_annotation(x=flip_df['time_flipped'][1], y=0.8,
            text="Michigan Flips",
            font=dict(size=13),
            showarrow=False,
            textangle=-45,
            xshift=20,
            yshift=33)

fig.add_annotation(x=flip_df['time_flipped'][2], y=0.8,
            text="Pennsylvania Flips",
            font=dict(size=13),
            showarrow=False,
            textangle=-45,
            xshift=42,
            yshift=46)


fig.update_yaxes(secondary_y=False,range=[-0.5,0.8],title="Average Trump Tweet Sentiment (12 hrs)")
fig.update_yaxes(secondary_y=True,title="Trump's Electoral College Lead",tick0 = -50,dtick=20)

fig.update_layout(legend=dict(
    yanchor="top",
    y=0.95,
    xanchor="right",
    x=0.90
))

fig.update_layout(title='Trump\'s Election Twitter Storm',width=1000,margin=dict(b=0.1,r=0.1))

In [416]:
colors = n_colors('rgb(255, 200, 200)', 'rgb(200, 0, 0)', len(set(number_tweets)), colortype='rgb')

day_range2 = [str(date.month)+'/'+str(date.day) for date in day_range]
color_seq = sorted(set(number_tweets))
color_map={k:v for k,v in zip(color_seq,colors)}


table = go.Figure(data=[go.Table(
  header=dict(
    values=['Date']+day_range2,
    line_color='black', fill_color=['white']+['white']*len(day_range2),
    align='center',font=dict(color=['black']+['black']*len(day_range2) ,size=12)
  ),
  cells=dict(
    values=['# of Tweets']+number_tweets,
    line_color='black',
    fill_color=['white']+[color_map[number_tweets[i]] for i in range(len(number_tweets))],
    align='center', font=dict(color=['black']+['white']*len(day_range2), size=11)
    ))
])

table.update_layout(margin=dict(t=0,l=25))
table.show()

In [417]:
paragraph="""**Hypothesis:** As Trump's chances of a second term narrowed in the days following the election,
he has become increasingly negative on Twitter.
\n In the days following the election, we saw Biden near victory and Donald Trump refuse to accept results 
in many different states. With his back against the wall, did his profound tweets change in their sentiment?
All state race call times are from the Associated Press.

\n Sentiment analysis allows us to score a statement on a scale from -1 to 1, where -1 represents a 
very negative statement, and 1 represents a very positive statement.
For each Trump tweet from November 1st through November 12th, a sentiment score is calculated. Sentiment analysis is rather 
noisy, so some form of aggregation is needed discover any sort of pattern. Here we calculate the average sentiment in 
disjoint 12 hour windows. This was done so each day has a point that represents his sentiment in the morning and night. Each 
of these points is plotted at the midpoint of their 12 hour window (6 a.m. and 6 p.m.). Note: Tweets that only
contain media and retweets are removed.

NLTK's Vader is used for sentiment analysis to preserve exclamations and capitalization, which is particularly important for 
Trump's tweets like "I WON THIS ELECTION, BY A LOT!""

Sources: [@AP_politics](https://twitter.com/AP_Politics?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor),
[@realDonaldTrump](https://twitter.com/realDonaldTrump),
[Alex Gaynor & NYT](https://github.com/alex/nyt-2020-election-scraper)
"""

In [418]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import pandas as pd


app = dash.Dash()

app.layout = html.Div(children=[
    html.Div([
        dcc.Graph(figure=fig,id='g1',style={'display':'block','height': 650}),dcc.Graph(figure=table,id='table',style={'display':'block','height': 350,'width':1170})], style={'display': 'inline-block','height':900,'width':1190}),
    html.Div([
        html.P(
           dcc.Markdown(paragraph),style={'height':350,'fontSize':16,'display':'block'})],style={'margin-left':2,'margin-right':0,'height':900,'width':300,'vertical-align':'top','display': 'inline-block'})
        
], style={'width': '100%', 'display': 'inline-block'})



if __name__ == '__main__':
    
    app.run_server(debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET /_dash-component-suites/dash_renderer/react@16.8.6.min.js?v=1.1.2&m=1576595738 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET /_dash-component-suites/dash_renderer/prop-types@15.7.2.min.js?v=1.1.2&m=1576595738 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET /_dash-component-suites/dash_renderer/react-dom@16.8.6.min.js?v=1.1.2&m=1576595738 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET /_dash-component-suites/dash_core_components/highlight.pack.js?v=1.3.1&m=1576595950 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET /_dash-component-suites/dash_html_components/dash_html_components.min.js?v=1.0.1&m=1576596177 HTTP/1.1[0m" 200 -
127.0.0.1 - - [21/Nov/2020 20:26:01] "[37mGET /_dash-component-suites/dash_renderer/dash_renderer.min.js?v=1.1.