In [1]:
import os
import sys
import re
import json
import time
import emoji
import random
import numpy as np
import pandas as pd
from pandarallel import pandarallel
from copy import deepcopy
from datetime import date, datetime
from collections import Counter, OrderedDict

from IPython.display import clear_output

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Show DataFrames in full when displayed: no cap on rows, columns, total width
# or per-cell text length. `None` is the supported "unlimited" value for
# display.max_colwidth; the old `-1` sentinel was deprecated in pandas 1.0 and
# is rejected by pandas 2.x.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
# t0 = time.time()

In [2]:
# Notebook-wide path configuration, looked up by key in the cells below.
cfg = {
    'input dir': 'input',
    'results dir': 'input/content',
}

#### Load original tweet data

In [3]:
# Read the raw tweet export from the configured input directory.
inFile = os.path.join(cfg['input dir'], 'Tweets_US.csv')
tweets_df = pd.read_csv(inFile, lineterminator='\n')

# Drop columns that the rest of this notebook does not use; errors='ignore'
# tolerates exports in which some of them are absent.
columns_to_drop = ['user_verified', 'truncated', 'is_retweet', 'coordinates', 'has_media_type',
                   'in_reply_to_status_id', 'source']
tweets_df = tweets_df.drop(columns=columns_to_drop, errors='ignore')

# 'date' is the part of 'timestamp' before the first space (presumably the
# calendar date -- the timestamp format is not shown here, confirm against the data).
# Splitting keeps a timestamp that has no space intact, whereas slicing with
# str.find() would chop its last character (find() returns -1), and missing
# timestamps stay NaN instead of raising.
tweets_df['date'] = tweets_df['timestamp'].str.split(' ', n=1).str[0]

print(f'Number of tweets: {len(tweets_df) :,}')

Number of tweets: 645,253


#### Load results

In [20]:
# Candidate days as 'YYYY-MM-DD' strings; days without a results file are skipped.
dt = pd.date_range(start='2020-10-02', end='2020-11-03').strftime('%Y-%m-%d')

# results: day string -> DataFrame read from '<results dir>/results_<day>'
results = {}
for d in dt:
    results_path = os.path.join(cfg['results dir'], f'results_{d}')
    if not os.path.isfile(results_path):
        continue
    results[d] = pd.read_csv(results_path, lineterminator='\n')
    # Disabled experiments, kept for reference:
    # results[d]['trump_ind'] = results[d]['trump'].apply(lambda x: int(x>0.7))
    # results[d]['biden_ind'] = results[d]['trump'].apply(lambda x: int(x<0.3))
    # results[d] = results[d].merge(tweets_df[['id']], on='id', how='left')
    print(f'results[{str(d)}]: {results[d].shape}')

results[2020-10-02]: (43903, 7)
results[2020-10-03]: (29841, 7)
results[2020-10-04]: (21498, 7)
results[2020-10-05]: (24098, 7)
results[2020-10-06]: (30198, 7)
results[2020-10-07]: (19599, 7)
results[2020-10-08]: (25143, 7)
results[2020-10-09]: (19070, 7)
results[2020-10-10]: (15858, 7)
results[2020-10-11]: (13973, 7)
results[2020-10-12]: (10154, 7)
results[2020-10-13]: (13205, 7)
results[2020-10-14]: (15632, 7)
results[2020-10-15]: (15683, 7)
results[2020-10-16]: (22784, 7)
results[2020-10-17]: (13680, 7)
results[2020-10-18]: (7200, 7)
results[2020-10-19]: (4817, 7)
results[2020-10-20]: (15292, 7)
results[2020-10-21]: (16686, 7)
results[2020-10-22]: (18225, 7)
results[2020-10-23]: (43915, 7)
results[2020-10-24]: (14563, 7)
results[2020-10-25]: (15033, 7)
results[2020-10-26]: (17485, 7)
results[2020-10-27]: (17797, 7)
results[2020-10-28]: (20355, 7)
results[2020-10-29]: (17525, 7)
results[2020-10-30]: (17376, 7)
results[2020-10-31]: (17158, 7)
results[2020-11-01]: (21571, 7)


In [34]:
# Spot-check one day: first 50 rows whose 'trump' score is above 0.7.
d = '2020-10-28'
day_scores = results[d]
day_scores[day_scores['trump'] > .7].head(50)

Unnamed: 0,id,user_name,user_description,text,states,date,trump
2,1321602575553253382,Bitmoji Maddow,"Amateur mixologist, truth finder, fishing, reading üíôü•Éüé£ #watchthisspace (Not affiliated with @maddow ) #maddow #bitmojimaddow #trms #rachelmaddow",#plushbiden found a perfect spot to watch @maddow #maddow in an hour! #bitmojimaddow #biden #Biden2020 #JoeBiden https://t.co/CYYd39FcVY,['new york'],2020-10-28,0.800144
7,1321602522260426753,_Christene_,‚ùÄ Danish-American ‚ùÄ #AmericaFirst ‚ùÄ Conservative ‚ùÄ #MAGA Love to all Patriots ‚ùÄ Love our President ‚ùÄ #Trump2020 ‚ùÄ #NeverBiden ‚ùÄ Democrats are Evil ‚ùÄ,Fox News did a piece of the hundreds of Trump Caravan Rallies happening on Long Island New York each weekend. \n\nEnthusiasm is over the top.\n\n I know for a FACT this is going to be a LANDSLIDE ELECTION for President Trump \n#Vote \n#TRUMP https://t.co/mnRjYTlypN,['usa'],2020-10-28,0.844911
13,1321602392564002816,Transition To Greatness 2020,America will never be a Socialist country. ü¶å#MAGA #KAG #Trump2020.,Upcoming #Trump2020 Rally\nThu Oct 29 Tampa FL 01:30 pm (EDT)\nThu Oct 29 Fayetteville NC 06:30 pm (EDT)\nFri Oct 30 Waterford Township MI 01:00 pm (EDT)\nFri Oct 30 Green Bay WI 02:30 pm (CDT)\nFri Oct 30 Rochester MN 05:00 pm (CDT)\n[https://t.co/xHSQnhUZN6]\n#Trump #MAGA2020 https://t.co/6RHwpxFBSL,['usa'],2020-10-28,0.887808
15,1321602359710154753,Michael_Militello,The Truth! Karma! Dignity And Respect! BlackLives Matter! Diversity and Inclusion! üá∫üá∏#SpreadLove #AmericaForAll #BlueWave2020üåä,Does #Trump want Americans to die? Whose side is he on?\n#VoteBlueToSaveAmerica,['usa'],2020-10-28,0.81208
20,1321602189966585862,Bailey4beau,,@realDonaldTrump C‚Äômon out #JoeBiden when are you going to address the #Bobulinksi interview &amp; #PlausibleDeniability ? Take the lid off #BigGuy ... #MAGA #MAGA2020 #Trump #Trump2020 https://t.co/0PfU2OWIZb,['usa'],2020-10-28,0.885682
39,1321601868955557891,Social Opinions,#SocialOpinions,Who do you think will win? (Not who you want to win but rather who do you think will win) #Election2020 #Elections2020 #ElectionDay #electionpoll #trump #biden #kanye,"['new york', 'massachusetts']",2020-10-28,0.833994
46,1321601738961465349,PETER MAER,"Retired White House Corr. \nEdward R. Murrow Award, Merriman Smith Award for Presidential Coverage. Interfaith Alliance Walter Cronkite Faith & Freedom Award.","Another #GOP setback. Supreme Court allows N.C. to extend deadline for receiving mail-in ballots, defeat for #Trump and #Republicans in key battleground state. #Election2020 https://t.co/QXT06KdRHF",['district of columbia'],2020-10-28,0.861693
51,1321601659655626752,NEWS MELT Breaking News Bites,Made-for-Mobile News Video Hub. Always Video. Always Breaking. Always Down The Middle.,"BREAKING TONIGHT: Trump Rallies Republicans In Arizona With Attacks On Biden. Crowds Chant ""Lock Him Up""\n#Trump #Trump2020 #Biden2020 #Election2020 https://t.co/bzLhnLYH2e",['usa'],2020-10-28,0.77792
62,1321601528596111360,A Farmhouse Life,"I‚Äôm a wife and Momma of 5 foremost! I‚Äôm a Latter Day Saint, a mother of a US Marine, I have a degree in Psychology, and a genuine love of God and Country.","B*bulinsky docs verified by S*nate. H*nter‚Äôs laptop really does belong to him as verified by Jill B*den. C*rson T*cker speaks truth and we all know it! R*b W*lker, the spouse of J*ll B*den's top White House aide, says they will ‚Äúall be buried‚Äù. #TwitterCensorship @JoeBiden #trump",['usa'],2020-10-28,0.733207
79,1321601241378627585,Deplorable Chumpüåüüåüüåü,‚≠ê‚≠ê #KeepAmericaGreat ‚≠ê‚≠ê\n#Christian Conservative #JobsNotMobs #MAGA #KAG #Israel #2A #AmericaFirst #Brexit #NRA \n#TrumpPence2020 ‚≠êNo Lists,#Biden supporters are violent thugs. https://t.co/5gNFKhn5be,['usa'],2020-10-28,0.866367


#### Analysis by state and date

In [21]:
states_df = pd.read_csv(os.path.join(cfg['input dir'], 'USA_States_code.csv'))

# Zeroed accumulator template: one [trump, biden, count] triple per lower-cased
# state name, plus a country-wide 'usa' entry. Copied afresh for every day.
daily_survey = {name.lower(): [0, 0, 0] for name in states_df.State.values}
daily_survey['usa'] = [0, 0, 0]

In [27]:
%%time

surveys = dict()
for k, result_df in results.items():
    trump_df = result_df[['user_name', 'trump']].groupby('user_name').mean()    
    trump_df['biden'] = 1 - trump_df['trump']
    state_df = result_df[['user_name', 'states']].groupby('user_name').min()    
    daily_result_df = state_df.join(trump_df)

    daily = deepcopy(daily_survey)
    for states, trump, biden in daily_result_df.values:  
        
        # Country level figures----#
        daily['usa'][0] += trump
        daily['usa'][1] += biden
        daily['usa'][2] += 1
        
        # State level figures------#
        states = eval(states) #Convert from string to list
        for state in states:
            if state != 'usa':
                daily[state][0] += trump / len(states) #support for trump  
                daily[state][1] += biden / len(states) #support for biden
                daily[state][2] += 1 #user count
    
    #normalise trump/biden support into percentages
    for states, r in daily.items():    
        daily[states][0] = np.round(100 * r[0]/max(1, r[2]), 1)
        daily[states][1] = np.round(100 * r[1]/max(1, r[2]), 1)
        
    daily = pd.DataFrame(daily).transpose()    
    daily.rename(columns={0: 'trump', 1: 'biden', 2:'count'}, inplace=True)
    
    surveys[k] = daily

CPU times: user 46 s, sys: 678 ms, total: 46.7 s
Wall time: 46 s


In [31]:
def get_ts_df(key):
    """Build the day-by-day series for one region from the global ``surveys``.

    Walks the global ``dt`` day list in order, skipping days that have no
    entry in ``surveys``, and reads row ``key`` of each day's survey.

    Parameters
    ----------
    key : str
        Index label of the region in each survey DataFrame (e.g. ``'usa'``).

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``trump``, ``biden``, ``count``; one row per
        available day.
    """
    series = {'date': [], 'trump': [], 'biden': [], 'count': []}

    for day in dt:
        if day not in surveys:
            continue
        # The survey row unpacks positionally as (trump, biden, count).
        trump, biden, count = surveys[day].loc[key]
        for column, value in zip(series, (day, trump, biden, count)):
            series[column].append(value)

    return pd.DataFrame(series)

In [32]:
def plot1(df, key):
    """Show support lines and user-count bars for one region.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``date``, ``biden``, ``trump`` and ``count`` columns.
    key : str
        Region name, used only in the figure title.

    The two support series are drawn as line+marker traces on the secondary
    y-axis and the user count as bars on the primary y-axis. The figure is
    displayed with ``fig.show()``; nothing is returned.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Traces are added in the same order as before: Biden, Trump, then the bars.
    for column, label in (('biden', 'Biden support'), ('trump', 'Trump support')):
        support_line = go.Scatter(x=df['date'], y=df[column],
                                  mode='lines+markers', name=label)
        fig.add_trace(support_line, secondary_y=True)

    count_bars = go.Bar(x=df['date'], y=df['count'], name='user count', marker_color='khaki')
    fig.add_trace(count_bars, secondary_y=False)

    fig.update_yaxes(title_text="User Count", secondary_y=False, showgrid=False)
    fig.update_yaxes(title_text="Support Percentage", secondary_y=True)
    fig.update_layout(title=f'US Election 2020: Trump vs Biden on Twitter in {key}')
    fig.show()


In [33]:
# Every daily survey is built from the same region template, so any one of them
# supplies the full region list. Taking the first available survey avoids a
# KeyError when a particular day (previously hard-coded as '2020-10-25') has no
# results file; with no surveys at all there is simply nothing to plot.
first_survey = next(iter(surveys.values()), None)
states = first_survey.index if first_survey is not None else []
for state in states:
    plot1(get_ts_df(state), state)