In [1]:
import numpy as np
import matplotlib.dates
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas as pd
from datetime import datetime

In [2]:
# Start from an empty frame; the following cells fill it column by column
# ('state', 'timestamp', 'biden', 'trump') from the raw CSV.
votes = pd.DataFrame()

In [3]:
# Load the raw data. Later cells read its 'state', 'timestamp',
# 'leading_candidate_name', 'leading_candidate_votes' and
# 'trailing_candidate_votes' columns.
votescsv = pd.read_csv('battleground-state-changes.csv')

In [4]:
# Keep only the state name: drop everything from the first " (" onwards
# (per the original note, the raw value carries an "(EV: X)" suffix).
# This is done with a vectorised, literal string split rather than by assigning
# to the rows yielded by iterrows(): those rows may be copies, in which case the
# write never reaches the frame and the per-state filter used for plotting
# (state == 'Georgia', ...) matches nothing.
votes['state'] = votescsv['state'].str.partition(' (')[0]

In [5]:
# Parse the raw 'timestamp' column into datetimes using an explicit format
# (date, time, fractional seconds).
votes['timestamp'] = pd.to_datetime(
    votescsv['timestamp'],
    format='%Y-%m-%d %H:%M:%S.%f',
)

In [6]:
# Turn the leading/trailing representation into one column per candidate:
# on rows where the candidate is the leader they get the leading count,
# on every other row they get the trailing count.
leader    = votescsv['leading_candidate_name']
lead_cnt  = votescsv['leading_candidate_votes']
trail_cnt = votescsv['trailing_candidate_votes']
for candidate, column in (('Biden', 'biden'), ('Trump', 'trump')):
    votes[column] = np.where(leader == candidate, lead_cnt, trail_cnt)

In [7]:
# Order all snapshots chronologically.
# kind='mergesort' is a stable sort: rows sharing a timestamp keep their file
# order, so the consecutive-row differences computed for the plots do not
# depend on an arbitrary tie-break (the default quicksort gives no such
# guarantee). Rebinding instead of inplace=True keeps the cell chainable.
votes = votes.sort_values(by='timestamp', kind='mergesort')

In [8]:
# Preview the assembled frame. A bare expression inside a for loop is never
# displayed, so looping over iterrows() showed nothing; head() as the cell's
# last expression renders the first rows instead.
votes.head()

In [13]:
# One three-panel PDF ('<state>.pdf') per state:
#   top    - cumulative votes per candidate over time
#   middle - each update's share of new votes divided by the candidate's
#            cumulative share just before that update
#   bottom - each candidate's share of the newly counted votes (stacked bars)
BAR_WIDTH_DAYS = 0.007  # bar width in x-axis date units (days), roughly 10 minutes

#for state in ['Alaska','Arizona','Georgia','North Carolina','Nevada','Pennsylvania']:
for state in ['Georgia','Pennsylvania']:

    stdf = votes.query("state == @state")
    if len(stdf) < 2:
        # An increment needs two snapshots; this also catches a state name
        # that matches no row, instead of silently saving an empty figure.
        print('skipping %s: only %d snapshot(s) found' % (state, len(stdf)))
        continue

    # np.asarray keeps the boolean-mask indexing below working even if
    # to_pydatetime() stops returning a plain ndarray in a future pandas.
    times = np.asarray(stdf['timestamp'].dt.to_pydatetime())
    biden = stdf['biden'].to_numpy()
    trump = stdf['trump'].to_numpy()

    # vote increments between consecutive snapshots
    dbiden = np.diff(biden)
    dtrump = np.diff(trump)
    dtotal = dbiden + dtrump

    # keep only updates that changed the combined count (also avoids 0/0 below)
    changed = dtotal != 0
    # NOTE(review): each increment is stamped with the time of the EARLIER of
    # its two snapshots; switch to times[1:] if it should be the time at which
    # the new votes were reported - confirm the intent.
    dtimes = times[:-1][changed]
    dbiden = dbiden[changed]
    dtrump = dtrump[changed]
    dtotal = dtotal[changed]

    # cumulative fractions just before each kept update
    total  = (biden[:-1] + trump[:-1])[changed]
    fbiden = biden[:-1][changed] / total
    ftrump = trump[:-1][changed] / total

    # fractions of total vote increment
    fdbiden = dbiden / dtotal
    fdtrump = dtrump / dtotal

    # performance := ratio of fraction of new votes to cumulative fraction
    zbiden = fdbiden / fbiden
    ztrump = fdtrump / ftrump

    # figsize is passed per figure instead of mutating the global rcParams
    fig, (top, mid, bot) = plt.subplots(3, sharex=True, figsize=(8, 6))

    top.plot(times, biden, c='b')
    top.plot(times, trump, c='r')
    top.set_title(state)
    # grid(True, ...) switches the grid on explicitly; a bare grid(axis=...)
    # toggles, which would turn it off under styles that enable grids by default
    top.grid(True, axis='x')
    top.set_ylabel('total votes')

    mid.scatter(dtimes, zbiden, c='b', marker="^", s=5)
    mid.scatter(dtimes, ztrump, c='r', marker="v", s=5)
    mid.grid(True, axis='x')
    mid.grid(True, axis='y')
    mid.set_ylabel('# new votes / expected')

    # stacked shares: Biden from 0, Trump on top of Biden
    bot.bar(dtimes, fdbiden, width=BAR_WIDTH_DAYS, color='b')
    bot.bar(dtimes, fdtrump, bottom=fdbiden, width=BAR_WIDTH_DAYS, color='r')
    # black tick at the boundary between the two shares
    bot.scatter(dtimes, fdbiden, color='black', marker="_", s=10, zorder=3)
    bot.xaxis_date()
    bot.set_xlabel('date/time')
    bot.set_ylabel('fraction of new votes')
    bot.set_axisbelow(True)
    bot.set_ylim(-0.01, 1.01)
    bot.grid(True, axis='x')
    bot.grid(True, axis='y', linestyle='dashed')

    # only the bottom axis shows x tick labels (sharex=True)
    bot.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()

    fig.savefig('%s.pdf' % state)
    # close this specific figure so figures do not pile up across iterations
    plt.close(fig)

In [None]:
# Disabled scratch experiment (Georgia only): the whole cell is a single string
# literal, so none of the code inside it executes. If re-enabled it would
# scatter each candidate's per-update vote increment against the combined
# increment on a log-scaled y axis.
'''
state='Georgia'
    
stdf  = votes.query(("state == '%s'" % state),inplace=False)
times = stdf['timestamp'].dt.to_pydatetime()
biden = stdf['biden']
trump = stdf['trump']

#vote increments
dtimes =                           times[:-1]
dbiden = biden.values[1:] - biden.values[:-1]
dtrump = trump.values[1:] - trump.values[:-1]
dtotal = dbiden + dtrump

#remove increments with total==0
dtimes = dtimes[dtotal!=0]
dbiden = dbiden[dtotal!=0]
dtrump = dtrump[dtotal!=0]
dtotal = dtotal[dtotal!=0]

dbiden_neg = -1*dbiden

#fractions of total vote increment
fbiden = dbiden/dtotal
ftrump = dtrump/dtotal

plt.scatter(dbiden,dtotal,c='b')
plt.scatter(dtrump,dtotal,c='r')
#plt.bar(dbiden,color='b')
#plt.hist(dtrump,color='r')
plt.yscale("log")
'''