In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats.distributions import chi2
from os import listdir
import matplotlib as mpl
%matplotlib inline

# Matplotlib overrides applied notebook-wide:
#   - 'text.usetex' False: render text with matplotlib's own engine, not LaTeX
#   - 'svg.fonttype' 'none': keep SVG text as text instead of converting it to paths
new_rc_params = {}
new_rc_params['text.usetex'] = False
new_rc_params['svg.fonttype'] = 'none'
mpl.rcParams.update(new_rc_params)

In [204]:
#Voting history from all 51 'states' since 1976
# Read five columns of the CSV into a structured array with the fields
# year / state / votes / total_votes / party; the first line of the file is skipped
# (presumably the header row).
votinghistory_full = np.loadtxt(
    '1976-2020-president.csv',
    dtype=[
        ('year', int),
        ('state', 'U25'),
        ('votes', int),
        ('total_votes', int),
        ('party', 'U20'),
    ],
    delimiter=',',
    skiprows=1,
    usecols=(0, 1, 10, 11, 14),
)
print(votinghistory_full)

[(1976, 'ALABAMA', 659170, 1182850, 'DEMOCRAT')
 (1976, 'ALABAMA', 504070, 1182850, 'REPUBLICAN')
 (1976, 'ALABAMA',   9198, 1182850, 'OTHER') ...
 (2020, 'WYOMING',   1739,  278503, 'OTHER')
 (2020, 'WYOMING',    279,  278503, 'OTHER')
 (2020, 'WYOMING',   1459,  278503, 'OTHER')]


In [205]:
#Voting results from only Democratic and Republican candidates

# Democratic rows: boolean mask over the 'party' field, then the matching records
index_dem = votinghistory_full['party'] == 'DEMOCRAT'
votinghistory_dem = votinghistory_full[index_dem]

# Republican rows, selected the same way
index_rep = votinghistory_full['party'] == 'REPUBLICAN'
votinghistory_rep = votinghistory_full[index_rep]

# Both parties stacked: every Democratic record first, then every Republican record
votinghistory = np.concatenate((votinghistory_dem, votinghistory_rep))

In [206]:
#Array of states in alphabetical order
# np.unique returns the sorted distinct values of the 'state' field, so each
# state name appears exactly once.
states = np.unique(votinghistory['state'])

In [255]:
# Now, considering all races between 1976 and 2016, we calculate the mean voteshare and variance for each state,
# for each party, along with the full covariance matrix

def _voteshares_by_state(history, state_names, n_elections):
    """Return one list of vote shares per state, ordered like `state_names`.

    For every state, the first `n_elections` records of `history` belonging to
    that state are taken in file order, and each share is votes / total_votes.

    NOTE(review): this assumes every state has exactly one record per election
    and that the records are in chronological order -- confirm against the CSV.
    A state with fewer than `n_elections` records raises IndexError.
    """
    shares = []
    for name in state_names:
        rows = history[history['state'] == str(name)]
        shares.append([rows[k]['votes'] / rows[k]['total_votes'] for k in range(n_elections)])
    return shares

#Democrats
index_dem_2016 = votinghistory_dem['year'] != 2020  # hold out the 2020 race
votinghistory_dem_2016 = votinghistory_dem[index_dem_2016]
years = np.unique(votinghistory_dem_2016['year'])
voteshare_dem = _voteshares_by_state(votinghistory_dem_2016, states, len(years))
# Rows are states and columns are elections, so np.cov yields a state-by-state matrix;
# bias=True normalises by N rather than N - 1.
cov_dem = np.cov(voteshare_dem, bias = True)
var_dem = np.diag(cov_dem)
mean_dem = np.mean(voteshare_dem, axis = 1)

#Republicans
index_rep_2016 = votinghistory_rep['year'] != 2020  # hold out the 2020 race
votinghistory_rep_2016 = votinghistory_rep[index_rep_2016]
years = np.unique(votinghistory_rep_2016['year'])
voteshare_rep = _voteshares_by_state(votinghistory_rep_2016, states, len(years))
cov_rep = np.cov(voteshare_rep, bias = True)
# Per-state variances are the diagonal of the covariance matrix, not of the raw
# vote-share table (which would just pick out voteshare_rep[i][i]).
var_rep = np.diag(cov_rep)
mean_rep = np.mean(voteshare_rep, axis = 1)

In [259]:
# Inspect the Democratic vote shares: one inner list per state (in the order of
# `states`), holding one share per election year.
print(voteshare_dem)

[[0.5572726888447394, 0.47448859067804633, 0.38280781265064545, 0.39863298309147205, 0.4088006350485172, 0.4315608769582409, 0.4156650294789806, 0.36844402322377173, 0.38740434294574916, 0.3835903309875247, 0.3435794575797364], [0.3565313091750692, 0.2640790179557575, 0.29867777751017555, 0.362709628415519, 0.3028711132430195, 0.33267113649532326, 0.27666339823504693, 0.35516861912104364, 0.37889373599389325, 0.40812659112464433, 0.36550871290111986], [0.39799978188251545, 0.28244683589928427, 0.32542643169830887, 0.387438741228785, 0.3652045259671481, 0.4651706594607681, 0.4473458501738885, 0.44396832928795554, 0.4511525087476428, 0.44589766941799386, 0.4512602184469321], [0.6496172812966184, 0.4752263062004675, 0.38290785001458605, 0.4219173216645847, 0.5320795284925204, 0.5373644915194817, 0.4586425626043496, 0.44547630445189085, 0.3886465976512423, 0.36878990301720105, 0.33653124129360934], [0.47954821836112543, 0.35919436910764124, 0.41267775699231596, 0.4755944256460335, 0.460065

In [248]:
# Ingesting polling data from 2020 election
# The first four CSV columns become the fields candidate / date / approval / state;
# the first line of the file is skipped (presumably the header row).
polling_averages = np.loadtxt(
    'presidential_general_averages.csv',
    dtype=[
        ('candidate', 'U20'),
        ('date', 'U10'),
        ('approval', float),
        ('state', 'U25'),
    ],
    delimiter=',',
    skiprows=1,
    usecols=(0, 1, 2, 3),
)

# Split the records by candidate name
index_biden = polling_averages['candidate'] == 'Joseph R. Biden Jr.'
polls_biden = polling_averages[index_biden]

index_trump = polling_averages['candidate'] == 'Donald Trump'
polls_trump = polling_averages[index_trump]

In [251]:
# Organizing polling data by state to establish covariance matrix for multivariate normal likelihood

states_lowercase = 

[('Donald Trump', '2020-11-03', 57.36126, 'Alabama')
 ('Donald Trump', '2020-11-02', 57.36126, 'Alabama')
 ('Donald Trump', '2020-11-01', 57.47665, 'Alabama') ...
 ('Donald Trump', '2020-10-05', 67.8956 , 'Wyoming')
 ('Donald Trump', '2020-10-04', 67.95186, 'Wyoming')
 ('Donald Trump', '2020-10-03', 68.04157, 'Wyoming')]
