In [57]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats.distributions import chi2
from scipy.stats import multivariate_normal
from os import listdir
import matplotlib as mpl
%matplotlib inline

# Matplotlib configuration applied to every figure in this notebook.
new_rc_params = {
    # Render text with matplotlib's own engine instead of an external LaTeX install.
    'text.usetex': False,
    # Keep text in exported SVGs as text elements rather than converting it to paths.
    "svg.fonttype": 'none',
}
mpl.rcParams.update(new_rc_params)

In [2]:
# Voting history from all 51 'states' since 1976.
# Only five CSV columns are read (positions 0, 1, 10, 11 and 14); they are exposed
# as the named fields year / state / votes / total_votes / party of a structured array.
votinghistory_full = np.loadtxt(
    '1976-2020-president.csv',
    delimiter=',',
    skiprows=1,  # skip the first line of the file (presumably the header row)
    usecols=(0, 1, 10, 11, 14),
    dtype=[
        ('year', int),
        ('state', 'U25'),
        ('votes', int),
        ('total_votes', int),
        ('party', 'U20'),
    ],
)
print(votinghistory_full)

[(1976, 'ALABAMA', 659170, 1182850, 'DEMOCRAT')
 (1976, 'ALABAMA', 504070, 1182850, 'REPUBLICAN')
 (1976, 'ALABAMA',   9198, 1182850, 'OTHER') ...
 (2020, 'WYOMING',   1739,  278503, 'OTHER')
 (2020, 'WYOMING',    279,  278503, 'OTHER')
 (2020, 'WYOMING',   1459,  278503, 'OTHER')]


In [3]:
# Restrict the full history to the two major parties.

# Democratic rows: boolean mask, then the selected records.
index_dem = votinghistory_full['party'] == 'DEMOCRAT'
votinghistory_dem = votinghistory_full[index_dem]

# Republican rows: boolean mask, then the selected records.
index_rep = votinghistory_full['party'] == 'REPUBLICAN'
votinghistory_rep = votinghistory_full[index_rep]

# Combined results: every Democratic row followed by every Republican row.
votinghistory = np.concatenate((votinghistory_dem, votinghistory_rep))

In [4]:
# Array of the distinct state names found in the Democrat/Republican results.
# np.unique returns its result sorted, so the states are in alphabetical order;
# later cells rely on this ordering to index per-state rows.
states = np.unique(votinghistory['state'])

In [5]:
# Now, considering all races between 1976 and 2016 (2020 is excluded), we calculate
# the mean voteshare and variance for each state, for each party, along with the
# full state-by-state covariance matrix.


def _voteshare_by_state(history, state_names, n_years):
    """Return one list of vote shares (votes / total_votes) per state.

    Parameters
    ----------
    history : structured array with 'state', 'votes' and 'total_votes' fields,
        already restricted to a single party.
    state_names : iterable of state names; the output has one row per name,
        in the same order.
    n_years : number of leading rows read for each state.

    Returns
    -------
    list of lists of float, shaped (len(state_names), n_years).

    Raises
    ------
    IndexError
        If a state has fewer than ``n_years`` rows in ``history``.
    """
    # NOTE(review): rows are read by position, so this assumes each state has
    # exactly one row per election year, stored chronologically -- TODO confirm
    # against the CSV (extra same-party rows would silently misalign the years).
    shares = []
    for name in state_names:
        rows = history[history['state'] == str(name)]
        shares.append([rows[k]['votes'] / rows[k]['total_votes'] for k in range(n_years)])
    return shares


#Democrats
index_dem_2016 = votinghistory_dem['year'] != 2020
votinghistory_dem_2016 = votinghistory_dem[index_dem_2016]
years = np.unique(votinghistory_dem_2016['year'])
voteshare_dem = _voteshare_by_state(votinghistory_dem_2016, states, len(years))
# np.cov treats each row (state) as a variable and each column (year) as an
# observation; bias=True normalises by the number of observations N.
cov_dem = np.cov(voteshare_dem, bias = True)
var_dem = np.diag(cov_dem)
mean_dem = np.mean(voteshare_dem, axis = 1)

#Republicans
index_rep_2016 = votinghistory_rep['year'] != 2020
votinghistory_rep_2016 = votinghistory_rep[index_rep_2016]
years = np.unique(votinghistory_rep_2016['year'])
voteshare_rep = _voteshare_by_state(votinghistory_rep_2016, states, len(years))
cov_rep = np.cov(voteshare_rep, bias = True)
# Per-state variance is the diagonal of the covariance matrix (previously this
# took the diagonal of the raw vote-share matrix, which is not a variance).
var_rep = np.diag(cov_rep)
mean_rep = np.mean(voteshare_rep, axis = 1)

In [6]:
# Ingesting polling data from 2020 election.
# The first four CSV columns are read into the named fields
# candidate / date / approval / state of a structured array.
polling_averages = np.loadtxt(
    'presidential_general_averages.csv',
    delimiter=',',
    skiprows=1,  # skip the first line of the file (presumably the header row)
    usecols=(0, 1, 2, 3),
    dtype=[
        ('candidate', 'U20'),
        ('date', 'U10'),
        ('approval', float),
        ('state', 'U25'),
    ],
)

# Joe Biden: boolean mask over all rows, then the selected records.
index_biden = polling_averages['candidate'] == 'Joseph R. Biden Jr.'
polls_biden = polling_averages[index_biden]

# Donald Trump: boolean mask over all rows, then the selected records.
index_trump = polling_averages['candidate'] == 'Donald Trump'
polls_trump = polling_averages[index_trump]

In [56]:
# Organizing polling data by state to establish covariance matrix for multivariate normal likelihood

# Fixed seed for the random imputation of missing poll dates, so that the
# covariance matrices below are identical on every Restart & Run All.
POLL_IMPUTATION_SEED = 0


def _polls_by_state(polls, region_names, dates, rng, skipped=('ME-1', 'ME-2', 'NE-2')):
    """Arrange one candidate's polls into a (region x date) table.

    Parameters
    ----------
    polls : structured array with 'state', 'date' and 'approval' fields,
        already restricted to a single candidate.
    region_names : iterable of values of the 'state' field to process, in order.
    dates : iterable of date strings; one output column per date, same order.
    rng : numpy.random.Generator used to impute dates with no poll.
    skipped : region names that are ignored entirely (presumably
        congressional-district entries -- TODO confirm).

    Returns
    -------
    (by_state, national)
        by_state : list with one row per remaining region, each a list of
            ``approval * 0.01`` values, one per date.
        national : list holding the raw rows whose 'state' is 'National'.
    """
    by_state = []
    national = []
    for name in region_names:
        if name in skipped:
            continue
        rows = polls[polls['state'] == str(name)]
        if name == 'National':
            # National rows are kept apart, unscaled and not aligned to dates.
            national.append(rows)
            continue
        mean = np.mean(rows['approval'])
        stdv = np.std(rows['approval'])
        series = []
        for date in dates:
            match = rows[rows['date'] == str(date)]
            if match.size == 0:
                # No poll on this date: draw a stand-in value from a normal
                # distribution with this region's own mean and spread.
                series.append(rng.normal(mean, stdv) * 0.01)
            else:
                # Use the first matching row; 0.01 rescales the value
                # (presumably percent -> fraction).
                series.append(match['approval'][0] * 0.01)
        by_state.append(series)
    return by_state, national


states_lowercase = np.unique(polling_averages['state'])
polling_dates = np.unique(polling_averages['date'])
_poll_rng = np.random.default_rng(POLL_IMPUTATION_SEED)

# Joe Biden
polls_biden_bystate, polls_biden_national = _polls_by_state(
    polls_biden, states_lowercase, polling_dates, _poll_rng)
# Rows are regions (variables), columns are dates (observations); bias=True
# normalises by the number of observations N.
polls_biden_cov = np.cov(polls_biden_bystate, bias = True)

# Donald Trump
polls_trump_bystate, polls_trump_national = _polls_by_state(
    polls_trump, states_lowercase, polling_dates, _poll_rng)
polls_trump_cov = np.cov(polls_trump_bystate, bias = True)

In [87]:
# Implementing MCMC

# Determinant of the historical Democratic vote-share covariance matrix.
# NOTE(review): the recorded output is 0.0, which suggests cov_dem is singular
# (or the determinant underflows); a singular covariance needs
# allow_singular=True in scipy's multivariate_normal -- confirm before using
# cov_dem in the likelihood.
print(np.linalg.det(cov_dem))

# Multivariate-normal density evaluated at its own mean.
# NOTE(review): this pairs the historical Democratic mean with the Trump
# polling covariance; looks like a scratch check -- confirm the intended inputs.
multivariate_normal.pdf(mean_dem, mean=mean_dem, cov=polls_trump_cov)

# Likelihood


0.0


2.408022210931463e+95