In [1]:
import numpy as np
from matplotlib import pylab as plt
import pandas as pd 
from datetime import datetime as dt
from datetime import date

from poll_weight import add_poll_weights, cleanup_data, combine_districts 
from utils import election_stats, state_results, \
                  plot_electoral_vote_distribution, \
                    plot_nelectoral_vs_popular_vote
from utils import std_weighted

from simulation import simulate_elections

In [3]:
import warnings
# Silence only UserWarnings attributed to pandas' datetime-array module;
# every other warning category and module is left untouched.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="pandas.core.arrays.datetimes",
)


In [5]:
# Read the raw presidential polling data into a DataFrame.
# Note: despite its name, `url` holds a relative path to a local CSV file,
# not a web address.
url = './data/president_polls.csv'
pad = pd.read_csv(url)

In [6]:
# Clean the raw polling table with the project helper `cleanup_data`
# (imported from poll_weight).
# NOTE(review): `pad` is reassigned from itself, so this cell is not idempotent;
# re-running it without re-running the load cell feeds already-cleaned data
# back into cleanup_data.
pad = cleanup_data(pad)
# Column-oriented copy of the table ({column name: list of values}); this is
# the structure handed to simulate_elections further down.
ppl = pad.to_dict(orient = 'list') 

In [8]:
# Display the cleaned polling table (the rendered view below is truncated by
# pandas to the first/last rows and columns).
pad

Unnamed: 0,poll_id,pollster_id,pollster,sponsor_ids,sponsors,display_name,pollster_rating_id,pollster_rating_name,numeric_grade,pollscore,...,ranked_choice_reallocated,ranked_choice_round,party,answer,candidate_id,candidate_name,pct,startdate,enddate,weight
0,88518,1741,ActiVote,,,ActiVote,721,ActiVote,,,...,False,,DEM,Harris,16661,Kamala Harris,58.8,2024-09-03,2024-10-05,2.952229e-02
1,88518,1741,ActiVote,,,ActiVote,721,ActiVote,,,...,False,,REP,Trump,16651,Donald Trump,41.2,2024-09-03,2024-10-05,2.952229e-02
6,88507,446,St. Anselm,,,Saint Anselm College Survey Center,494,Saint Anselm College Survey Center,2.4,-0.5,...,False,,DEM,Harris,16661,Kamala Harris,51.0,2024-10-01,2024-10-02,2.594231e-01
7,88507,446,St. Anselm,,,Saint Anselm College Survey Center,494,Saint Anselm College Survey Center,2.4,-0.5,...,False,,REP,Trump,16651,Donald Trump,44.0,2024-10-01,2024-10-02,2.594231e-01
13,88501,1741,ActiVote,,,ActiVote,721,ActiVote,,,...,False,,DEM,Harris,16661,Kamala Harris,56.0,2024-09-04,2024-10-02,2.718399e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14866,75009,1329,Bendixen & Amandi International,,,Bendixen & Amandi International,29,Bendixen & Amandi International,1.0,0.4,...,False,,REP,Trump,16651,Donald Trump,44.0,2021-06-17,2021-06-23,2.023794e-28
14883,74928,446,St. Anselm,1629,John Bolton Super PAC,Saint Anselm College Survey Center,494,Saint Anselm College Survey Center,2.4,-0.5,...,False,,DEM,Biden,19368,Joe Biden,51.0,2021-05-07,2021-05-10,5.194156e-28
14884,74928,446,St. Anselm,1629,John Bolton Super PAC,Saint Anselm College Survey Center,494,Saint Anselm College Survey Center,2.4,-0.5,...,False,,REP,Trump,16651,Donald Trump,43.0,2021-05-07,2021-05-10,5.194156e-28
14889,74706,1056,Remington,421,Missouri Scout,Remington Research Group,279,Remington Research Group,2.6,-0.6,...,False,,DEM,Biden,19368,Joe Biden,38.0,2021-04-21,2021-04-22,2.881951e-28


In [11]:
# Per-state reference table: abbreviation ('State'), full name ('Name'),
# number of electors ('Electors') and 'Population'.
states_electoral = pd.read_csv('./data/states_electoral.csv')

# Pull each column of interest out as its own NumPy array.
states_abrv, states, electors, state_pop = (
    np.array(states_electoral[column])
    for column in ('State', 'Name', 'Electors', 'Population')
)

# Number of entries in the table (one per row of the CSV).
nstates = states.size

In [13]:
# Simulation settings.
date_range = ['2024-04-01', '2024-10-06']  # date window passed to the simulation
nsims = 10_000                             # number of simulated elections
poll_type = 'pct'                          # presumably selects the 'pct' poll column -- confirm

# The cleaned table already exposes 'startdate' / 'enddate' (see the displayed
# DataFrame), so no aliasing from 'start_date' / 'end_date' is needed here.
#
# NOTE(review): no RNG seed is set in the visible cells; if simulate_elections
# draws from NumPy's global RNG the numbers below will differ between runs.
# NOTE(review): the run logs std_weighted "division by zero" diagnostics for
# states that have a single poll (e.g. Alabama, Idaho, Kansas in the output).
simplest_model_results = simulate_elections(
    ppl,
    states=states,
    electors=electors,
    date_range=date_range,
    poll_type=poll_type,
    min_weight=0.00,
    nsims=nsims,
)

Debug info for Alabama:
Values: [29.4]
Weights: [1.]
Sum of weights: 1.0
Sum of squared weights: 1.0
Error in std_weighted: Sum of squared weights is 1, leading to division by zero
Debug info for Alabama:
Values: [55.6]
Weights: [1.]
Sum of weights: 1.0
Sum of squared weights: 1.0
Error in std_weighted: Sum of squared weights is 1, leading to division by zero
Debug info for Idaho:
Values: [24.6]
Weights: [1.]
Sum of weights: 1.0
Sum of squared weights: 1.0
Error in std_weighted: Sum of squared weights is 1, leading to division by zero
Debug info for Idaho:
Values: [53.8]
Weights: [1.]
Sum of weights: 1.0
Sum of squared weights: 1.0
Error in std_weighted: Sum of squared weights is 1, leading to division by zero
Debug info for Kansas:
Values: [38.8]
Weights: [1.]
Sum of weights: 1.0
Sum of squared weights: 1.0
Error in std_weighted: Sum of squared weights is 1, leading to division by zero
Debug info for Kansas:
Values: [43.8]
Weights: [1.]
Sum of weights: 1.0
Sum of squared weights: 1.0


In [14]:
# Report summary statistics for the simulated elections under a readable model
# label: win percentages, mean/median electoral votes and the 95 percent range
# per candidate, plus the share of electoral-college draws (see output below).
model_name = 'model 1 (simplest model)'
election_stats(model_name, simplest_model_results)

Harris wins in 93.1600 per cent of elections
        average and median Nelectoral = 284.51 and 284.00; in 95 percent range = [241.00  331.00]
Trump   wins in 6.3700 per cent of elections
        average and median Nelectoral = 217.49 and 218.00; in 95 percent range = [171.00  261.00]
 0.4700 per cent of elections end up in electoral college draw


In [19]:
# Split the six-element simulation result into named pieces.
# NOTE(review): the `biden_*` names are kept because later cells reference
# them; the election_stats summary labels the Democratic candidate as Harris,
# so these presumably hold Harris's results -- confirm before renaming.
(
    biden_electoral_votes,
    trump_electoral_votes,
    biden_vote_dist,
    trump_vote_dist,
    ave_biden,
    ave_trump,
) = simplest_model_results


In [21]:
# Inspect the first element of the simulation result: a 1-D array of
# electoral-vote totals, presumably one entry per simulated election.
biden_electoral_votes

array([255., 270., 276., ..., 307., 267., 278.])

In [23]:
# Inspect the second element of the simulation result: the matching array of
# electoral-vote totals for Trump (presumably one entry per simulated election).
trump_electoral_votes

array([247., 232., 226., ..., 195., 235., 224.])

In [25]:
# Inspect the third element of the simulation result: a 2-D array of vote
# shares. Some rows in the output are entirely NaN -- presumably states with
# no usable polls in the selected date range; verify against simulate_elections.
biden_vote_dist

array([[42.09649495, 42.44388959, 44.43902978, ..., 42.85581115,
        43.31321629, 42.6213117 ],
       [        nan,         nan,         nan, ...,         nan,
                nan,         nan],
       [44.76408895, 43.14405467, 43.99325839, ..., 43.87205484,
        39.29382172, 41.00972651],
       ...,
       [50.23067179, 50.71524178, 51.25274233, ..., 49.46358539,
        47.494709  , 48.33531806],
       [32.99904775, 35.04493967, 38.59398517, ..., 37.00651948,
        28.02633708,  4.14285309],
       [        nan,         nan,         nan, ...,         nan,
                nan,         nan]])