In [44]:
import pandas as pd
import numpy as np
from math import exp
from utils import get_names2abbrs_dict

In [49]:
# Electoral-vote table; a later cell reads its `state_po` and
# `electoral_votes` columns to total each side's votes.
state_po_votes = pd.read_csv('electoral_votes_2020.csv')

In [50]:
# One row per poll question. Later cells use the `date`, `state_po`
# ('NAT' marks a national poll), `sample_size` and `d_prob` columns.
d_probs = pd.read_csv('poll_data/all_pres_polls_d_probs.csv')

In [51]:
# Preview the loaded polls before any date filtering is applied.
d_probs

Unnamed: 0,date,poll_id,question_id,state,state_po,sample_size,population,d_prob
0,2018-11-28,57026,92080,Ohio,OH,648.0,v,0.521739
1,2018-12-17,57170,92351,,NAT,1001.0,rv,0.538462
2,2019-01-07,57275,92597,North Carolina,NC,750.0,rv,0.526882
3,2019-01-21,57365,92773,,NAT,760.0,rv,0.563830
4,2019-01-26,57427,92926,Michigan,MI,600.0,lv,0.569444
...,...,...,...,...,...,...,...,...
5385,2020-11-02,72717,136487,,NAT,914.0,lv,0.536082
5386,2020-11-02,72717,136486,,NAT,1146.0,rv,0.536082
5387,2020-11-02,72717,136485,,NAT,1333.0,a,0.525773
5388,2020-11-02,72714,136475,,NAT,1363.0,lv,0.552083


In [62]:
# Keep only polls dated 2020-05-01 through 2020-10-31 (both ends inclusive).
# `date` holds ISO-formatted strings, so string comparison orders them
# chronologically.
mask = d_probs['date'].between('2020-05-01', '2020-10-31')
d_probs = (
    d_probs.loc[mask]
    .sort_values(by='date')
    .reset_index(drop=True)
)

In [63]:
# Blending factors for the poll-averaging loop: a poll's update weight is the
# factor below multiplied by logistic(sample_size).
alpha = 0.4  # factor applied to state-level polls
nat_alpha = 0.0  # Don't include national results at all
# NOTE(review): not referenced by any visible cell; logistic() uses its own
# literal midpoint of 1000 — confirm whether it was meant to read this value.
standard_sample_size = 1000

In [69]:
def logistic(x, k=0.001, x0=1000):
    """Map a sample size onto a weight in [0, 1] with a logistic curve.

    Computes ``1 / (1 + exp(-k * (x - x0)))``.

    Parameters
    ----------
    x : float
        Value to squash (the notebook passes a poll's sample size).
    k : float, optional
        Steepness of the curve. Defaults to 0.001.
    x0 : float, optional
        Midpoint of the curve; ``logistic(x0)`` is exactly 0.5.
        Defaults to 1000.

    Returns
    -------
    float
        The logistic value. Returns 0.0 when the exponent is too large for
        ``exp`` to represent, which is the mathematical limit in that case.
    """
    try:
        e = exp(-k * (x - x0))
    except OverflowError:
        # exp() overflows once its argument exceeds ~709; the true value of
        # 1 / (1 + e) is then indistinguishable from 0.
        return 0.0
    return 1 / (1 + e)

In [65]:
# Fold every poll into a running per-state estimate of the Democratic
# probability:
#     estimate <- estimate * (1 - weight) + d_prob * weight
# with weight = alpha * logistic(sample_size) for state polls and
# nat_alpha * logistic(sample_size) for national ('NAT') polls, which update
# every state at once. Polls are applied in the frame's row order.
all_states = {k: 0.5 for k in set(get_names2abbrs_dict().values())}  # prior = 0.5 for all states
poll_rows = zip(d_probs['state_po'], d_probs['sample_size'], d_probs['d_prob'])
for state_po, sample_size, d_prob in poll_rows:
    # A missing sample size or probability would turn the running estimate
    # into NaN for the rest of the loop, so such polls are skipped.
    if pd.isna(sample_size) or pd.isna(d_prob):
        continue
    sample_weight = logistic(sample_size)
    if state_po == 'NAT':
        weight = nat_alpha * sample_weight
        if weight == 0:
            # A zero-weight update leaves every estimate unchanged.
            continue
        for state_code in all_states:
            all_states[state_code] = (all_states[state_code] * (1 - weight)) + (d_prob * weight)
    else:
        weight = alpha * sample_weight
        all_states[state_po] = (all_states[state_po] * (1 - weight)) + (d_prob * weight)

In [66]:
# Split the states on their blended estimate: strictly above 0.5 goes to the
# Democratic list, anything else (including exactly 0.5) to the Republican one.
d_states, r_states = [], []
for state_po, d_estimate in all_states.items():
    bucket = d_states if d_estimate > 0.5 else r_states
    bucket.append(state_po)

# Total the electoral votes of the states on each side.
in_d_states = state_po_votes['state_po'].isin(d_states)
in_r_states = state_po_votes['state_po'].isin(r_states)
d_votes = state_po_votes.loc[in_d_states, 'electoral_votes'].sum()
r_votes = state_po_votes.loc[in_r_states, 'electoral_votes'].sum()

In [67]:
# Announce the outcome: a side wins once its total reaches 270 electoral
# votes; otherwise the result is reported as a tie and no state list is shown.
if d_votes >= 270:
    win_str, win_states = f'Biden wins with {d_votes} votes!', d_states
elif r_votes >= 270:
    win_str, win_states = f'Trump wins with {r_votes} votes!', r_states
else:
    win_str, win_states = f'It\'s a tie! {d_votes} = {r_votes}', None
print(win_str)
if win_states:
    print(f'Won with states {sorted(win_states)}')

Biden wins with 356 votes!
Won with states ['AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'IL', 'MA', 'MD', 'ME', 'MI', 'MN', 'NC', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OR', 'PA', 'RI', 'VA', 'VT', 'WA', 'WI']


In [68]:
# Show (state code, blended estimate) pairs in ascending order of estimate,
# i.e. from the lowest Democratic probability to the highest.
sorted(all_states.items(), key=lambda kv: kv[1])

[('WV', 0.3274066895919968),
 ('WY', 0.327680211080745),
 ('OK', 0.3699516284397817),
 ('LA', 0.3906973666636962),
 ('SD', 0.3998335339145128),
 ('ID', 0.4009263937023244),
 ('AR', 0.4023625401485357),
 ('AL', 0.4042105764526689),
 ('MS', 0.4042882951789545),
 ('ND', 0.40634944904168563),
 ('KY', 0.40960032356934384),
 ('UT', 0.44257157415348924),
 ('NE', 0.4431929685103449),
 ('MO', 0.44517579115833816),
 ('TN', 0.44844754123505426),
 ('IN', 0.45048087389007485),
 ('KS', 0.4528603982435373),
 ('AK', 0.4530751784922455),
 ('SC', 0.45697949345782973),
 ('MT', 0.4737592279175165),
 ('OH', 0.4902514344830361),
 ('TX', 0.4928141805756722),
 ('IA', 0.5009629914152484),
 ('FL', 0.506678201064659),
 ('NV', 0.5092897092710691),
 ('GA', 0.5177197626436392),
 ('NC', 0.5239177304967546),
 ('PA', 0.5305389646501504),
 ('AZ', 0.530847694404674),
 ('MI', 0.5346517213763574),
 ('NH', 0.5399862278121754),
 ('WI', 0.5497376797280618),
 ('NM', 0.557909118229639),
 ('MN', 0.5589893201402747),
 ('ME', 0.5