In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
df = pd.read_csv('aggpreds2016.csv')
df2 = pd.read_csv('backtest2016.csv')

In [3]:
df = df.rename(columns={"State": "state", "E.V.": "ec"})

In [4]:
df2['actual_win'] = np.where( df2['trump_actual'] > df2['clinton_actual'], 'trump','clinton' )
df2['rcp_win'] = np.where( df2['clinton_rcp'] > df2['trump_rcp'], 'clinton','trump')
df2['polling_error'] = np.where( df2['rcp_win'] != df2['actual_win'], 1,0 ) 

In [5]:
def bias_margin_of_error(x, y, n):
    prob_error = 100 - x - y
    b = n + prob_error
    moex = random.choice(range((n*-1),(b+1),1))
    moey = random.choice(range((n*-1),(n+1),1))
    x += moex
    y += moey
    return (x, y)

def election_bias_sim(x, y, moe, number_of_elections=20000):
    x = int(x)
    y = int(y)
    moe = int(moe)
    no_wins = 0 # number of wins x over y
    for i in range(number_of_elections):
        x1, y1 = bias_margin_of_error(x, y, moe)
        victory = x1 - y1
        if victory >= 1:
            no_wins += 1
    no_wins_percentage = float(no_wins) / number_of_elections
    return no_wins_percentage 

def moe(x, y, n):
    moex = random.choice(range((n*-1),(n+1),1))
    moey = random.choice(range((n*-1),(n+1),1))
    x += moex
    y += moey
    return x, y

def sim(x, y, e, number_of_elections=20000):
    x = int(x)
    y = int(y)
    e = int(e)
    no_wins = 0 # number of wins x over y
    for i in range(number_of_elections):
        x1, y1 = moe(x, y, e)
        victory = x1 - y1
        if victory >= 1:
            no_wins += 1
    no_wins_percentage = float(no_wins) / number_of_elections
    return no_wins_percentage

In [6]:
trump_x = df2['trump_rcp'].values
clinton_y = df2['clinton_rcp'].values

clinton_x = df2['clinton_rcp'].values
trump_y = df2['trump_rcp'].values

In [7]:
trump_rcp_prob = []
for x, y in zip(trump_x, clinton_y):
    nwp = sim(x,y,4)
    trump_rcp_prob.append(100*nwp)

clinton_rcp_prob = []
for x, y in zip(clinton_x, trump_y):
    nwp = sim(x,y,4)
    clinton_rcp_prob.append(100*nwp)
    
df2['trump_rcp_prob'] = trump_rcp_prob
df2['clinton_rcp_prob'] = clinton_rcp_prob

In [8]:
def electoral_college(ec, cand_1, state, sims=10, thumb_on_scale=1):
    cand_1_wins = 0
    cand_2_wins = 0
    cand_1_ec_total = []
    cand_1_states = []
    cand_2_ec_total = []
    cand_2_states = []
    for i in range(sims):
        cand_1_ec = 0
        cand_1_state = []
        cand_2_ec = 0
        cand_2_state = []
        for x, y, z in zip(cand_1, state, ec):
            x = x + (random.choice(range((thumb_on_scale*-1),(1),1))) # weigh down the probabilities
            sim_election = np.random.uniform()*100
            if x > sim_election:
                cand_1_ec += z
                cand_1_state.append(y)
            else: 
                cand_2_ec += z
                cand_2_state.append(y)
        cand_1_ec_total.append(cand_1_ec)
        cand_1_states.append(cand_1_state)
        cand_2_ec_total.append(cand_2_ec)
        cand_2_states.append(cand_2_state)
        if cand_1_ec > cand_2_ec:
            cand_1_wins += 1
        else:
            cand_2_wins += 1
    return cand_1_wins, cand_2_wins, cand_1_ec_total, cand_2_ec_total, cand_1_states, cand_2_states 

In [9]:
print("Recreate RealClearPolitics Probabilities and Electoral College Results")
print()
ec = list(df2.ec.values)
state = list(df2.state.values)
cand_1 = list(df2.clinton_rcp_prob.values)
cand_1_wins, cand_2_wins, cand_1_ec_avg,\
cand_2_ec_avg, cand_1_states, cand_2_states = electoral_college(ec, cand_1, state, sims=20000, thumb_on_scale=2)
print('Trump Wins:', cand_2_wins)
print('Trump Average EC:', np.average(cand_2_ec_avg))
print('Trump Most Likely EC:', np.sum(df2['ec'][df2['clinton_rcp_prob'] <= 50]))
print('Trump Win Prob:', (cand_2_wins/20000)*100)
print()
print('Clinton Wins:', cand_1_wins)
print('Clinton Average EC:', np.average(cand_1_ec_avg))
print('Clinton Most Likely EC:', np.sum(df2['ec'][df2['clinton_rcp_prob'] >= 50]))
print('Clinton Win Prob:', (cand_1_wins/20000)*100)

Recreate RealClearPolitics Probabilities and Electoral College Results

Trump Wins: 6652
Trump Average EC: 258.1842
Trump Most Likely EC: 266
Trump Win Prob: 33.26

Clinton Wins: 13348
Clinton Average EC: 279.8158
Clinton Most Likely EC: 272
Clinton Win Prob: 66.74


In [10]:
print("Recreate NY Times Probabilities and Electoral College Results")
print()
ec = list(df.ec.values)
state = list(df.state.values)
cand_1 = list(df.NYT.values)
cand_1_wins, cand_2_wins, cand_1_ec_avg,\
cand_2_ec_avg, cand_1_states, cand_2_states = electoral_college(ec, cand_1, state, sims=20000, thumb_on_scale=3)
print('Trump Wins:', cand_2_wins)
print('Trump Average EC:', np.average(cand_2_ec_avg))
print('Trump Most Likely EC:', np.sum(df['ec'][df['NYT'] <= 50]))
print('Trump Win Prob:', (cand_2_wins/20000)*100)
print()
print('Clinton Wins:', cand_1_wins)
print('Clinton Average EC:', np.average(cand_1_ec_avg))
print('Clinton Most Likely EC:', np.sum(df['ec'][df['NYT'] >= 50]))
print('Clinton Win Prob:', (cand_1_wins/20000)*100)

Recreate NY Times Probabilities and Electoral College Results

Trump Wins: 934
Trump Average EC: 225.756
Trump Most Likely EC: 216
Trump Win Prob: 4.67

Clinton Wins: 19066
Clinton Average EC: 312.244
Clinton Most Likely EC: 322
Clinton Win Prob: 95.33


In [11]:
print("Recreate 538 Probabilities and Electoral College Results")
print()
ec = list(df.ec.values)
state = list(df.state.values)
cand_1 = list(df['538'].values)
cand_1_wins, cand_2_wins, cand_1_ec_avg,\
cand_2_ec_avg, cand_1_states, cand_2_states = electoral_college(ec, cand_1, state, sims=20000, thumb_on_scale=6)
print('Trump Wins:', cand_2_wins)
print('Trump Average EC:', np.average(cand_2_ec_avg))
print('Trump Most Likely EC:', np.sum(df['ec'][df['538'] <= 50]))
print('Trump Win Prob:', (cand_2_wins/20000)*100)
print()
print('Clinton Wins:', cand_1_wins)
print('Clinton Average EC:', np.average(cand_1_ec_avg))
print('Clinton Most Likely EC:', np.sum(df['ec'][df['538'] >= 50]))
print('Clinton Win Prob:', (cand_1_wins/20000)*100)

Recreate 538 Probabilities and Electoral College Results

Trump Wins: 4816
Trump Average EC: 248.9347
Trump Most Likely EC: 215
Trump Win Prob: 24.08

Clinton Wins: 15184
Clinton Average EC: 289.0653
Clinton Most Likely EC: 323
Clinton Win Prob: 75.92


In [12]:
print("Recreate HuffPost Probabilities and Electoral College Results")
print()
ec = list(df.ec.values)
state = list(df.state.values)
cand_1 = list(df.HuffPost.values)
cand_1_wins, cand_2_wins, cand_1_ec_avg,\
cand_2_ec_avg, cand_1_states, cand_2_states = electoral_college(ec, cand_1, state, sims=20000)
print('Trump Wins:', cand_2_wins)
print('Trump Average EC:', np.average(cand_2_ec_avg))
print('Trump Most Likely EC:', np.sum(df['ec'][df['HuffPost'] <= 50]))
print('Trump Win Prob:', (cand_2_wins/20000)*100)
print()
print('Clinton Wins:', cand_1_wins)
print('Clinton Average EC:', np.average(cand_1_ec_avg))
print('Clinton Most Likely EC:', np.sum(df['ec'][df['HuffPost'] >= 50]))
print('Clinton Win Prob:', (cand_1_wins/20000)*100)

Recreate HuffPost Probabilities and Electoral College Results

Trump Wins: 108
Trump Average EC: 216.5722
Trump Most Likely EC: 216
Trump Win Prob: 0.54

Clinton Wins: 19892
Clinton Average EC: 321.4278
Clinton Most Likely EC: 324
Clinton Win Prob: 99.46000000000001


In [13]:
print("Recreate PEC Probabilities and Electoral College Results")
print()
ec = list(df.ec.values)
state = list(df.state.values)
cand_1 = list(df.PEC.values)
cand_1_wins, cand_2_wins, cand_1_ec_avg,\
cand_2_ec_avg, cand_1_states, cand_2_states = electoral_college(ec, cand_1, state, sims=20000)
print('Trump Wins:', cand_2_wins)
print('Trump Average EC:', np.average(cand_2_ec_avg))
print('Trump Most Likely EC:', np.sum(df['ec'][df['PEC'] <= 50]))
print('Trump Win Prob:', (cand_2_wins/20000)*100)
print()
print('Clinton Wins:', cand_1_wins)
print('Clinton Average EC:', np.average(cand_1_ec_avg))
print('Clinton Most Likely EC:', np.sum(df['ec'][df['PEC'] >= 50]))
print('Clinton Win Prob:', (cand_1_wins/20000)*100)

Recreate PEC Probabilities and Electoral College Results

Trump Wins: 1155
Trump Average EC: 231.27395
Trump Most Likely EC: 215
Trump Win Prob: 5.775

Clinton Wins: 18845
Clinton Average EC: 306.72605
Clinton Most Likely EC: 323
Clinton Win Prob: 94.22500000000001


### Correct for the known polling errors

In [14]:
df2[['state','actual_win']][df2['polling_error'] == 1]

Unnamed: 0,state,actual_win
2,Michigan,trump
3,Pennsylvania,trump
11,Nevada,clinton
15,Wisconsin,trump


In [15]:
trump_bias = []
for x, y in zip(trump_x, clinton_y):
    nwp = election_bias_sim(x,y,4)
    trump_bias.append(100*nwp)
    
clinton_bias = []
for x, y in zip(clinton_x, trump_y):
    nwp = election_bias_sim(x,y,4)
    clinton_bias.append(100*nwp)
    
df2['trump_bias'] = trump_bias
df2['clinton_bias'] = clinton_bias

In [16]:
adj_clinton_prob = []
for i in range(len(df2)):
    if df2['polling_error'][i] == 1:
        if df2['actual_win'][i] == 'clinton':
            prob = df2['clinton_bias'][i]
            adj_clinton_prob.append(prob)
        else:
            prob = 100 - df2['trump_bias'][i]
            adj_clinton_prob.append(prob)
    else:
        prob = df2['clinton_rcp_prob'][i] 
        adj_clinton_prob.append(prob)
        
df2['adj_clinton_prob'] = adj_clinton_prob

In [17]:
print("True 2016 Probabilities and Electoral College Results")
print()
ec = list(df2.ec.values)
state = list(df2.state.values)
cand_1 = list(df2.adj_clinton_prob.values)
cand_1_wins, cand_2_wins, cand_1_ec_avg,\
cand_2_ec_avg, cand_1_states, cand_2_states = electoral_college(ec, cand_1, state, sims=20000)
print('Trump Wins:', cand_2_wins)
print('Trump Average EC:', np.average(cand_2_ec_avg))
print('Trump Most Likely EC:', np.sum(df2['ec'][df2['adj_clinton_prob'] <= 50]))
print('Trump Win Prob:', (cand_2_wins/20000)*100)
print()
print('Clinton Wins:', cand_1_wins)
print('Clinton Average EC:', np.average(cand_1_ec_avg))
print('Clinton Most Likely EC:', np.sum(df2['ec'][df2['adj_clinton_prob'] >= 50]))
print('Clinton Win Prob:', (cand_1_wins/20000)*100)

True 2016 Probabilities and Electoral College Results

Trump Wins: 10854
Trump Average EC: 270.85765
Trump Most Likely EC: 306
Trump Win Prob: 54.269999999999996

Clinton Wins: 9146
Clinton Average EC: 267.14235
Clinton Most Likely EC: 232
Clinton Win Prob: 45.73
