In [1]:
import pandas as pd
import numpy as np
import datetime

from util.OddsCalculator import OddsCalculator as util

In [2]:
# Raw odds were gathered from two sources:
#   1. https://www.sportsbettingdime.com/politics/2020-us-presidential-election-odds/
#   2. https://www.gamblingsites.org/blog/2020-election-betting-odds-election-night-changes/

# Read the spreadsheet first, then key the rows by their 'date' column.
odds = pd.read_excel('data/Election Odds Raw.xlsx')
odds = odds.set_index('date')

In [3]:
# Preview the odds table as loaded from the spreadsheet (indexed by 'date').
odds

Unnamed: 0_level_0,trump_odds,biden_odds,source,pre_first_results,notes,notes_source
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-04-08 00:00:00,-121,114,1,True,"Bernie Sanders suspends campaign, all but assu...",6
2020-04-09 00:00:00,-121,114,1,True,,
2020-04-10 00:00:00,-121,114,1,True,,
2020-04-11 00:00:00,-121,114,1,True,,
2020-04-12 00:00:00,-121,114,1,True,,
...,...,...,...,...,...,...
2020-11-04 17:00:00,490,-780,1,False,AP calls MI for Biden,3
2020-11-05 00:25:00,528,-867,1,False,,
2020-11-05 10:20:00,476,-716,1,False,,
2020-11-05 15:40:00,675,-1340,1,False,,


In [4]:
# Cast both American-odds columns to strings in a single astype call;
# every other column keeps its dtype.
odds = odds.astype({'trump_odds': str, 'biden_odds': str})

In [5]:
#adds a plus sign to the American odds. Essential for conversion to probabilities

def add_plus(val):
    """Return ``val`` with a leading '+' unless it already contains a sign.

    Args:
        val: Odds value as a string (e.g. '114', '-121', '+528').

    Returns:
        ``val`` unchanged when it contains '-' or '+' anywhere,
        otherwise ``'+' + val``.
    """
    already_signed = '-' in val or '+' in val
    return val if already_signed else '+' + val

In [6]:
# Run add_plus over every value of each American-odds column.
odds['trump_odds'] = odds['trump_odds'].map(add_plus)
odds['biden_odds'] = odds['biden_odds'].map(add_plus)

In [7]:
# Run util.calculate_odds on each American-odds string and keep element 1 of
# every result (presumably the decimal odds, given the *_dec names — TODO confirm
# against util.OddsCalculator).
odds = odds.assign(
    trump_odds_dec=odds['trump_odds'].apply(util.calculate_odds).str.get(1),
    biden_odds_dec=odds['biden_odds'].apply(util.calculate_odds).str.get(1),
)

In [8]:
# Calculate the book's hold, for fun.
# util.calculate_hold is called once per row with [trump_odds_dec, biden_odds_dec].

odds['hold'] = odds.apply(
    lambda row: util.calculate_hold([row['trump_odds_dec'], row['biden_odds_dec']]),
    axis=1,
)

In [9]:
# Calculate the true odds of each candidate winning.
#
# Both legs are passed in the same format (the *_dec columns), matching the
# hold calculation; previously the Trump leg used the American-odds string
# column while the Biden leg used the decimal column.
# The row-wise apply is evaluated once and both result elements are read from it
# (element 0 -> Trump, element 1 -> Biden), instead of running it twice.

win_probs = odds.apply(
    lambda row: util.actual_probability([row['trump_odds_dec'], row['biden_odds_dec']]),
    axis=1,
)
odds['trump_win_perc'] = win_probs.str[0]
odds['biden_win_perc'] = win_probs.str[1]

In [10]:
# Multiply by 100 for visualization purposes.
# Note: this rescales the existing columns, so re-running the cell multiplies again.

odds['trump_win_perc'] = odds['trump_win_perc'] * 100
odds['biden_win_perc'] = odds['biden_win_perc'] * 100

In [11]:
# Build a one-row frame recording Biden's win; it is appended to `odds` afterwards.

final_date = pd.Timestamp(year=2020, month=11, day=7, hour=11, minute=25)
final_pre_first_resuilts = False
final_trump_win_perc = 0
final_biden_win_perc = 100
final_note = 'AP calls PA for Biden. Joe Biden wins the 2020 Presidential Election'
final_note_source = 3

# Each value is a one-element list, so the resulting frame has exactly one row.
d = dict(
    date=[final_date],
    pre_first_results=[final_pre_first_resuilts],
    trump_win_perc=[final_trump_win_perc],
    biden_win_perc=[final_biden_win_perc],
    notes=[final_note],
    notes_source=[final_note_source],
)

# Index by 'date' so the row lines up with the date-indexed odds table.
final_row = pd.DataFrame(d).set_index('date')

In [12]:
# Inspect the single-row frame before it is added to the odds table.
final_row

Unnamed: 0_level_0,pre_first_results,trump_win_perc,biden_win_perc,notes,notes_source
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-11-07 11:25:00,False,0,100,AP calls PA for Biden. Joe Biden wins the 2020...,3


In [13]:
# Add the final row to the odds table.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so pd.concat is used instead; sort=False keeps the existing column order.
# Columns missing from final_row are filled with NaN for that row.
odds = pd.concat([odds, final_row], sort=False)

In [14]:
# Persist the processed odds table to disk as CSV.
odds.to_csv(path_or_buf='data/final_odds.csv')

In [15]:
# Display the processed odds table, including the derived decimal-odds, hold and win-percentage columns.
odds

Unnamed: 0_level_0,trump_odds,biden_odds,source,pre_first_results,notes,notes_source,trump_odds_dec,biden_odds_dec,hold,trump_win_perc,biden_win_perc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-04-08 00:00:00,-121,+114,1.0,True,"Bernie Sanders suspends campaign, all but assu...",6,1.8264462809917354,2.1399999999999997,1.458520,53.95,46.05
2020-04-09 00:00:00,-121,+114,1.0,True,,,1.8264462809917354,2.1399999999999997,1.458520,53.95,46.05
2020-04-10 00:00:00,-121,+114,1.0,True,,,1.8264462809917354,2.1399999999999997,1.458520,53.95,46.05
2020-04-11 00:00:00,-121,+114,1.0,True,,,1.8264462809917354,2.1399999999999997,1.458520,53.95,46.05
2020-04-12 00:00:00,-121,+114,1.0,True,,,1.8264462809917354,2.1399999999999997,1.458520,53.95,46.05
...,...,...,...,...,...,...,...,...,...,...,...
2020-11-05 00:25:00,+528,-867,1.0,False,,,6.28,1.1153402537485582,5.287165,15.08,84.92
2020-11-05 10:20:00,+476,-716,1.0,False,,,5.76,1.1396648044692737,4.858144,16.52,83.48
2020-11-05 15:40:00,+675,-1340,1.0,False,,,7.75,1.0746268656716418,5.623682,12.17,87.83
2020-11-06 09:20:00,+1050,-3967,1.0,False,,,11.5,1.0252079657171667,5.870694,8.19,91.81
