In [203]:
import requests
from bs4 import BeautifulSoup
import json
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier

In [204]:
# Download one state's forecast page; the parsed page is later mined for the
# state dropdown options.
# A timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() stops the notebook on an HTTP error instead of letting an
# error page be parsed as if it were the forecast.
resp = requests.get(
    'https://projects.fivethirtyeight.com/2020-election-forecast/arizona/',
    headers={"content-type":"text"},
    timeout=30,
)
resp.raise_for_status()

In [205]:
# Parse the downloaded page body with Python's built-in HTML parser.
soup = BeautifulSoup(resp.text,'html.parser')

In [206]:
# Gather the visible text of every <option> found inside each
# <select class="state-dropdown"> element of the page.
option_labels = [
    choice.text
    for dropdown in soup.find_all('select',class_='state-dropdown')
    for choice in dropdown.find_all('option')
]
# The first collected entry is discarded (presumably a placeholder option
# rather than a state -- confirm against the page markup).
state_list = option_labels[1:]

In [207]:
# Turn each display name into a lower-case, hyphen-separated slug
# (every single space becomes one hyphen).
state_list = ['-'.join(name.split(' ')).lower() for name in state_list]

In [208]:
# When the last 11 characters of a slug contain 'district', drop those 11
# characters; otherwise keep the slug unchanged. For a slug shaped like
# 'maine-1st-district' this yields 'maine-1' (input shape assumed, not verified here).
suffix_len = 11
trimmed_slugs = []
for slug in state_list:
    if 'district' in slug[-suffix_len:]:
        trimmed_slugs.append(slug[:-suffix_len])
    else:
        trimmed_slugs.append(slug)
state_list = trimmed_slugs

In [209]:
# Debug dump of the entire parsed page as indented HTML.
# NOTE(review): this prints the whole document; consider truncating or clearing
# the output before sharing the notebook.
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta content="2020-10-24T13:21:23-04:00" property="article:modified_time"/>
  <title>
   2020 Election Forecast | FiveThirtyEight
  </title>
  <meta content="2020 Election Forecast" property="og:title"/>
  <meta content="Latest forecast of the 2020 presidential election between President Donald Trump and Joe Biden by Nate Silver’s FiveThirtyEight" property="og:description"/>
  <meta content="Latest forecast of the 2020 presidential election between President Donald Trump and Joe Biden by Nate Silver’s FiveThirtyEight" name="description"/>
  <link href="https://projects.fivethirtyeight.com/2020-election-forecast/" rel="canonical"/>
  <meta content="https://projects.fivethirtyeight.com/2020-election-forecast/" property="og:url"/>
  <meta content="https://fivethirtyeight.com/wp-content/uploads/2020/08/2020-FTE-PresPromo-16x9-sized.jpg" property="og:image"/>
  <meta content="https://fivethirtyeight.com/wp-content/uploads/2020/08/2020-FTE-PresProm

In [210]:
# Search the parsed page for <text> tags; the recorded result is an empty list.
soup.find_all('text')

[]

In [211]:
# Download the per-state forecast summary as JSON text.
# The timeout prevents an unresponsive server from blocking the kernel, and
# raise_for_status() surfaces HTTP errors here rather than as a confusing JSON
# decoding failure in a later cell.
outcome = requests.get(
    'https://projects.fivethirtyeight.com/2020-election-forecast/state_recirc.json',
    timeout=30,
)
outcome.raise_for_status()

In [212]:
# Decode the JSON body of the forecast response into Python objects.
election_prediction = json.loads(outcome.text)

In [213]:
# Per-state entries taken from the first item under the 'president' key.
state_by_state = election_prediction['president'][0]['states']

In [214]:
# Build column-oriented win probabilities, leaving out the listed district
# codes and DC.
skipped_codes = ['NE1','NE2','NE3','ME1','ME2','DC']
kept_entries = [entry for entry in state_by_state if entry['state'] not in skipped_codes]

# NOTE(review): the candidate at index 0 is treated as Trump and index 1 as
# Biden; that ordering is assumed from the payload and is not checked here.
state_to_win_prob = {
    'state': [entry['state'] for entry in kept_entries],
    'trump': [entry['candidates'][0]['winprob'] for entry in kept_entries],
    'biden': [entry['candidates'][1]['winprob'] for entry in kept_entries],
}

In [215]:
import pandas as pd

In [216]:
# Tabulate the win probabilities: one row per retained state, with columns
# 'state', 'trump' and 'biden'.
df = pd.DataFrame(data=state_to_win_prob) 

In [217]:
# Download the simulated election outcomes and decode them from JSON.
# The request is split out from the decoding so that a timeout can be applied
# and an HTTP error is reported as such instead of as a JSON parsing failure.
simulations_resp = requests.get(
    'https://projects.fivethirtyeight.com/2020-election-forecast/us_simulations.json',
    timeout=30,
)
simulations_resp.raise_for_status()
us_simulations = json.loads(simulations_resp.text)

In [218]:
# Alphabetically ordered list of the distinct state codes present in df.
state_abbrev = sorted(df['state'].unique())

In [219]:
# Inspect the first entry of the simulations payload; the recorded output shows
# its 'type' and 'simulations' keys.
# NOTE(review): this displays a very large structure; consider showing only a slice.
us_simulations[0]

{'type': 'polls-plus',
 'simulations': [{'winner': 'Biden',
   'evs': {'Biden': 375, 'Trump': 163},
   'states': {'AK': {'state': 'AK', 'winner': 'Trump'},
    'AL': {'state': 'AL', 'winner': 'Trump'},
    'AR': {'state': 'AR', 'winner': 'Trump'},
    'AZ': {'state': 'AZ', 'winner': 'Biden'},
    'CA': {'state': 'CA', 'winner': 'Biden'},
    'CO': {'state': 'CO', 'winner': 'Biden'},
    'CT': {'state': 'CT', 'winner': 'Biden'},
    'DC': {'state': 'DC', 'winner': 'Biden'},
    'DE': {'state': 'DE', 'winner': 'Biden'},
    'FL': {'state': 'FL', 'winner': 'Biden'},
    'GA': {'state': 'GA', 'winner': 'Biden'},
    'HI': {'state': 'HI', 'winner': 'Biden'},
    'IA': {'state': 'IA', 'winner': 'Biden'},
    'ID': {'state': 'ID', 'winner': 'Trump'},
    'IL': {'state': 'IL', 'winner': 'Biden'},
    'IN': {'state': 'IN', 'winner': 'Trump'},
    'KS': {'state': 'KS', 'winner': 'Trump'},
    'KY': {'state': 'KY', 'winner': 'Trump'},
    'LA': {'state': 'LA', 'winner': 'Trump'},
    'MA': {'stat

In [220]:
# Encode every simulation as 0/1 indicators: 1 when Biden is the winner,
# 0 otherwise. One list per state code, followed by an 'overall' list for the
# winner of the whole simulation.
simulation_runs = us_simulations[0]['simulations']

d = {
    abbrev: [int(run['states'][abbrev]['winner']=='Biden') for run in simulation_runs]
    for abbrev in state_abbrev
}
d['overall'] = [int(run['winner']=='Biden') for run in simulation_runs]

In [221]:
# One row per simulation; each column holds 0/1 flags that are 1 when Biden is
# the winner for that column's state (or overall).
sim_outcomes = pd.DataFrame(data=d)

In [222]:
# Count the simulations whose overall winner is Biden.
np.sum(sim_outcomes.overall)

87

In [223]:
# Population covariance between every pair of 0/1 outcome columns.
# For binary indicators a and b over n simulations:
#     cov(a, b) = (n * k_ab - k_a * k_b) / n**2
# where k_a and k_b count the 1s in each column and k_ab counts the rows in
# which both are 1.
# https://stats.stackexchange.com/questions/67019/variance-and-covariance-of-binary-data/67218
#
# The simulation count is read from the data instead of being fixed at 100, so
# the result stays correct if the feed returns a different number of runs.
n_sims = len(sim_outcomes)
outcome_matrix = sim_outcomes.to_numpy()

# (columns x columns) counts of simulations in which both columns equal 1;
# valid because every entry is 0 or 1, so the product is 1 only when both are.
joint_wins = outcome_matrix.T @ outcome_matrix
# Number of 1s in each column.
win_counts = outcome_matrix.sum(axis=0)

cov_values = (n_sims * joint_wins - np.outer(win_counts, win_counts)) / n_sims**2

# Keyed by (column_a, column_b), in the same iteration order as a nested loop
# over sim_outcomes.columns.
column_names = list(sim_outcomes.columns)
cov = {
    (state_a, state_b): cov_values[i, j]
    for i, state_a in enumerate(column_names)
    for j, state_b in enumerate(column_names)
}

In [224]:
# Smallest covariance over all column pairs.
np.min(list(cov.values()))

-0.0097

In [243]:
# Hand-entered ordering of the labels (state codes plus 'overall') used for the
# rows and columns of the covariance matrix; one label per line.
cov_order = '''FL
overall
MI
PA
TX
NC
WI
MN
OH
IA
AZ
NV
GA
NH
NM
VA
NY
CT
DE
IL
CA
CO
OR
RI
WA
HI
MD
MA
NJ
VT
AK
WY
AL
AR
NE
ID
OK
KY
IN'''.split('\n')

In [245]:
# Lay the pairwise covariances out as a list of rows, with both rows and
# columns following the order given by cov_order.
l = [
    [cov[(row_label,col_label)] for col_label in cov_order]
    for row_label in cov_order
]

In [246]:
# Spot check: covariance stored for the (AL, AK) pair.
cov[('AL','AK')]

0.0087

In [247]:
# Spot check: the same pair with the keys swapped; the recorded value matches.
cov[('AK','AL')]

0.0087

In [248]:
# Print every pair whose covariance differs from that of the reversed pair;
# no output means the mapping is symmetric.
for first, second in cov:
    if cov[first,second] != cov[second,first]:
        print(first,second)

In [249]:
# for state in state_abbrev:
#     print(state, cov['overall',state])

In [250]:
# True when every pair has the same covariance as its reversed pair.
all(cov[first,second] == cov[second,first] for first, second in cov)

True

In [251]:
# Convert the nested row lists into a NumPy array.
cov_mat = np.array(l)

In [252]:
# Hand-entered return figures, one per line; parsed into floats afterwards.
# NOTE(review): these are consumed positionally next to the covariance matrix,
# so they presumably follow the same order as cov_order -- confirm.
returns = '''0.46181
0.3595062
0.329469875
0.259193375
0.23264
0.218074325
0.19213875
0.18279825
0.166805
0.15296875
0.15036575
0.14933735
0.1424675
0.08453125
0.078919875
0.039495275
0.03387075
0.03237145
0.024974375
0.024154375
0.023231875
0.022885325
0.02186
0.018929
0.016108125
0.0158775
0.014670125
0.014467125
0.013135625
0.0120185
0.0098375
0.00886375
0.008679
0.008113125
0.007974125
0.00457125
0.003031
0.002939975
0.00114875'''

In [253]:
# Parse the newline-separated figures into a list of floats.
returns_list = list(map(float, returns.split('\n')))

In [254]:
# Number of return figures; it should equal the covariance matrix dimension.
len(returns_list)

39

In [255]:
# Dimensions of the covariance matrix.
cov_mat.shape

(39, 39)

In [323]:
# Set up the optimizer from the return vector and the covariance matrix, then
# request the weights for a target return of 0.3 (the recorded performance
# output reports an expected return of 30.0%).
ef = EfficientFrontier(np.array(returns_list),cov_mat)
weights = ef.efficient_return(.3)

In [324]:
# Check that the covariance matrix equals its own transpose.
np.array_equal(cov_mat,cov_mat.T)

True

In [325]:
# Print and return the portfolio's expected return, volatility and Sharpe ratio.
ef.portfolio_performance(verbose=True)

Expected annual return: 30.0%
Annual volatility: 22.3%
Sharpe Ratio: 1.26


(0.3, 0.22297466822371337, 1.255748028377252)

In [326]:
# Pair each optimized weight with its label. Weights are looked up by
# position, and the number of positions is taken from cov_order rather than
# hard-coded, so the table stays aligned if the label list changes length.
w = [weights[i] for i in range(len(cov_order))]
pd.DataFrame(data={'weight':w,'state':cov_order})

Unnamed: 0,weight,state
0,0.234803,FL
1,0.134715,overall
2,0.353035,MI
3,0.0,PA
4,0.009649,TX
5,0.0,NC
6,0.0,WI
7,0.097688,MN
8,0.0,OH
9,0.0,IA
