In [1]:
import urllib
import urllib.request
from collections import OrderedDict

import pandas as pd
from bs4 import BeautifulSoup

## Scrape Wikipedia Page

In [2]:
# Postal abbreviations for the 50 states plus DC, followed by 'US' for the
# nationwide total. Positions line up one-to-one with `ec_votes` below.
states = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
    'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME',
    'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH',
    'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
    'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI',
    'WY',
    'US',
]

# Electoral-college votes in the same order as `states`; the final entry is
# the national total.
ec_votes = [
    9,  3,  11, 6,  55, 9,  7,  3,  3,  29,
    16, 4,  4,  20, 11, 6,  6,  8,  8,  4,
    10, 11, 16, 10, 6,  10, 3,  5,  6,  4,
    14, 5,  29, 15, 3,  18, 7,  7,  20, 4,
    9,  3,  11, 38, 6,  3,  13, 12, 5,  10,
    3,
    538,
]

# Abbreviation -> electoral votes, preserving the listing order above.
ec_votes_dict = OrderedDict(zip(states, ec_votes))

In [3]:
# Wikipedia article whose "Results by state" table is scraped below.
url = 'https://en.wikipedia.org/wiki/United_States_presidential_election,_2016'
# Use the response as a context manager so the underlying HTTP connection is
# closed once the body has been read, even if reading or parsing raises.
with urllib.request.urlopen(url) as response:
    page = BeautifulSoup(response.read(), "html.parser")

In [4]:
# Find the element that contains the span with id "Results_by_state", then take
# the table inside the next sibling <div> of that element.
header = page.find('span', id='Results_by_state').parent
table = header.find_next_sibling('div').table
head_row = table.find('tr')
# One (first text, second text) tuple per header cell that spans three columns.
# Presumably each such cell is a candidate/total column group -- verify against the page.
headings = []
for cell in head_row.find_all('th', attrs={'colspan': '3'}):
    children = list(cell.children)
    if len(children) == 3:
        # Three child nodes: keep the first and last, ignore the middle one
        # (assumed to be a line break between two text nodes -- TODO confirm).
        headings.append((children[0].strip(), children[2].strip()))
    else:
        # NOTE(review): the first child is stored as-is (not stripped) here; the
        # DataFrame cell later calls .split() on it, so it is assumed to be text.
        headings.append((children[0], None))
# Maps a two-character row label to its list of seven vote counts.
data = OrderedDict()
all_rows = list(table.find_all('tr'))
# Skip the first two rows and leave the last row for separate handling below.
for row in all_rows[2:-1]:
    cells = list(row.find_all('td'))
    # Cell 21 holds the row's label (assumed to be the state abbreviation -- TODO confirm).
    abbr = cells[21].string.strip()
    # Only rows whose label is exactly 2 or 6 characters long are kept, keyed by
    # the first two characters; a later row with the same prefix overwrites an earlier one.
    if len(abbr)==2 or len(abbr)==6:
        abbr = abbr[:2]
        # Take cells 2, 5, ..., 20 (every third cell) and normalise the text:
        # drop ',' and '.', turn every '-' into '0', and turn 'N/A' into '0'.
        data[abbr] = [cells[i].text.strip().replace(',','').replace('.', '').replace('-','0').replace('N/A', '0')
                      for i in range(2,23, 3)]
# The last row is read from <th> cells instead of <td>. Its clean-up is lighter
# than the one above: only ',' is removed and '-' becomes '0'.
cells = all_rows[-1].find_all('th')
abbr = cells[21].string.strip()
data[abbr] = [cells[i].text.strip().replace(',','').replace('-','0') for i in range(2,23, 3)]
# Convert the cleaned strings to integers; an empty string counts as 0.
# Only existing keys are reassigned, so the dict does not change size mid-iteration.
for district, votes in data.items():
    data[district] = [int(i) if len(i) else 0 for i in votes]

In [5]:
# Column labels are the initials of the words in each heading's first element.
abbreviations = [
    ''.join(word[0] for word in name.split())
    for name, _ in headings
]

# One row per scraped district, one column per heading.
vote_data = pd.DataFrame.from_dict(data, orient='index')
vote_data.columns = abbreviations

# Attach electoral votes as column 'E'; pandas aligns the values on the row index.
vote_data['E'] = pd.Series(ec_votes_dict)
print(vote_data)

          HC        DT       GJ       JS      EM        O          T    E
AL    729547   1318255    44467     9391       0    21712    2123372    9
AK    116454    163387    18725     5735       0    14307     318608    3
AZ   1161167   1252401   106327    34345   17449     1476    2573165   11
AR    380494    684872    29829     9473   13255    12712    1130635    6
CA   8753788   4483810   478500   278657   39596   147244   14181595   55
CO   1338870   1202484   144121    38437   28917    27391    2780220    9
CT    897572    673215    48676    22841    2108      508    1644920    7
DE    235603    185127    14757     6103       0        0     441590    3
DC    282830     12723     4906     4258       0     7858     312575    3
FL   4504975   4617886   207043    64399       0    25736    9420039   29
GA   1877963   2089104   125306        0       0        0    4092373   16
HI    266891    128847    15954    12737       0    13235     437664    4
ID    189765    409055    28331     84

## Export data

In [6]:
# Write the scraped table, index included, to a comma-separated file in the
# current working directory.
filename = 'vote_data.csv'
vote_data.to_csv(path_or_buf=filename, sep=',')

In [7]:
from math import ceil, floor
from operator import itemgetter
def _allocate_electors(candidate_votes, electors, total_votes):
    """Split one state's electors proportionally using largest remainders.

    Each candidate first receives the floor of their proportional share.
    Electors still unassigned are then handed out one at a time in order of
    decreasing leftover popular vote, skipping the 'O' bucket because it does
    not correspond to a single candidate.

    Parameters:
        candidate_votes: ordered mapping of candidate label -> popular votes.
        electors: number of electors the state has.
        total_votes: popular-vote total used as the denominator.

    Returns:
        (seats, leftover): ``seats`` maps candidate -> electors awarded, in the
        order of ``candidate_votes``. ``leftover`` maps candidate -> popular
        votes not converted into an elector, ordered by decreasing leftover
        and set to 0 for every candidate who received an extra elector.
    """
    seats = OrderedDict()
    leftover = {}
    for candidate, pop_votes in candidate_votes.items():
        seats[candidate] = floor(electors * pop_votes / total_votes)
        leftover[candidate] = ceil(pop_votes - total_votes * seats[candidate] / electors)
    # Largest leftover first; the sort is stable, so ties keep column order.
    leftover = OrderedDict(sorted(leftover.items(), key=itemgetter(1), reverse=True))
    remainder = electors - sum(seats.values())
    for candidate in leftover:
        # Check before awarding: if the floor quotas already used every elector,
        # nobody may receive an extra one. (Testing `remainder == 0` only after
        # an award let the counter go negative and gave everyone a bonus seat.)
        if remainder <= 0:
            break
        if candidate != 'O': # not mappable to a single candidate
            seats[candidate] += 1
            remainder -= 1
            leftover[candidate] = 0
    return seats, leftover


# NOTE: `ec_votes` is rebound here (it was a list of electoral votes in an
# earlier cell); the name is kept so any later cells still find the result.
ec_votes = OrderedDict()
wasted = OrderedDict()
# Only the first 51 rows are processed (assumed to be the 50 states plus DC,
# with the nationwide total row after them -- TODO confirm).
for i in range(51):
    st = vote_data.iloc[i]
    E = st.loc['E']
    V = st.loc['T']
    if V == 0:
        # No total reported: fall back to the sum of the first six columns.
        V = sum(list(st)[:6])
    e, r = _allocate_electors(OrderedDict(st.loc['HC':'O']), E, V)
    ec_votes[st.name] = e
    wasted[st.name] = r
ec_votes = pd.DataFrame.from_dict(ec_votes, orient='index')
wasted = pd.DataFrame.from_dict(wasted, orient='index')
print('Electoral College Votes by State:\n{}\n'.format(ec_votes))
print('Wasted Popular Votes by State:\n{}\n'.format(wasted))
print('Electoral College Tally:\n{}\n'.format(ec_votes.sum()))
print('Wasted popular votes:\n{}'.format(wasted.sum()))

Electoral College Votes by State:
    HC  DT  GJ  JS  EM  O
AK   1   2   0   0   0  0
AL   3   6   0   0   0  0
AR   2   4   0   0   0  0
AZ   5   5   1   0   0  0
CA  34  18   2   1   0  0
CO   4   4   1   0   0  0
CT   4   3   0   0   0  0
DC   3   0   0   0   0  0
DE   2   1   0   0   0  0
FL  14  14   1   0   0  0
GA   7   8   1   0   0  0
HI   3   1   0   0   0  0
IA   3   3   0   0   0  0
ID   1   3   0   0   0  0
IL  11   8   1   0   0  0
IN   4   6   1   0   0  0
KS   2   4   0   0   0  0
KY   3   5   0   0   0  0
LA   3   5   0   0   0  0
MA   7   4   0   0   0  0
MD   6   4   0   0   0  0
ME   2   2   0   0   0  0
MI   7   8   1   0   0  0
MN   5   5   0   0   0  0
MO   4   6   0   0   0  0
MS   2   4   0   0   0  0
MT   1   2   0   0   0  0
NC   7   8   0   0   0  0
ND   1   2   0   0   0  0
NE   2   3   0   0   0  0
NH   2   2   0   0   0  0
NJ   8   6   0   0   0  0
NM   2   2   1   0   0  0
NV   3   3   0   0   0  0
NY  17  11   1   0   0  0
OH   8   9   1   0   0  0
OK  