In [1]:
import urllib
import urllib.request
from collections import OrderedDict
from math import ceil, floor
from operator import itemgetter

import pandas as pd
from bs4 import BeautifulSoup

## Scrape Wikipedia Page

In [2]:
# Postal abbreviations for the 50 states plus DC, followed by a 'US' entry
# for the nationwide total.
states = [
    'AL','AK','AZ','AR','CA','CO','CT','DC','DE','FL',
    'GA','HI','ID','IL','IN','IA','KS','KY','LA','ME',
    'MD','MA','MI','MN','MS','MO','MT','NE','NV','NH',
    'NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI',
    'SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY', 'US',
]

# Electoral-college votes, position-for-position with `states`
# (the final 538 is the nationwide total).
ec_votes = [
    9,  3,  11, 6,  55, 9,  7,  3,  3,  29,
    16, 4,  4,  20, 11, 6,  6,  8,  8,  4,
    10, 11, 16, 10, 6,  10, 3,  5,  6,  4,
    14, 5,  29, 15, 3,  18, 7,  7,  20, 4,
    9,  3,  11, 38, 6,  3,  13, 12, 5,  10, 3, 538,
]

# Pair the two lists up into an ordered abbreviation -> electors lookup.
ec_votes_dict = OrderedDict(zip(states, ec_votes))

In [3]:
# Wikipedia article that holds the per-state results table.
url = 'https://en.wikipedia.org/wiki/United_States_presidential_election,_2012'

# Download the page and parse it with Python's built-in HTML parser.
# The `with` block closes the HTTP response as soon as the body has been read
# instead of leaving the connection open until garbage collection.
with urllib.request.urlopen(url) as response:
    page = BeautifulSoup(response.read(), "html.parser")

In [4]:
# Locate the results table: start at the <span id="Results_by_state"> element,
# step up to its parent, then take the first <table> found inside the next
# sibling <div>.
header = page.find('span', id='Results_by_state').parent
table = header.find_next_sibling('div').table

# Build (name, party) pairs from the <th colspan="3"> cells of the first row.
head_row = table.find('tr')
headings = []
for cell in head_row.find_all('th', attrs={'colspan': '3'}):
    children = list(cell.children)
    if len(children) == 3:
        # Three child nodes: the first and third are stripped and kept, the
        # middle one is ignored (presumably a line break between candidate
        # name and party -- TODO confirm against the page markup).
        headings.append((children[0].strip(), children[2].strip()))
    else:
        # Any other child count: keep the first child as-is, with no party.
        headings.append((children[0], None))
# The total column gets a heading added by hand.
headings.append(('Total', None))

# Maps a two-character district key to its list of six vote counts
# (strings at first, converted to int at the end of this cell).
data = OrderedDict()
all_rows = list(table.find_all('tr'))
# Cell indices read from every row: 2, 5, 8, 11, 14 and 19
# (presumably one per entry in `headings`, 19 being the total -- TODO confirm).
columns_with_vote_counts = list(range(2,15,3))
columns_with_vote_counts.append(19)
# Skip the first two rows; the last row is handled separately below.
for row in all_rows[2:-1]:
    cells = list(row.find_all('td'))
    abbr = cells[20].string.strip()
    # Keep only rows whose cell-20 text is 2 or 6 characters long, keyed by
    # its first two characters; a later row with the same two-character
    # prefix overwrites an earlier one.
    if len(abbr)==2 or len(abbr)==6:
        abbr = abbr[:2]
        # Remove commas and periods; every '-' and every 'N/A' becomes '0'.
        data[abbr] = [cells[i].text.strip().replace(',','').replace('.', '').replace('-','0').replace('N/A', '0')
                      for i in columns_with_vote_counts]
# The last row is read from <th> cells rather than <td> cells.
# NOTE(review): unlike the loop above, '.' and 'N/A' are not replaced here.
cells = all_rows[-1].find_all('th')
abbr = cells[20].string.strip()
data[abbr] = [cells[i].text.strip().replace(',','').replace('-','0') for i in columns_with_vote_counts]
# Convert every count to int, treating empty strings as 0. Only existing keys
# are reassigned, so the dict does not change size while being iterated.
for district, votes in data.items():
    data[district] = [int(i) if len(i) else 0 for i in votes]

In [5]:
# Show the (name, party) heading pairs as a sanity check.
headings

[('Barack Obama', 'Democratic'),
 ('Mitt Romney', 'Republican'),
 ('Gary Johnson', 'Libertarian'),
 ('Jill Stein', 'Green'),
 ('Others', None),
 ('Total', None)]

In [6]:
# Short column labels made from the initials of each heading name,
# e.g. 'Barack Obama' -> 'BO', 'Others' -> 'O', 'Total' -> 'T'.
abbreviations = [
    ''.join(word[0] for word in heading[0].split())
    for heading in headings
]

# One row per district key in `data`, one column per heading.
vote_data = pd.DataFrame.from_dict(data, orient='index')
vote_data.columns = abbreviations

# Add the electoral-college votes as column 'E'; the assignment aligns the
# values with the rows by index label.
electors_frame = pd.DataFrame.from_dict(ec_votes_dict, orient='index')
vote_data['E'] = electors_frame[0]
print(vote_data)

          BO        MR       GJ      JS       O          T    E
AL    795696   1255925    12328    3397    6992    2074338    9
AK    122640    164676     7392    2917    2870     300495    3
AZ   1025232   1233654    32100    7816     452    2299254   11
AR    394409    647744    16276    9305    1734    1069468    6
CA   7854285   4839958   143221   85638  115445   13038547   55
CO   1323102   1185243    35545    7508   18123    2569520    9
CT    905083    634892    12580     863    5542    1558960    7
DE    242584    165484     3882    1940      31     413921    3
DC    267070     21381     2083    2458     772     293764    3
FL   4237756   4163447    44726    8947   19303    8474179   29
GA   1773827   2078688    45324    1516     695    3900050   16
HI    306658    121015     3840    3184       0     434697    4
ID    212787    420911     9453    4402    4721     652274    4
IL   3019512   2135216    56229   30222     835    5242014   20
IN   1152887   1420543    50111     625 

## Export data

In [7]:
# Save the combined table as comma-separated text; a relative path resolves
# against the current working directory.
filename = 'vote_data.csv'
vote_data.to_csv(path_or_buf=filename, sep=',')

In [8]:
# Number of leading rows of `vote_data` to process: 50 states + DC.
# The nationwide 'US' row is appended last by the scraping cell and is skipped.
NUM_DISTRICTS = 51


def allocate_electors(candidate_votes, electors, total_votes):
    """Split one district's electors among candidates by largest remainder.

    Every candidate first receives ``floor(electors * votes / total_votes)``
    electors. Electors that are still unassigned are then handed out one at a
    time in descending order of leftover popular votes. The aggregate 'O'
    (Others) column is skipped in that second pass because it cannot be mapped
    to a single candidate.

    Parameters
    ----------
    candidate_votes : mapping
        Candidate label -> popular votes in the district.
    electors : int
        Number of electors the district has to distribute.
    total_votes : number
        Total popular votes cast in the district; must be non-zero.

    Returns
    -------
    tuple of (OrderedDict, OrderedDict)
        ``seats`` maps each candidate to the electors awarded (in input
        order). ``leftover`` maps each candidate to the popular votes that did
        not contribute to an elector, sorted in descending order and set to 0
        for candidates who received a remainder elector.
    """
    seats = OrderedDict()
    leftover = {}
    for candidate, pop_votes in candidate_votes.items():
        seats[candidate] = floor(electors * pop_votes / total_votes)
        leftover[candidate] = ceil(pop_votes - total_votes * seats[candidate] / electors)
    leftover = OrderedDict(sorted(leftover.items(), key=itemgetter(1), reverse=True))

    remainder = electors - sum(seats.values())
    for candidate in leftover:
        # Check *before* awarding: if the floor pass already used up every
        # elector, nobody may receive an extra one. Checking only after the
        # decrement would let the counter go negative and hand an additional
        # elector to every candidate.
        if remainder <= 0:
            break
        if candidate == 'O':  # not mappable to a single candidate
            continue
        seats[candidate] += 1
        remainder -= 1
        leftover[candidate] = 0
    return seats, leftover


ec_votes = OrderedDict()
wasted = OrderedDict()
for district, row in vote_data.iloc[:NUM_DISTRICTS].iterrows():
    candidate_votes = OrderedDict(row.loc['BO':'O'])
    total_votes = row.loc['T']
    if total_votes == 0:
        # No usable total for this row: fall back to the sum of the
        # candidate columns.
        total_votes = sum(candidate_votes.values())
    ec_votes[district], wasted[district] = allocate_electors(
        candidate_votes, row.loc['E'], total_votes)

ec_votes = pd.DataFrame.from_dict(ec_votes, orient='index')
wasted = pd.DataFrame.from_dict(wasted, orient='index')
print('Electoral College Votes by State:\n{}\n'.format(ec_votes))
print('Wasted Popular Votes by State:\n{}\n'.format(wasted))
print('Electoral College Tally:\n{}\n'.format(ec_votes.sum()))
print('Wasted popular votes:\n{}'.format(wasted.sum()))

Electoral College Votes by State:
    BO  MR  GJ  JS  O
AK   1   2   0   0  0
AL   4   5   0   0  0
AR   2   4   0   0  0
AZ   5   6   0   0  0
CA  33  21   1   0  0
CO   5   4   0   0  0
CT   4   3   0   0  0
DC   3   0   0   0  0
DE   2   1   0   0  0
FL  15  14   0   0  0
GA   7   9   0   0  0
HI   3   1   0   0  0
IA   3   3   0   0  0
ID   1   3   0   0  0
IL  12   8   0   0  0
IN   5   6   0   0  0
KS   2   4   0   0  0
KY   3   5   0   0  0
LA   3   5   0   0  0
MA   7   4   0   0  0
MD   6   4   0   0  0
ME   2   2   0   0  0
MI   9   7   0   0  0
MN   5   5   0   0  0
MO   5   5   0   0  0
MS   3   3   0   0  0
MT   1   2   0   0  0
NC   7   8   0   0  0
ND   1   2   0   0  0
NE   2   3   0   0  0
NH   2   2   0   0  0
NJ   8   6   0   0  0
NM   3   2   0   0  0
NV   3   3   0   0  0
NY  19  10   0   0  0
OH   9   9   0   0  0
OK   2   5   0   0  0
OR   4   3   0   0  0
PA  11   9   0   0  0
RI   3   1   0   0  0
SC   4   5   0   0  0
SD   1   2   0   0  0
TN   4   7   0   0  