In [1]:
import requests as r
import pandas as pd
from bs4 import BeautifulSoup

%matplotlib inline

In [2]:
# Page whose precinct table is scraped by the cells below.
url = "https://results.thecaucuses.org/"

In [3]:
# Download the results page.
# - timeout: without one, requests waits indefinitely on a stalled server and
#   the cell never finishes.
# - raise_for_status(): fail here on a 4xx/5xx response instead of handing an
#   error page to the parser in the next cell.
resp = r.get(url, timeout=30)
resp.raise_for_status()

In [5]:
# Parse the downloaded HTML. The parser is named explicitly so the resulting
# tree does not depend on which optional parsers (lxml, html5lib) happen to be
# installed; 'html.parser' ships with Python and needs no extra dependency.
soup = BeautifulSoup(resp.text, 'html.parser')

In [10]:
# First <div class="precinct-table"> in the document.
precinct_table = soup.find('div', class_='precinct-table')

# Text of every <li> in the first ul.thead and the first ul.sub-head.
# Both lookups start from the document root, not from precinct_table.
thead = [item.get_text() for item in soup.find('ul', class_='thead').find_all('li')]
sub_head = [item.get_text() for item in soup.find('ul', class_='sub-head').find_all('li')]

In [19]:
# Starting at position 2, every third header cell is taken as a candidate name.
candidates = thead[2::3]

In [21]:
# Every <div class="precinct-rows"> under the precinct table; the parsing loop
# below treats each one as a single county's block.
counties = precinct_table.find_all('div', class_='precinct-rows')

In [25]:
# Scratch handle on the first county block for interactive inspection.
# The `for county in counties` loop in the following cell rebinds this name.
county = counties[0]

In [26]:
def _precinct_records(county_name, precinct_ul):
    """Yield one record dict per candidate for a single precinct <ul>.

    The first <li> holds the precinct name. The remaining cells are read in
    consecutive groups of three (first_exp, final_exp, sde); the k-th group
    is attributed to ``candidates[k]``.
    """
    cells = [li.text for li in precinct_ul.find_all('li')]
    precinct_name = cells[0]
    for start in range(1, len(cells), 3):
        # Tuple unpacking raises if a group does not have exactly three cells.
        first_exp, final_exp, sde = cells[start:start + 3]
        candidate = candidates[start // 3]
        yield dict(county=county_name, precinct=precinct_name, candidate=candidate,
                   first_exp=int(first_exp), final_exp=int(final_exp), sde=float(sde))


# Flatten the nested county -> precinct -> candidate markup into one list of
# flat records (one dict per county/precinct/candidate combination).
rows = []
for county in counties:
    county_name = county.find('div', class_='precinct-county').find('div', class_='wrap').text
    data_div = county.find('div', class_='precinct-data')
    # The predicate filters on the class attribute so 'total-row' lists are skipped.
    for precinct_ul in data_div.find_all('ul', class_=lambda v: v != 'total-row'):
        rows.extend(_precinct_records(county_name, precinct_ul))

In [27]:
# One row per record, ordered by county, then precinct, then candidate.
# The index is not reset, so index labels keep their pre-sort values.
df = (
    pd.DataFrame(rows)
    .sort_values(by=['county', 'precinct', 'candidate'])
)

In [28]:
# Totals of the three numeric columns for every (candidate, county) pair;
# left as the final expression so the notebook renders the table.
(
    df
    .groupby(by=['candidate', 'county'])
    [['first_exp', 'final_exp', 'sde']]
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,first_exp,final_exp,sde
candidate,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bennet,Adair,0,0,0.0000
Bennet,Adams,0,0,0.0000
Bennet,Allamakee,1,0,0.0000
Bennet,Appanoose,0,0,0.0000
Bennet,Audubon,0,0,0.0000
...,...,...,...,...
Yang,Winnebago,34,29,0.4500
Yang,Winneshiek,168,49,0.5120
Yang,Woodbury,167,40,0.6375
Yang,Worth,28,23,0.4000


In [12]:
# Write the frame to CSV (path is relative to the working directory);
# index=False leaves the row index out of the file.
df.to_csv(path_or_buf='iowa_data.csv', index=False)

In [29]:
def highlight_max(s, color='yellow'):
    '''
    Build per-cell CSS that highlights the maximum value(s) of a Series.

    Written to be passed to ``Styler.apply``, which calls it once per column
    and expects one CSS string per cell in return.

    Parameters
    ----------
    s : pandas.Series
        Values to inspect.
    color : str, optional
        CSS colour used for the background of the highlighted cells.
        Defaults to 'yellow'.

    Returns
    -------
    list of str
        ``'background-color: <color>'`` for every entry equal to ``s.max()``
        (ties are all highlighted) and ``''`` for every other entry.
    '''
    is_max = s == s.max()
    # Build the style string once rather than once per cell.
    css = 'background-color: {}'.format(color)
    return [css if v else '' for v in is_max]

# Statewide totals per candidate, largest SDE total first.
summary = (
    df.groupby(['candidate'])[['first_exp', 'final_exp', 'sde']]
    .sum()
    .sort_values(by='sde', ascending=False)
)
# Render with the maximum of each column highlighted.
summary.style.apply(highlight_max)

Unnamed: 0_level_0,first_exp,final_exp,sde
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Buttigieg,37596,43274,564.302
Sanders,43699,45842,561.528
Warren,32611,34934,388.48
Biden,26322,23630,339.678
Klobuchar,22474,21121,263.883
Yang,8929,1759,21.8559
Steyer,3054,413,6.6189
Uncommitted,1000,1451,3.7321
Other,159,205,0.6931
Bloomberg,215,20,0.2096


In [30]:
# Bare expression: renders the assembled frame as this cell's output.
df

Unnamed: 0,county,precinct,candidate,first_exp,final_exp,sde
0,Adair,1NW ADAIR,Bennet,0,0,0.0000
1,Adair,1NW ADAIR,Biden,6,6,0.0784
2,Adair,1NW ADAIR,Bloomberg,0,0,0.0000
3,Adair,1NW ADAIR,Buttigieg,8,8,0.0784
4,Adair,1NW ADAIR,Delaney,0,0,0.0000
...,...,...,...,...,...,...
23444,Wright,Rural Clarion,Sanders,1,0,0.0000
23445,Wright,Rural Clarion,Steyer,0,0,0.0000
23449,Wright,Rural Clarion,Uncommitted,0,0,0.0000
23446,Wright,Rural Clarion,Warren,3,0,0.0000
