# 2018 General Election Midterm Candidate Scraper

In [1]:
from urllib.request import urlopen
import re # Regex Library
import pandas as pd

In [8]:
def get_html_from_url(url): 
    return urlopen(url).read().decode("utf-8")

In [9]:
states = []
with open('states.txt', 'r') as f:
    for state in f:
        states.append(state[:-1].replace(' ', '_'))

In [10]:
states

['Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'Florida',
 'Georgia',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'Maine',
 'Maryland',
 'Massachusetts',
 'Michigan',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'Nebraska',
 'Nevada',
 'New_Hampshire',
 'New_Jersey',
 'New_Mexico',
 'New_York',
 'North_Carolina',
 'North_Dakota',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Rhode_Island',
 'South_Carolina',
 'South_Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Washington',
 'West_Virginia',
 'Wisconsin',
 'Wyoming']

## House of Representatives

### Get Ballotpedia HTML for each state

I'm encountering a problem where some lists of general election candidates are formatted differently than others. I have identified 2 patterns: a more modern html approach where "General election candidates" is between b tags and candidate names are enclosed in a tags, and an older approach where "General election candidates" is enclosed in span tags and the candidate names are in ul tags.

In [11]:
def clean_names(possible_names):
    names = []
    for string in possible_names:
        if "election" not in string and "candidate" not in string:
                    names.append(string)
    return names

In [12]:
def get_candidates_for_state(state, chamber):
    chamber_urls = {'house': 'https://ballotpedia.org/United_States_House_of_Representatives_elections_in_{},_2018', 'senate': 'https://ballotpedia.org/United_States_Senate_election_in_{},_2018'}
    html = get_html_from_url(chamber_urls[chamber].format(state))
    districts = re.findall(r"District \d{1,2}.{1,300}General election.+?Primary", html, re.DOTALL)
#     print(districts)
    data = pd.DataFrame()
    if districts:
        for district in districts:
            district_string = re.findall(r"District \d{1,2}", district)[0]
            candidate_names = clean_names(re.findall(r"<a href=.+>(.+)</a>", district))
            data = data.append(pd.DataFrame({'district': [district_string]*len(candidate_names), 'name': candidate_names, 'state': state}), ignore_index=True)
    else:
        general_candidates = re.findall(r"General election candidates.+?Primary", html, re.DOTALL)
        candidate_names = clean_names(re.findall(r"<a href=.+>(.+)</a>", general_candidates[0]))
        data = data.append(pd.DataFrame({'district': [None]*len(candidate_names), 'name': candidate_names, 'state': state}), ignore_index=True)
    return data

In [279]:
get_candidates_for_state('California', 'senate')

Unnamed: 0,district,name,state
0,,Early voting,California
1,,Absentee voting deadline,California
2,,Yes,California
3,,Same-day registration,California
4,,Voter ID,California
5,,Poll times,California
6,,Recalls,California


## Get House candidates for each state

In [272]:
df = pd.DataFrame()
for state in states:
    df = df.append(get_candidates_for_state(state, 'house'), ignore_index=True)
    print(get_candidates_for_state(state))

      district                name    state
0   District 1  Robert Kennedy Jr.  Alabama
1   District 1       Bradley Byrne  Alabama
2   District 2       Tabitha Isner  Alabama
3   District 2         Martha Roby  Alabama
4   District 3       Mallory Hagan  Alabama
5   District 3         Mike Rogers  Alabama
6   District 4           Lee Auman  Alabama
7   District 4     Robert Aderholt  Alabama
8   District 5      Peter Joffrion  Alabama
9   District 5           Mo Brooks  Alabama
10  District 6        Danner Kline  Alabama
11  District 6         Gary Palmer  Alabama
12  District 7        Terri Sewell  Alabama
  district          name   state
0     None     Don Young  Alaska
1     None  Alyse Galvin  Alaska
2     None    Greg Fitch  Alaska
3     None      Sid Hill  Alaska
      district                     name    state
0   District 1       Tom O&#39;Halleran  Arizona
1   District 1             Wendy Rogers  Arizona
2   District 1              David Shock  Arizona
3   District 2         

Empty DataFrame
Columns: [district, name, state]
Index: []
  district                  name     state
0     None  Lisa Blunt Rochester  Delaware
1     None          Scott Walker  Delaware
2     None           Andrew Webb  Delaware
3     None          Marvin Davis  Delaware
4     None         Paul Johnston  Delaware
       district                      name    state
0    District 2                 Neal Dunn  Florida
1    District 2              Bob Rackleff  Florida
2    District 3                  Ted Yoho  Florida
3    District 3       Yvonne Hayes Hinson  Florida
4    District 4           John Rutherford  Florida
5    District 4               Ges Selmont  Florida
6    District 4          Joceline Berrios  Florida
7    District 4              Jason Bulger  Florida
8    District 5             Alfred Lawson  Florida
9    District 5           Virginia Fuller  Florida
10   District 6           Nancy Soderberg  Florida
11   District 6             Michael Waltz  Florida
12   District 7     

      district                name    state
0   District 1     Peter Visclosky  Indiana
1   District 1          Mark Leyva  Indiana
2   District 1   Jonathan Kleinman  Indiana
3   District 2     Jackie Walorski  Indiana
4   District 2            Mel Hall  Indiana
5   District 2        Richard Wolf  Indiana
6   District 3           Jim Banks  Indiana
7   District 3     Courtney Tritch  Indiana
8   District 4           Tobi Beck  Indiana
9   District 4           Jim Baird  Indiana
10  District 5        Susan Brooks  Indiana
11  District 5        Dee Thornton  Indiana
12  District 6   Jeannine Lee Lake  Indiana
13  District 6          Greg Pence  Indiana
14  District 6         &#91;3&#93;  Indiana
15  District 6       Heather Meloy  Indiana
16  District 6         John Miller  Indiana
17  District 7        Andre Carson  Indiana
18  District 7        Wayne Harmon  Indiana
19  District 8       Larry Bucshon  Indiana
20  District 8      William Tanoos  Indiana
21  District 9  Trey Hollingswor

     district                 name  state
0  District 1      Chellie Pingree  Maine
1  District 1        Mark Holbrook  Maine
2  District 1       Martin Grohman  Maine
3  District 2       Bruce Poliquin  Maine
4  District 2         Jared Golden  Maine
5  District 2         Tiffany Bond  Maine
6  District 2            Will Hoar  Maine
7  District 2  Dennis O&#39;Connor  Maine
8  District 2  Danielle VanHelsing  Maine
9  District 2      Henry John Bear  Maine
      district                      name     state
0   District 1               Andy Harris  Maryland
1   District 1              Jesse Colvin  Maryland
2   District 1             Jenica Martin  Maryland
3   District 2       Dutch Ruppersberger  Maryland
4   District 2                Liz Matory  Maryland
5   District 2                Guy Mimoun  Maryland
6   District 2            Michael Carney  Maryland
7   District 3             John Sarbanes  Maryland
8   District 3           Charles Anthony  Maryland
9   District 3              

     district                  name       state
0  District 1         Debra Haaland  New_Mexico
1  District 1   Janice Arnold-Jones  New_Mexico
2  District 1       Lloyd Princeton  New_Mexico
3  District 2  Xochitl Torres Small  New_Mexico
4  District 2        Yvette Herrell  New_Mexico
5  District 3         Ben Ray Lujan  New_Mexico
6  District 3   Jerald Steve McFall  New_Mexico
7  District 3   Christopher Manning  New_Mexico
        district                                               name     state
0     District 1                                         Democratic  New_York
1     District 1                                         Republican  New_York
2     District 1                                 Conservative Party  New_York
3     District 1                                        Green Party  New_York
4     District 1                                 Independence Party  New_York
5     District 1                                       Reform Party  New_York
6     District 1      

  district             name         state
0     None    Mac Schneider  North_Dakota
1     None  Kelly Armstrong  North_Dakota
2     None   Charles Tuttle  North_Dakota
       district                          name state
0    District 1                  Steve Chabot  Ohio
1    District 1                 Aftab Pureval  Ohio
2    District 1                   Dirk Kubala  Ohio
3    District 1              Mike Goldschmidt  Ohio
4    District 1                 Kiumars Kiani  Ohio
5    District 2                 Brad Wenstrup  Ohio
6    District 2                 Jill Schiller  Ohio
7    District 2              James Condit Jr.  Ohio
8    District 2                   Steve Myers  Ohio
9    District 2                   David Baker  Ohio
10   District 3                  Joyce Beatty  Ohio
11   District 3                   Jim Burgess  Ohio
12   District 3                  Millie Milam  Ohio
13   District 4                    Jim Jordan  Ohio
14   District 4                 Janet Garrett  Ohio


      district              name state
0   District 1        Rob Bishop  Utah
1   District 1      Lee Castillo  Utah
2   District 1        Adam Davis  Utah
3   District 1      Eric Eliason  Utah
4   District 2     Chris Stewart  Utah
5   District 2  Shireen Ghorbani  Utah
6   District 2   Jeffrey Whipple  Utah
7   District 2       Jan Garbett  Utah
8   District 3       John Curtis  Utah
9   District 3      James Singer  Utah
10  District 3       &#91;4&#93;  Utah
11  District 3   Gregory Duerden  Utah
12  District 3       &#91;4&#93;  Utah
13  District 4          Mia Love  Utah
14  District 4       Ben McAdams  Utah
  district             name    state
0     None      Peter Welch  Vermont
1     None       Anya Tynio  Vermont
2     None     Laura Potter  Vermont
3     None     Cris Ericson  Vermont
4     None  H. Brooke Paige  Vermont
5     None      &#91;1&#93;  Vermont
       district                   name     state
0    District 1            Rob Wittman  Virginia
1    District 1    

In [273]:
df.to_csv("candidate_names.csv")

## Get party affiliation and incumbency data for each candidate

We manually fixed the csv to get a complete list of candidates for Congress from each state, using Ballotpedia as our source. We will now add incumbency and party affiliation data to the csv we already have.

In [3]:
df = pd.read_csv('candidate_names_final.csv')

In [4]:
df

Unnamed: 0,district,name,state,chamber,party
0,District 1,Robert Kennedy Jr.,Alabama,house,
1,District 1,Bradley Byrne,Alabama,house,
2,District 2,Tabitha Isner,Alabama,house,
3,District 2,Martha Roby,Alabama,house,
4,District 3,Mallory Hagan,Alabama,house,
5,District 3,Mike Rogers,Alabama,house,
6,District 4,Lee Auman,Alabama,house,
7,District 4,Robert Aderholt,Alabama,house,
8,District 5,Peter Joffrion,Alabama,house,
9,District 5,Mo Brooks,Alabama,house,


Get House candidate info

In [497]:
def get_party_and_incumbents(df):
    new_df = pd.DataFrame()
    chamber_urls = {'house': 'https://ballotpedia.org/United_States_House_of_Representatives_elections_in_{},_2018', 'senate': 'https://ballotpedia.org/United_States_Senate_election_in_{},_2018'}
    for state in states:
        print(state)
        state_df = df.loc[(df.chamber == 'house') & (df.state == state)].copy()
        state_html = get_html_from_url(chamber_urls['house'].format(state))
        state_df['incumbent'] = state_df.apply((lambda candidate: get_incumbency(candidate, state_html)), axis=1).copy()
        state_df.party = state_df.apply((lambda candidate: get_party(candidate, state_html)), axis=1).copy()
        new_df = new_df.append(state_df)
        print(state_df)
    return new_df

In [498]:
new_df = get_party_and_incumbents(df)

Alabama
      district                name    state chamber             party  \
0   District 1  Robert Kennedy Jr.  Alabama   house  Democratic Party   
1   District 1       Bradley Byrne  Alabama   house  Republican Party   
2   District 2       Tabitha Isner  Alabama   house  Democratic Party   
3   District 2         Martha Roby  Alabama   house              None   
4   District 3       Mallory Hagan  Alabama   house  Democratic Party   
5   District 3         Mike Rogers  Alabama   house     Alabama Party   
6   District 4           Lee Auman  Alabama   house  Democratic Party   
7   District 4     Robert Aderholt  Alabama   house              None   
8   District 5      Peter Joffrion  Alabama   house  Democratic Party   
9   District 5           Mo Brooks  Alabama   house              None   
10  District 6        Danner Kline  Alabama   house  Democratic Party   
11  District 6         Gary Palmer  Alabama   house              None   
12  District 7        Terri Sewell  Alabama

       district                     name     state chamber  \
166  District 1            Diana DeGette  Colorado   house   
167  District 1          Casper Stockham  Colorado   house   
168  District 1             Raymon Doane  Colorado   house   
169  District 1                Paul Daly  Colorado   house   
170  District 1             Miguel Lopez  Colorado   house   
171  District 2               Joe Neguse  Colorado   house   
172  District 2                 Peter Yu  Colorado   house   
173  District 2             Roger Barris  Colorado   house   
174  District 2              Nick Thomas  Colorado   house   
175  District 2          Kevin Alumbaugh  Colorado   house   
176  District 3             Scott Tipton  Colorado   house   
177  District 3        Diane Mitsch Bush  Colorado   house   
178  District 3              Gaylon Kent  Colorado   house   
179  District 3            Mary Malarsie  Colorado   house   
180  District 3           Allen Tompkins  Colorado   house   
181  Dis

        district                   name    state chamber              party  \
291   District 1    Earl "Buddy" Carter  Georgia   house   Republican Party   
292   District 1              Lisa Ring  Georgia   house   Democratic Party   
293   District 2     Sanford Bishop Jr.  Georgia   house   Democratic Party   
294   District 2        Herman West Jr.  Georgia   house   Republican Party   
295   District 3          Drew Ferguson  Georgia   house               None   
296   District 3         Chuck Enderlin  Georgia   house   Democratic Party   
297   District 4           Hank Johnson  Georgia   house               None   
298   District 4             Joe Profit  Georgia   house   Republican Party   
299   District 5             John Lewis  Georgia   house      Georgia Party   
300   District 6           Karen Handel  Georgia   house               None   
301   District 6            Lucy McBath  Georgia   house   Democratic Party   
302   District 6          Jeremy Stubbs  Georgia   h

       district                name    state chamber              party  \
392  District 1     Peter Visclosky  Indiana   house   Democratic Party   
393  District 1          Mark Leyva  Indiana   house   Republican Party   
394  District 1   Jonathan Kleinman  Indiana   house           Grey.png   
395  District 2     Jackie Walorski  Indiana   house               None   
396  District 2            Mel Hall  Indiana   house   Democratic Party   
397  District 2        Richard Wolf  Indiana   house           Grey.png   
398  District 3           Jim Banks  Indiana   house               None   
399  District 3     Courtney Tritch  Indiana   house   Democratic Party   
400  District 4           Tobi Beck  Indiana   house   Democratic Party   
401  District 4           Jim Baird  Indiana   house   Republican Party   
402  District 5        Susan Brooks  Indiana   house               None   
403  District 5        Dee Thornton  Indiana   house   Democratic Party   
404  District 6   Jeannin

       district                 name  state chamber              party  \
489  District 1      Chellie Pingree  Maine   house   Democratic Party   
490  District 1        Mark Holbrook  Maine   house   Republican Party   
491  District 1       Martin Grohman  Maine   house  Independent Party   
492  District 2       Bruce Poliquin  Maine   house   Republican Party   
493  District 2         Jared Golden  Maine   house   Democratic Party   
494  District 2         Tiffany Bond  Maine   house  Independent Party   
495  District 2            Will Hoar  Maine   house  Independent Party   
496  District 2      Dennis O'Connor  Maine   house               None   
497  District 2  Danielle VanHelsing  Maine   house               None   
498  District 2      Henry John Bear  Maine   house               None   

     incumbent  
489       True  
490      False  
491      False  
492       True  
493      False  
494      False  
495      False  
496      False  
497      False  
498      False 

       district                    name      state chamber  \
589  District 2             Jason Lewis  Minnesota   house   
590  District 2             Angie Craig  Minnesota   house   
591  District 2         Bradley Svenson  Minnesota   house   
592  District 3            Erik Paulsen  Minnesota   house   
593  District 3           Dean Phillips  Minnesota   house   
594  District 4          Betty McCollum  Minnesota   house   
595  District 4               Greg Ryan  Minnesota   house   
596  District 4  Susan Pendergast Sindt  Minnesota   house   
597  District 5              Ilhan Omar  Minnesota   house   
598  District 5      Jennifer Zielinski  Minnesota   house   
599  District 5              Les Lester  Minnesota   house   
600  District 6               Tom Emmer  Minnesota   house   
601  District 6                Ian Todd  Minnesota   house   
602  District 7         Collin Peterson  Minnesota   house   
603  District 7             Dave Hughes  Minnesota   house   

       

       district                  name       state chamber              party  \
701  District 1         Debra Haaland  New_Mexico   house   Democratic Party   
702  District 1   Janice Arnold-Jones  New_Mexico   house   Republican Party   
703  District 1       Lloyd Princeton  New_Mexico   house  Libertarian Party   
704  District 2  Xochitl Torres Small  New_Mexico   house   Democratic Party   
705  District 2        Yvette Herrell  New_Mexico   house   Republican Party   
706  District 3         Ben Ray Lujan  New_Mexico   house               None   
707  District 3   Jerald Steve McFall  New_Mexico   house   Republican Party   
708  District 3   Christopher Manning  New_Mexico   house  Libertarian Party   

     incumbent  
701      False  
702      False  
703      False  
704      False  
705      False  
706       True  
707      False  
708      False  
New_York
        district                   name     state chamber  \
709   District 1             Lee Zeldin  New_York   hous

    district             name         state chamber              party  \
824      NaN    Mac Schneider  North_Dakota   house   Democratic Party   
825      NaN  Kelly Armstrong  North_Dakota   house   Republican Party   
826      NaN   Charles Tuttle  North_Dakota   house  Independent Party   

     incumbent  
824      False  
825      False  
826      False  
Ohio
        district                          name state chamber  \
827   District 1                  Steve Chabot  Ohio   house   
828   District 1                 Aftab Pureval  Ohio   house   
829   District 1                   Dirk Kubala  Ohio   house   
830   District 1              Mike Goldschmidt  Ohio   house   
831   District 1                 Kiumars Kiani  Ohio   house   
832   District 2                 Brad Wenstrup  Ohio   house   
833   District 2                 Jill Schiller  Ohio   house   
834   District 2              James Condit Jr.  Ohio   house   
835   District 2                   Steve Myers  Ohio  

        district               name         state chamber  \
909   District 1  Brian Fitzpatrick  Pennsylvania   house   
910   District 1      Scott Wallace  Pennsylvania   house   
911   District 1      Steve Scheetz  Pennsylvania   house   
912   District 2      Brendan Boyle  Pennsylvania   house   
913   District 2       David Torres  Pennsylvania   house   
914   District 3       Dwight Evans  Pennsylvania   house   
915   District 3         Bryan Leib  Pennsylvania   house   
916   District 4     Madeleine Dean  Pennsylvania   house   
917   District 4          Dan David  Pennsylvania   house   
918   District 5   Mary Gay Scanlon  Pennsylvania   house   
919   District 5          Pearl Kim  Pennsylvania   house   
920   District 6   Chrissy Houlahan  Pennsylvania   house   
921   District 6      Greg McCauley  Pennsylvania   house   
922   District 7         Susan Wild  Pennsylvania   house   
923   District 7    Marty Nothstein  Pennsylvania   house   
924   District 7        

        district              name state chamber              party  incumbent
1089  District 1        Rob Bishop  Utah   house   Republican Party       True
1090  District 1      Lee Castillo  Utah   house   Democratic Party      False
1091  District 1        Adam Davis  Utah   house        Green Party      False
1092  District 1      Eric Eliason  Utah   house           Grey.png      False
1093  District 2     Chris Stewart  Utah   house         Utah Party       True
1094  District 2  Shireen Ghorbani  Utah   house   Democratic Party      False
1095  District 2   Jeffrey Whipple  Utah   house  Libertarian Party      False
1096  District 2       Jan Garbett  Utah   house           Grey.png      False
1097  District 3       John Curtis  Utah   house         Utah Party       True
1098  District 3      James Singer  Utah   house   Democratic Party      False
1099  District 3       Tim Zeidner  Utah   house           Grey.png      False
1100  District 3   Gregory Duerden  Utah   house    

In [499]:
new_df.to_csv("candidate_data.csv")

In [452]:
house_df = df.loc[df.chamber == 'house']

In [475]:
def get_general_election_html(candidate, html):
    if not pd.isnull(candidate.district):
        district_re = candidate.district + ".{1,300}General election.+?" + candidate['name'] + ".+?Primary"
    else:
        district_re = ".{1,300}General election.+?" + str(candidate['name']) + ".+?Primary"
    return re.findall(district_re, html, re.DOTALL)

In [425]:
def get_incumbency(candidate, html):
    """Takes a list of html for the candidate in the district and returns the party"""
    general_election_html = get_general_election_html(candidate, html)
    if "'" in candidate['name']:
        html_near_name_re = candidate['name'].split("'")[0] + '&#39;' + candidate['name'].split("'")[1] + ".{50}"
    else:
        html_near_name_re = candidate['name'] + ".{50}"
    html_near_name = list(map(lambda html: re.findall(html_near_name_re, html, re.DOTALL), general_election_html))
    incumbent = False
    for html_lst in html_near_name:
        for html in html_lst:
            incumbent_html = re.findall('Incumbent', html)
            if incumbent_html:
                incumbent = True
                break
    return incumbent

In [383]:
def get_parenthetical_party(candidate, lst_of_html):
    """Takes a list of html for the candidate in the district and returns the party"""
    if "'" in candidate['name']:
        html_near_name_re = candidate['name'].split("'")[0] + '&#39;' + candidate['name'].split("'")[1] + ".{50}"
    else:
        html_near_name_re = candidate['name'] + ".{50}"
    html_near_name = list(map(lambda html: re.findall(html_near_name_re, html, re.DOTALL), lst_of_html))
    party_abbrevs = {'D': 'Democratic', 'R': 'Republican', 'G': 'Green', 'I': 'Independent', 'L': 'Libertarian', 'NPP': 'No Party Preference'}
    party = None
    for html_lst in html_near_name:
        for html in html_lst:
            party_html = re.findall('\((.+?)\)', html)
            if party_html:
                for string in party_html:
                    if string != 'Incumbent':
                        if string in party_abbrevs.keys():
                            if string == 'NPP':
                                party = party_abbrevs[string]
                            else:
                                party = party_abbrevs[string] + ' Party'
                        else:
                            party = string + ' Party'
                return party

In [424]:
def get_party(candidate, html):
    general_election_html = get_general_election_html(candidate, html)
    try:
        html_near_name = re.findall(r'<a.+?>.*?<img alt=".*?{}'.format(candidate['name']), general_election_html[0])
        relevant_html = min(html_near_name, key=len)
        party = re.findall('<img alt="(.*?)".*?{}'.format(candidate['name']), relevant_html)
        return party[0]
    except:
        return get_parenthetical_party(candidate, general_election_html)

## Test cases - Arizona and California

In [491]:
az_df = house_df.loc[house_df.state == 'Arizona'].copy()

In [492]:
az_df

Unnamed: 0,district,name,state,chamber,party
0,District 1,Robert Kennedy Jr.,Alabama,house,
1,District 1,Bradley Byrne,Alabama,house,
2,District 2,Tabitha Isner,Alabama,house,
3,District 2,Martha Roby,Alabama,house,
4,District 3,Mallory Hagan,Alabama,house,
5,District 3,Mike Rogers,Alabama,house,
6,District 4,Lee Auman,Alabama,house,
7,District 4,Robert Aderholt,Alabama,house,
8,District 5,Peter Joffrion,Alabama,house,
9,District 5,Mo Brooks,Alabama,house,


In [493]:
az_html = get_html_from_url(chamber_urls['house'].format('Arizona'))

In [494]:
az_df['incumbent'] = az_df.apply((lambda candidate: get_incumbency(candidate, az_html)), axis=1).copy()

In [495]:
az_df.party = az_df.apply((lambda candidate: get_party(candidate, az_html)), axis=1).copy()

In [496]:
az_df

Unnamed: 0,district,name,state,chamber,party,incumbent
0,District 1,Robert Kennedy Jr.,Alabama,house,Democratic Party,False
1,District 1,Bradley Byrne,Alabama,house,Republican Party,True
2,District 2,Tabitha Isner,Alabama,house,Democratic Party,False
3,District 2,Martha Roby,Alabama,house,,True
4,District 3,Mallory Hagan,Alabama,house,Democratic Party,False
5,District 3,Mike Rogers,Alabama,house,Alabama Party,True
6,District 4,Lee Auman,Alabama,house,Democratic Party,False
7,District 4,Robert Aderholt,Alabama,house,,True
8,District 5,Peter Joffrion,Alabama,house,Democratic Party,False
9,District 5,Mo Brooks,Alabama,house,,True


In [274]:
ca_df = house_df.loc[house_df.state == 'California'].copy()

In [277]:
ca_df.head().apply((lambda candidate: get_party(candidate, html)), axis=1)

['District 1</a></span></h3>\n<p><big><b>General election candidates:</b></big><br />\n<a href="/Republican_Party" title="Republican Party"><img alt="Republican Party" src="https://cdn.ballotpedia.org/images/f/fd/Ends.png" width="18" height="18" data-file-width="16" data-file-height="16"></a> <a href="/Doug_LaMalfa" title="Doug LaMalfa">Doug LaMalfa</a> - Incumbent<br />\n<a href="/Democratic_Party" title="Democratic Party"><img alt="Democratic Party" src="https://cdn.ballotpedia.org/images/d/d4/Electiondot.png" width="18" height="18" data-file-width="16" data-file-height="16"></a> <a href="/Audrey_Denney" title="Audrey Denney">Audrey Denney</a>\n</p>\n<table cellspacing="0" cellpadding="0" class="multicol" style="background:transparent; width:auto;">\n<p></p>\n<tr>\n<td width="auto" align="left" valign="top">\n<p></p>\n</td>\n<td align="left" valign="top">\n<p><b><big>Primary', 'District 10</a></span></h3>\n<p>\t\t\t\t\t\t\t\t\t\t<span style="font-weight: bold">General election candid

61    Republican Party
62    Democratic Party
63    Democratic Party
64    Republican Party
65    Democratic Party
dtype: object

In [386]:
ca_df.party = ca_df.apply((lambda candidate: get_party(candidate, html)), axis=1).copy()

In [392]:
ca_df['incumbent'] = ca_df.apply((lambda candidate: get_incumbency(candidate, html)), axis=1).copy()

In [393]:
ca_df

Unnamed: 0,district,name,state,chamber,party,incumbent
61,District 1,Doug LaMalfa,California,house,Republican Party,True
62,District 1,Audrey Denney,California,house,Democratic Party,False
63,District 2,Jared Huffman,California,house,Democratic Party,True
64,District 2,Dale Mensing,California,house,Republican Party,False
65,District 3,John Garamendi,California,house,Democratic Party,True
66,District 3,Charlie Schaupp,California,house,Republican Party,False
67,District 4,Tom McClintock,California,house,Republican Party,True
68,District 4,Jessica Morse,California,house,Democratic Party,False
69,District 5,Mike Thompson,California,house,Democratic Party,True
70,District 5,Anthony Mills,California,house,Independent,False
