# Scrape Minnesota - County Dropoff Locations

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import re

In [2]:
url = 'https://www.sos.state.mn.us/elections-voting/find-county-election-office/#'

In [3]:
res = requests.get(url)

In [4]:
res.status_code

200

In [5]:
soup = BeautifulSoup(res.text) #can do just ...(re.text)

# Analyze the format of the text

In [6]:
soup

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<umbraco:item field="secureRedirect" runat="server"></umbraco:item>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="Get contact information for county election officials around Minnesota" name="description"/>
<meta content="Find County Election Office" name="title"/>
<meta content="Secretary of State, Minnesota, SOS Home" name="keywords"/>
<meta content="System.String[]" name="robots"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta content="http://www.sos.state.mn.us/elections-voting/find-county-election-office/" property="og:url"/>
<meta content="website" property="og:type"/>
<meta content="Office of the State Of Minnesota Secretary of State" property="og:title"/>
<meta content="Office of the State Of Minnesota Secretary of State" property="og:description"/>
<title>Minnesota Secretary Of State - Find County Election Office</title>
<script src="https://ajax.aspnetcdn.co

This page is divided into two columns of data. In each column there is a list of counties, and then the data in each list is collapsed.

- The county names are h3 and tagged with a class="contentpage-h3".
- The website can be scraped via the "a", or anchor, tags.
- The details of the addresses only have tags for the section titles, e.g. "ADDRESS" or "Absentee voting contact". Othewise the detailed information is only separated with "br" tags.


# Create a DataFrame to hold the data

In [7]:
data_schema = ['county',
        'location_type',
        'address_1',
        'address_2',
        'city',
        'state',
        'zip',
        'phone', 
        'latitude', 
        'longitude', 
        'has_droppff', 
        'has_phone', 
        'county_website_url', 
        'validate_url', 
        'email', 
        'fax', 
        'social', 
        'inactive', 
        'hours']

In [8]:
data_schema

['county',
 'location_type',
 'address_1',
 'address_2',
 'city',
 'state',
 'zip',
 'phone',
 'latitude',
 'longitude',
 'has_droppff',
 'has_phone',
 'county_website_url',
 'validate_url',
 'email',
 'fax',
 'social',
 'inactive',
 'hours']

In [9]:
all_data_df = pd.DataFrame(columns=data_schema)

In [10]:
all_data_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   county              0 non-null      object
 1   location_type       0 non-null      object
 2   address_1           0 non-null      object
 3   address_2           0 non-null      object
 4   city                0 non-null      object
 5   state               0 non-null      object
 6   zip                 0 non-null      object
 7   phone               0 non-null      object
 8   latitude            0 non-null      object
 9   longitude           0 non-null      object
 10  has_droppff         0 non-null      object
 11  has_phone           0 non-null      object
 12  county_website_url  0 non-null      object
 13  validate_url        0 non-null      object
 14  email               0 non-null      object
 15  fax                 0 non-null      object
 16  social              0 non-null      object


# Scrape county names from _class="contentpage-h2"_

In [11]:
counties = soup.find_all(class_='contentpage-h2')

In [12]:
len(counties)

88

In [13]:
[county for county in counties[:3]]

[<span class="contentpage-h2"><span>Click on your county in the election official directory below to get elections contact information</span></span>,
 <h2 class="contentpage-h2"><a aria-expanded="false" data-target="#1" data-toggle="collapse" href="#">Aitkin County</a></h2>,
 <h2 class="contentpage-h2"><a aria-expanded="false" data-target="#2" data-toggle="collapse" href="#">Anoka County</a></h2>]

**Remove the first record from the scrape - it doesn't contain a county name - total number of counties is 87**

In [14]:
[county for county in counties[85:]]

[<h2 class="contentpage-h2"><a aria-expanded="false" data-target="#85" data-toggle="collapse" href="#">Winona County</a></h2>,
 <h2 class="contentpage-h2"><a aria-expanded="false" data-target="#86" data-toggle="collapse" href="#">Wright County</a></h2>,
 <h2 class="contentpage-h2"><a aria-expanded="false" data-target="#87" data-toggle="collapse" href="#">Yellow Medicine County</a></h2>]

In [15]:
counties_list = [county.text for county in counties[1:]]

In [16]:
len(counties_list)

87

In [17]:
[[num, row] for num,row in enumerate(counties_list)]

[[0, 'Aitkin County'],
 [1, 'Anoka County'],
 [2, 'Becker County'],
 [3, 'Beltrami County'],
 [4, 'Benton County'],
 [5, 'Big Stone County'],
 [6, 'Blue Earth County'],
 [7, 'Brown County'],
 [8, 'Carlton County'],
 [9, 'Carver County'],
 [10, 'Cass County'],
 [11, 'Chippewa County'],
 [12, 'Chisago County'],
 [13, 'Clay County'],
 [14, 'Clearwater County'],
 [15, 'Cook County'],
 [16, 'Cottonwood County'],
 [17, 'Crow Wing County'],
 [18, 'Dakota County'],
 [19, 'Dodge County'],
 [20, 'Douglas County'],
 [21, 'Faribault County'],
 [22, 'Fillmore County'],
 [23, 'Freeborn County'],
 [24, 'Goodhue County'],
 [25, 'Grant County'],
 [26, 'Hennepin County'],
 [27, 'Houston County'],
 [28, 'Hubbard County'],
 [29, 'Isanti County'],
 [30, 'Itasca County'],
 [31, 'Jackson County'],
 [32, 'Kanabec County'],
 [33, 'Kandiyohi County'],
 [34, 'Kittson County'],
 [35, 'Koochiching County'],
 [36, 'Lac qui Parle County'],
 [37, 'Lake County'],
 [38, 'Lake of the Woods County'],
 [39, 'Le Sueur Coun

In [18]:
all_data_df['county'] = counties_list

In [19]:
all_data_df

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
0,Aitkin County,,,,,,,,,,,,,,,,,,
1,Anoka County,,,,,,,,,,,,,,,,,,
2,Becker County,,,,,,,,,,,,,,,,,,
3,Beltrami County,,,,,,,,,,,,,,,,,,
4,Benton County,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,Watonwan County,,,,,,,,,,,,,,,,,,
83,Wilkin County,,,,,,,,,,,,,,,,,,
84,Winona County,,,,,,,,,,,,,,,,,,
85,Wright County,,,,,,,,,,,,,,,,,,


# Scrape address info from _class="collapse"_

In [20]:
divs = soup.find_all(class_='collapse')

In [21]:
len(divs)

88

In [22]:
divs[0]

<nav aria-expanded="false" aria-label="Main menu" class="navbar-collapse bs-navbar-collapse collapse mn-accordion" id="nav-site" role="navigation">
<ul class="sm item-menu nav navbar-nav" id="menu-site" role="menubar">
<li>
<div class="mn-menu-head">
<a class="item-link single-line" href="https://www.sos.state.mn.us/home" title="MN Secretary Of State Home Page"><em class="mn-icon icon-home"></em> Home<br/> </a>
</div>
</li>
<li class="mn-top-menu" role="menuitem">
<div class="mn-menu-head">
<a class="item-link" href="https://www.sos.state.mn.us/elections-voting/" title="Elections &amp; Voting"><em class="mn-icon icon-checkbox2"></em>Elections &amp; <br/>Voting</a>
<a class="btn-top-menu" href="#">
<em class="glyphicon glyphicon-chevron-down"></em><em class="glyphicon glyphicon-chevron-up"></em>
<span class="sr-only">Open Menu</span>
</a>
</div>
<div class="submenu">
<div class="submenu-group">
<ul class="submenu-links">
<li><a href="https://www.sos.state.mn.us/elections-voting/register

**Remove the first div in the scrape - it doesn't contain county info**

## Scrape websites via "a" tags

In [23]:
websites = []
for div in divs:
    website = [x for x in div.find_all('a')]
    websites.append(website)

In [24]:
len(websites)

88

In [25]:
websites[1:]

[[<a href="http://www.co.aitkin.mn.us/">http://www.co.aitkin.mn.us/</a>],
 [<a href="http://www.anokacounty.us">http://www.anokacounty.us</a>],
 [<a href="http://www.co.becker.mn.us/">http://www.co.becker.mn.us/</a>],
 [<a href="http://www.co.beltrami.mn.us/">http://www.co.beltrami.mn.us/</a>],
 [<a href="http://www.co.benton.mn.us/">http://www.co.benton.mn.us/</a>],
 [<a href="http://www.bigstonecounty.org/">http://www.bigstonecounty.org/</a>],
 [<a href="http://www.blueearthcountymn.gov/index.aspx?nid=326">http://www.blueearthcountymn.gov/index.aspx?nid=326</a>],
 [<a href="http://www.co.brown.mn.us">http://www.co.brown.mn.us</a>],
 [<a href="http://www.co.carlton.mn.us/">http://www.co.carlton.mn.us/</a>],
 [<a href="http://www.co.carver.mn.us/">http://www.co.carver.mn.us/</a>],
 [<a href="http://www.co.cass.mn.us/">http://www.co.cass.mn.us/</a>],
 [<a href="http://www.co.chippewa.mn.us">http://www.co.chippewa.mn.us</a>],
 [<a href="http://www.co.chisago.mn.us/">http://www.co.chisago

In [26]:
sites=[]
for site in websites[1:]:
    sites.append(site[0].text)

In [27]:
[[num,row] for num, row in enumerate(sites)]

[[0, 'http://www.co.aitkin.mn.us/'],
 [1, 'http://www.anokacounty.us'],
 [2, 'http://www.co.becker.mn.us/'],
 [3, 'http://www.co.beltrami.mn.us/'],
 [4, 'http://www.co.benton.mn.us/'],
 [5, 'http://www.bigstonecounty.org/'],
 [6, 'http://www.blueearthcountymn.gov/index.aspx?nid=326'],
 [7, 'http://www.co.brown.mn.us'],
 [8, 'http://www.co.carlton.mn.us/'],
 [9, 'http://www.co.carver.mn.us/'],
 [10, 'http://www.co.cass.mn.us/'],
 [11, 'http://www.co.chippewa.mn.us'],
 [12, 'http://www.co.chisago.mn.us/'],
 [13, 'http://www.claycountymn.gov'],
 [14, 'http://www.co.clearwater.mn.us/'],
 [15, 'http://www.co.cook.mn.us/'],
 [16, 'http://www.co.cottonwood.mn.us/'],
 [17, 'http://crowwing.us/index.aspx?nid=110'],
 [18, 'http://www.dakotacounty.us'],
 [19, 'http://www.co.dodge.mn.us/'],
 [20, 'http://www.co.douglas.mn.us/'],
 [21, 'http://www.co.faribault.mn.us/'],
 [22, 'http://www.co.fillmore.mn.us'],
 [23, 'http://www.co.freeborn.mn.us/'],
 [24, 'http://www.co.goodhue.mn.us'],
 [25, 'http:/

In [28]:
len(sites)

87

In [29]:
all_data_df['county_website_url'] = sites

In [30]:
all_data_df

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
0,Aitkin County,,,,,,,,,,,,http://www.co.aitkin.mn.us/,,,,,,
1,Anoka County,,,,,,,,,,,,http://www.anokacounty.us,,,,,,
2,Becker County,,,,,,,,,,,,http://www.co.becker.mn.us/,,,,,,
3,Beltrami County,,,,,,,,,,,,http://www.co.beltrami.mn.us/,,,,,,
4,Benton County,,,,,,,,,,,,http://www.co.benton.mn.us/,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,Watonwan County,,,,,,,,,,,,http://www.co.watonwan.mn.us/,,,,,,
83,Wilkin County,,,,,,,,,,,,http://www.co.wilkin.mn.us/,,,,,,
84,Winona County,,,,,,,,,,,,http://www.co.winona.mn.us/,,,,,,
85,Wright County,,,,,,,,,,,,http://www.co.wright.mn.us/,,,,,,


# Scrape 'br' tags for email, phone, city

In [31]:
divs[1]

<div class="collapse" id="1">
<div>Election official: Kirk Peysar <br/>General phone: 218-927-7354 <br/>Fax: 218-927-7324 <br/>Website: <a href="http://www.co.aitkin.mn.us/">http://www.co.aitkin.mn.us/</a>
<h3 class="contentpage-h3">Address</h3>
Aitkin County Auditor <br/>307 2nd St NW Rm 121 <br/>Aitkin, MN 56431
<h3 class="contentpage-h3">Absentee voting contact</h3>
Sally Huhta <br/>Phone: 218-927-7354 <br/>Fax: 218-927-7324 <br/>Email: sally.huhta@co.aitkin.mn.us
<h3 class="contentpage-h3">Military and overseas absentee voting contact</h3>
Sally Huhta <br/>Phone: 218-927-7354 <br/>Fax: 218-927-7324 <br/>Email: sally.huhta@co.aitkin.mn.us<hr/></div>
</div>

In [32]:
for row in divs[1].find_all(['h3']):
    print(row.text)
    print(row.nextSibling)

Address

Aitkin County Auditor 
Absentee voting contact

Sally Huhta 
Military and overseas absentee voting contact

Sally Huhta 


In [33]:
for row in divs[1].find_all(['h3','br']):
    print(row.text)
    print(row.nextSibling)


General phone: 218-927-7354 

Fax: 218-927-7324 

Website: 
Address

Aitkin County Auditor 

307 2nd St NW Rm 121 

Aitkin, MN 56431

Absentee voting contact

Sally Huhta 

Phone: 218-927-7354 

Fax: 218-927-7324 

Email: sally.huhta@co.aitkin.mn.us

Military and overseas absentee voting contact

Sally Huhta 

Phone: 218-927-7354 

Fax: 218-927-7324 

Email: sally.huhta@co.aitkin.mn.us


In [34]:
for row in divs[1].find_all(['br']):
    print(row.text)
    print(row.nextSibling)


General phone: 218-927-7354 

Fax: 218-927-7324 

Website: 

307 2nd St NW Rm 121 

Aitkin, MN 56431


Phone: 218-927-7354 

Fax: 218-927-7324 

Email: sally.huhta@co.aitkin.mn.us


Phone: 218-927-7354 

Fax: 218-927-7324 

Email: sally.huhta@co.aitkin.mn.us


In [35]:
all_info = []
for div in divs[1:]:
    data = [x.text or x.nextSibling for x in div.find_all(['h3','br'])]    
    all_info.append(data)

In [36]:
[[num,row] for num,row in enumerate(all_info)]

[[0,
  ['General phone: 218-927-7354 ',
   'Fax: 218-927-7324 ',
   'Website: ',
   'Address',
   '307 2nd St NW Rm 121 ',
   'Aitkin, MN 56431\n',
   'Absentee voting contact',
   'Phone: 218-927-7354 ',
   'Fax: 218-927-7324 ',
   'Email: sally.huhta@co.aitkin.mn.us\n',
   'Military and overseas absentee voting contact',
   'Phone: 218-927-7354 ',
   'Fax: 218-927-7324 ',
   'Email: sally.huhta@co.aitkin.mn.us']],
 [1,
  ['General phone: 763-324-1300 ',
   'Fax: 763-324-1160 ',
   'Website: ',
   'Email: elections@co.anoka.mn.us\n',
   'Address',
   '2100 3rd Ave, Suite W130 ',
   'Anoka, MN 55303-5031\n',
   'Absentee voting contact',
   'Phone: 763-324-1300',
   'Fax: 763-324-1160',
   'Email: elections@co.anoka.mn.us\n',
   'Military and overseas absentee voting contact',
   'Phone: 763-324-1300',
   'Fax: 763-324-1160',
   'Email: elections@co.anoka.mn.us']],
 [2,
  ['General phone: 218-846-7311 ',
   'Fax: 218-846-7257 ',
   'Website: ',
   'Email: auditor@co.becker.mn.us\n',
  

In [37]:
len(all_info)

87

**Need to code around these "hr" tags that randomly appear**

In [38]:
hr_tag = all_info[10][18]

In [39]:
hr_tag

<hr/>

**It also appears that row number of the physical address varies, so need to code for all cases**

In [40]:
for div,info in enumerate(all_info):
    for num,row in enumerate(info):
        if row == hr_tag:
            pass
        elif re.search('Address', row) or re.search('Street address', row):
            print(div, num, row)

0 3 Address
1 4 Address
2 4 Address
3 3 Address
4 4 Street address
5 4 Address
6 4 Street address
7 3 Street address
8 3 Street address
9 4 Address
10 4 Street address
11 3 Address
12 4 Address
13 4 Address
14 4 Address
15 4 Address
16 3 Address
17 4 Address
18 4 Address
19 4 Address
20 4 Address
21 4 Street address
22 4 Street address
23 3 Address
24 4 Address
25 3 Address
26 4 Address
27 4 Address
28 4 Address
29 4 Address
30 4 Address
31 3 Address
32 3 Address
33 3 Address
34 3 Address
35 3 Address
36 4 Address
37 4 Address
38 4 Address
39 3 Address
40 4 Address
41 4 Address
42 3 Street address
43 3 Address
44 4 Address
45 4 Address
46 4 Address
47 3 Address
48 4 Address
49 3 Address
50 3 Street address
51 4 Address
52 4 Street address
53 4 Street address
54 4 Mailing Address
55 4 Address
56 4 Street address
57 4 Address
58 3 Address
59 3 Address
60 3 Address
61 4 Street address
62 4 Street address
63 3 Street address
64 4 Address
65 4 Address
66 3 Street address
67 4 Address
68 4 A

In [41]:
all_data = []
for info in all_info:
    for row in info:
        if row == hr_tag:
            pass
        elif re.findall('General phone:', row):
            phone = row.split('General phone:')[1].strip()
        elif re.findall('Fax:', row):
            fax = row.split('Fax: ')[1].strip()
        elif re.findall('Email: ', row):
            email = row.split('Email:')[1].strip()
        else:
            pass
        
# re.search('Address', row) or re.search('Street address', row)        

    if re.findall(', MN ', info[5]):
        address = info[4].strip()
        city = info[5].split(', MN')[0].strip()
        zip_code = info[5].split(', MN ')[1].strip()
    else:
        address = info[5].strip()
        city = info[6].split(', MN')[0].strip()
        zip_code = info[6].split(', MN ')[1].strip()
    
    data= {
        'address_1': address,
        'city': city,
        'phone': phone, 
        'email': email, 
        'fax': fax,
        'zip_code': zip_code
    }

    all_data.append(data)

In [42]:
[[num,row] for num,row in enumerate(all_data)]

[[0,
  {'address_1': '307 2nd St NW Rm 121',
   'city': 'Aitkin',
   'phone': '218-927-7354',
   'email': 'sally.huhta@co.aitkin.mn.us',
   'fax': '218-927-7324',
   'zip_code': '56431'}],
 [1,
  {'address_1': '2100 3rd Ave, Suite W130',
   'city': 'Anoka',
   'phone': '763-324-1300',
   'email': 'elections@co.anoka.mn.us',
   'fax': '763-324-1160',
   'zip_code': '55303-5031'}],
 [2,
  {'address_1': '915 Lake Ave',
   'city': 'Detroit Lakes',
   'phone': '218-846-7311',
   'email': 'elections@co.becker.mn.us',
   'fax': '218-846-7257',
   'zip_code': '56501'}],
 [3,
  {'address_1': '701 Minnesota Ave NW Ste 220',
   'city': 'Bemidji',
   'phone': '218-333-4175',
   'email': 'danielle.m.johnson@co.beltrami.mn.us',
   'fax': '218-333-4246',
   'zip_code': '56601-3178'}],
 [4,
  {'address_1': '531 Dewey St',
   'city': 'Foley',
   'phone': '320-968-5027',
   'email': 'uocavaelections@co.benton.mn.us',
   'fax': '320-968-5337',
   'zip_code': '56329-0129'}],
 [5,
  {'address_1': '20 2nd S

**The street address for Winona is not being picked up because of a div unique to it's format. I'll pick this up in the final DataFrame**

In [43]:
all_info[84]

['General phone: 507-457-8830 ',
 'Fax: 507-454-9368 ',
 'Website: ',
 'Email: elections@co.winona.mn.us\n',
 'Address',
 'Winona, MN 55987\n',
 'Absentee voting contact',
 'Phone: 507-457-8830 ',
 'Fax: 507-454-9368 ',
 'Email: elections@co.winona.mn.us\n',
 'Military and overseas absentee voting contact',
 'Phone: 507-457-8830 ',
 'Fax: 507-454-9368 ',
 'Email: elections@co.winona.mn.us']

In [44]:
divs[85]

<div class="collapse" id="85">
<div>Election official: Sandra J Suchla <br/>General phone: 507-457-8830 <br/>Fax: 507-454-9368 <br/>Website: <a href="http://www.co.winona.mn.us/">http://www.co.winona.mn.us/</a> <br/>Email: elections@co.winona.mn.us
<h3 class="contentpage-h3">Address</h3>
Winona County Auditor-Treasurer</div>
<div>202 W Third St<br/>Winona, MN 55987
<h3 class="contentpage-h3">Absentee voting contact</h3>
Sandra J Suchla <br/>Phone: 507-457-8830 <br/>Fax: 507-454-9368 <br/>Email: elections@co.winona.mn.us
<h3 class="contentpage-h3">Military and overseas absentee voting contact</h3>
Sandra J Suchla <br/>Phone: 507-457-8830 <br/>Fax: 507-454-9368 <br/>Email: elections@co.winona.mn.us<hr/></div>
</div>

In [45]:
all_add_df = pd.DataFrame(all_data)

In [46]:
all_add_df

Unnamed: 0,address_1,city,phone,email,fax,zip_code
0,307 2nd St NW Rm 121,Aitkin,218-927-7354,sally.huhta@co.aitkin.mn.us,218-927-7324,56431
1,"2100 3rd Ave, Suite W130",Anoka,763-324-1300,elections@co.anoka.mn.us,763-324-1160,55303-5031
2,915 Lake Ave,Detroit Lakes,218-846-7311,elections@co.becker.mn.us,218-846-7257,56501
3,701 Minnesota Ave NW Ste 220,Bemidji,218-333-4175,danielle.m.johnson@co.beltrami.mn.us,218-333-4246,56601-3178
4,531 Dewey St,Foley,320-968-5027,uocavaelections@co.benton.mn.us,320-968-5337,56329-0129
...,...,...,...,...,...,...
82,710 2nd Ave S,Saint James,507-375-1210,kelly.pauling@co.watonwan.mn.us,507-375-3547,56081
83,300 S 5th St,Breckenridge,218-643-7165,bconzemius@co.wilkin.mn.us,218-643-7169,56520
84,Address,Winona,507-457-8830,elections@co.winona.mn.us,507-454-9368,55987
85,10 2nd St NW Rm 230,Buffalo,763-682-7579,elections@co.wright.mn.us,763-682-7873,55313-1195


In [47]:
all_data_df['address_1'] = all_add_df['address_1']
all_data_df['city'] = all_add_df['city']
all_data_df['phone'] = all_add_df['phone']
all_data_df['email'] = all_add_df['email']
all_data_df['fax'] = all_add_df['fax']
all_data_df['zip'] = all_add_df['zip_code']
all_data_df['state'] = 'MN'
all_data_df['hours'] = 'The last Saturday before Election Day (10 a.m. — 3 p.m.); The day before Election Day until 5 p.m. (https://www.sos.state.mn.us/elections-voting/other-ways-to-vote/vote-early-in-person/)'

In [48]:
all_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87 entries, 0 to 86
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   county              87 non-null     object
 1   location_type       0 non-null      object
 2   address_1           87 non-null     object
 3   address_2           0 non-null      object
 4   city                87 non-null     object
 5   state               87 non-null     object
 6   zip                 87 non-null     object
 7   phone               87 non-null     object
 8   latitude            0 non-null      object
 9   longitude           0 non-null      object
 10  has_droppff         0 non-null      object
 11  has_phone           0 non-null      object
 12  county_website_url  87 non-null     object
 13  validate_url        0 non-null      object
 14  email               87 non-null     object
 15  fax                 87 non-null     object
 16  social              0 non-nu

In [49]:
all_data_df

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
0,Aitkin County,,307 2nd St NW Rm 121,,Aitkin,MN,56431,218-927-7354,,,,,http://www.co.aitkin.mn.us/,,sally.huhta@co.aitkin.mn.us,218-927-7324,,,The last Saturday before Election Day (10 a.m....
1,Anoka County,,"2100 3rd Ave, Suite W130",,Anoka,MN,55303-5031,763-324-1300,,,,,http://www.anokacounty.us,,elections@co.anoka.mn.us,763-324-1160,,,The last Saturday before Election Day (10 a.m....
2,Becker County,,915 Lake Ave,,Detroit Lakes,MN,56501,218-846-7311,,,,,http://www.co.becker.mn.us/,,elections@co.becker.mn.us,218-846-7257,,,The last Saturday before Election Day (10 a.m....
3,Beltrami County,,701 Minnesota Ave NW Ste 220,,Bemidji,MN,56601-3178,218-333-4175,,,,,http://www.co.beltrami.mn.us/,,danielle.m.johnson@co.beltrami.mn.us,218-333-4246,,,The last Saturday before Election Day (10 a.m....
4,Benton County,,531 Dewey St,,Foley,MN,56329-0129,320-968-5027,,,,,http://www.co.benton.mn.us/,,uocavaelections@co.benton.mn.us,320-968-5337,,,The last Saturday before Election Day (10 a.m....
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,Watonwan County,,710 2nd Ave S,,Saint James,MN,56081,507-375-1210,,,,,http://www.co.watonwan.mn.us/,,kelly.pauling@co.watonwan.mn.us,507-375-3547,,,The last Saturday before Election Day (10 a.m....
83,Wilkin County,,300 S 5th St,,Breckenridge,MN,56520,218-643-7165,,,,,http://www.co.wilkin.mn.us/,,bconzemius@co.wilkin.mn.us,218-643-7169,,,The last Saturday before Election Day (10 a.m....
84,Winona County,,Address,,Winona,MN,55987,507-457-8830,,,,,http://www.co.winona.mn.us/,,elections@co.winona.mn.us,507-454-9368,,,The last Saturday before Election Day (10 a.m....
85,Wright County,,10 2nd St NW Rm 230,,Buffalo,MN,55313-1195,763-682-7579,,,,,http://www.co.wright.mn.us/,,elections@co.wright.mn.us,763-682-7873,,,The last Saturday before Election Day (10 a.m....


# Scrape location_type
**Try using splitlines() to get the location_types**

In [50]:
divs[81].text.splitlines() #Let's take a look at one, random record

['',
 'Election official: Tamara J. Spooner General phone: 507-835-0610 Fax: 507-835-0633 Website: http://www.co.waseca.mn.us/',
 'Street address',
 'Waseca County Auditor 307 N State St Waseca, MN 56093',
 'Mailing address',
 'Waseca County Auditor PO Box 47 Waseca, MN 56093',
 'Absentee voting contact',
 'Mary Jo StanglerPhone: 507-835-0622 Fax: 507-835-0633 Email: maryjo.stangler@co.waseca.mn.us',
 'Military and overseas absentee voting contact',
 'Mary Jo StanglerPhone: 507-835-0622 Fax: 507-835-0633 Email: maryjo.stangler@co.waseca.mn.us']

## Examine every address line to determine consistency of formating

In [51]:
for num,row in enumerate(divs):
    print(num, row.text.splitlines()[3])

0 
1 Aitkin County Auditor 307 2nd St NW Rm 121 Aitkin, MN 56431
2 Anoka County Elections & Voter Registration2100 3rd Ave, Suite W130 Anoka, MN 55303-5031
3 Becker County Courthouse 915 Lake Ave Detroit Lakes, MN 56501
4 Beltrami County Auditor 701 Minnesota Ave NW Ste 220 Bemidji, MN 56601-3178
5 Benton County Auditor-Treasurer's Office 531 Dewey St Foley, MN 56329-0129
6 Big Stone County Auditor 20 2nd St SE Ste 103 Ortonville, MN 56278
7 Blue Earth County Historic Courthouse 204 S 5th St Mankato, MN 56001
8 Brown County Courthouse 14 South State St New Ulm, MN 56073
9 Carlton County Auditor 301 Walnut Ave Carlton, MN 55718
10 Government Center - Administration Bldg 600 E Fourth St Chaska, MN 55318-2102
11 Cass County Auditor-Treasurer's Office 303 Minnesota Ave W Walker, MN 56484
12 Chippewa County Courthouse 629 N 11th St Montevideo, MN 56265
13 Chisago County Gov Center 313 N Main St Rm 271 Center City, MN 55012-7656
14 Clay County Courthouse 807 N 11th St Moorhead, MN 56560
15 C

**Looks like the address lines doesn't always occur on the same line number so we'll need to handle the cases**

In [52]:
for num,row in enumerate(divs):
    if row.text.splitlines()[3]=='Absentee voting contact':
        print(num, row.text.splitlines()[2])
    elif row.text.splitlines()[3]=='Mailing:':
        print(num, row.text.splitlines()[4])
    else:
        print(num, row.text.splitlines()[3])

0 
1 Aitkin County Auditor 307 2nd St NW Rm 121 Aitkin, MN 56431
2 Anoka County Elections & Voter Registration2100 3rd Ave, Suite W130 Anoka, MN 55303-5031
3 Becker County Courthouse 915 Lake Ave Detroit Lakes, MN 56501
4 Beltrami County Auditor 701 Minnesota Ave NW Ste 220 Bemidji, MN 56601-3178
5 Benton County Auditor-Treasurer's Office 531 Dewey St Foley, MN 56329-0129
6 Big Stone County Auditor 20 2nd St SE Ste 103 Ortonville, MN 56278
7 Blue Earth County Historic Courthouse 204 S 5th St Mankato, MN 56001
8 Brown County Courthouse 14 South State St New Ulm, MN 56073
9 Carlton County Auditor 301 Walnut Ave Carlton, MN 55718
10 Government Center - Administration Bldg 600 E Fourth St Chaska, MN 55318-2102
11 Cass County Auditor-Treasurer's Office 303 Minnesota Ave W Walker, MN 56484
12 Chippewa County Courthouse 629 N 11th St Montevideo, MN 56265
13 Chisago County Gov Center 313 N Main St Rm 271 Center City, MN 55012-7656
14 Clay County Courthouse 807 N 11th St Moorhead, MN 56560
15 C

## Parse location_type in address lines

In [53]:
all_loc_types = []
for loc in divs[1:]:
    if loc.text.splitlines()[3]=='Absentee voting contact':
            location = re.split('\d', loc.text.splitlines()[2], 1)[0].strip()

    elif loc.text.splitlines()[3]=='Mailing:':
            location = re.split('\d', loc.text.splitlines()[4], 1)[0].strip()

    elif loc.text.splitlines()[3]=='Stearns County Service Center':
            location = loc.text.splitlines()[3].strip()

    elif loc.text.splitlines()[3]=='Winona County Auditor-Treasurer':
            location = loc.text.splitlines()[3].strip()

    else:
            location = re.split('\d', loc.text.splitlines()[3], 1)[0].strip()

    data = {
            'location_type': location
        }
        
    all_loc_types.append(data)

In [54]:
len(all_loc_types)

87

In [55]:
all_loc_types

[{'location_type': 'Aitkin County Auditor'},
 {'location_type': 'Anoka County Elections & Voter Registration'},
 {'location_type': 'Becker County Courthouse'},
 {'location_type': 'Beltrami County Auditor'},
 {'location_type': "Benton County Auditor-Treasurer's Office"},
 {'location_type': 'Big Stone County Auditor'},
 {'location_type': 'Blue Earth County Historic Courthouse'},
 {'location_type': 'Brown County Courthouse'},
 {'location_type': 'Carlton County Auditor'},
 {'location_type': 'Government Center - Administration Bldg'},
 {'location_type': "Cass County Auditor-Treasurer's Office"},
 {'location_type': 'Chippewa County Courthouse'},
 {'location_type': 'Chisago County Gov Center'},
 {'location_type': 'Clay County Courthouse'},
 {'location_type': 'Clearwater County Auditor-Treasurer'},
 {'location_type': 'Cook County Courthouse'},
 {'location_type': 'Cottonwood County Courthouse'},
 {'location_type': 'Crow Wing County Elections'},
 {'location_type': 'Dakota County Elections'},
 {'

In [56]:
loc_type_df = pd.DataFrame(all_loc_types)

In [57]:
all_data_df['location_type'] = loc_type_df['location_type']

In [58]:
all_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87 entries, 0 to 86
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   county              87 non-null     object
 1   location_type       87 non-null     object
 2   address_1           87 non-null     object
 3   address_2           0 non-null      object
 4   city                87 non-null     object
 5   state               87 non-null     object
 6   zip                 87 non-null     object
 7   phone               87 non-null     object
 8   latitude            0 non-null      object
 9   longitude           0 non-null      object
 10  has_droppff         0 non-null      object
 11  has_phone           0 non-null      object
 12  county_website_url  87 non-null     object
 13  validate_url        0 non-null      object
 14  email               87 non-null     object
 15  fax                 87 non-null     object
 16  social              0 non-nu

In [59]:
all_data_df

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
0,Aitkin County,Aitkin County Auditor,307 2nd St NW Rm 121,,Aitkin,MN,56431,218-927-7354,,,,,http://www.co.aitkin.mn.us/,,sally.huhta@co.aitkin.mn.us,218-927-7324,,,The last Saturday before Election Day (10 a.m....
1,Anoka County,Anoka County Elections & Voter Registration,"2100 3rd Ave, Suite W130",,Anoka,MN,55303-5031,763-324-1300,,,,,http://www.anokacounty.us,,elections@co.anoka.mn.us,763-324-1160,,,The last Saturday before Election Day (10 a.m....
2,Becker County,Becker County Courthouse,915 Lake Ave,,Detroit Lakes,MN,56501,218-846-7311,,,,,http://www.co.becker.mn.us/,,elections@co.becker.mn.us,218-846-7257,,,The last Saturday before Election Day (10 a.m....
3,Beltrami County,Beltrami County Auditor,701 Minnesota Ave NW Ste 220,,Bemidji,MN,56601-3178,218-333-4175,,,,,http://www.co.beltrami.mn.us/,,danielle.m.johnson@co.beltrami.mn.us,218-333-4246,,,The last Saturday before Election Day (10 a.m....
4,Benton County,Benton County Auditor-Treasurer's Office,531 Dewey St,,Foley,MN,56329-0129,320-968-5027,,,,,http://www.co.benton.mn.us/,,uocavaelections@co.benton.mn.us,320-968-5337,,,The last Saturday before Election Day (10 a.m....
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,Watonwan County,Watonwan County Courthouse,710 2nd Ave S,,Saint James,MN,56081,507-375-1210,,,,,http://www.co.watonwan.mn.us/,,kelly.pauling@co.watonwan.mn.us,507-375-3547,,,The last Saturday before Election Day (10 a.m....
83,Wilkin County,Wilkin County Courthouse,300 S 5th St,,Breckenridge,MN,56520,218-643-7165,,,,,http://www.co.wilkin.mn.us/,,bconzemius@co.wilkin.mn.us,218-643-7169,,,The last Saturday before Election Day (10 a.m....
84,Winona County,Winona County Auditor-Treasurer,Address,,Winona,MN,55987,507-457-8830,,,,,http://www.co.winona.mn.us/,,elections@co.winona.mn.us,507-454-9368,,,The last Saturday before Election Day (10 a.m....
85,Wright County,Wright County Government Center,10 2nd St NW Rm 230,,Buffalo,MN,55313-1195,763-682-7579,,,,,http://www.co.wright.mn.us/,,elections@co.wright.mn.us,763-682-7873,,,The last Saturday before Election Day (10 a.m....


# Correct Winona  and spot check a few counties

In [60]:
winona_index = all_data_df[all_data_df['county']=='Winona County'].index.item()

In [61]:
all_data_df.loc[winona_index, 'address_1'] = '202 W Third St'

In [62]:
all_data_df[all_data_df['county']=='Winona County']

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
84,Winona County,Winona County Auditor-Treasurer,202 W Third St,,Winona,MN,55987,507-457-8830,,,,,http://www.co.winona.mn.us/,,elections@co.winona.mn.us,507-454-9368,,,The last Saturday before Election Day (10 a.m....


In [63]:
all_data_df[all_data_df['county']=='Carver County']

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
9,Carver County,Government Center - Administration Bldg,600 E Fourth St,,Chaska,MN,55318-2102,952-361-1981,,,,,http://www.co.carver.mn.us/,,elections@co.carver.mn.us,952-361-1919,,,The last Saturday before Election Day (10 a.m....


In [64]:
all_data_df[all_data_df['county']=='Yellow Medicine County']

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
86,Yellow Medicine County,YMC Government Center,180 8th Ave,,Granite Falls,MN,56241-1508,320-564-3132,,,,,http://www.co.ym.mn.gov,,amyr@co.ym.mn.gov,320-564-0927,,,The last Saturday before Election Day (10 a.m....


In [65]:
all_data_df[all_data_df['county']=='Lake of the Woods County']

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
38,Lake of the Woods County,Lorene Hanson,206 8th Ave SE Ste 260,,Baudette,MN,56623,218-634-2836,,,,,http://www.co.lake-of-the-woods.mn.us/,,lorene_h@co.lake-of-the-woods.mn.us,218-634-2509,,,The last Saturday before Election Day (10 a.m....


In [66]:
all_data_df[all_data_df['county']=='Marshall County']

Unnamed: 0,county,location_type,address_1,address_2,city,state,zip,phone,latitude,longitude,has_droppff,has_phone,county_website_url,validate_url,email,fax,social,inactive,hours
43,Marshall County,Leanne Novacek,"208 E Colvin Ave, Ste 11",,Warren,MN,56762,218-745-4851,,,,,http://www.co.marshall.mn.us/,,leanne.novacek@co.marshall.mn.us,218-745-5089,,,The last Saturday before Election Day (10 a.m....


# Write DataFrame to csv

In [67]:
all_data_df.to_csv('Minnesota.csv', index=False)