# Contents<a id="top"></a>

* <a href="#data">Get the data</a>

## Get the data<a id="data"></a>
[<a href="#top">back to top</a>]

In [277]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd

In [278]:
def get_xml(**kwargs):
    ''' Returns root XML tree structure using King County's API

        Kwargs
        ======
        Business_Name - string
        Business_Address - string
        Zip_Code - string
        Inspection_Start - string with valid date in format MM/DD/YYYY
        Inspection_End - string with valid date in format MM/DD/YYYY
        Violation_Points - string with Integer <= 999
        Violation_Red_Points - string with Integer <=999
        City - string

        Returns
        =======
        The root Element of the parsed XML response, with the King County
        'Disclaimer' element removed when it is present. Returns None
        (after printing a message) when no parameters are given.

        Raises
        ======
        requests.HTTPError when the API answers with an error status code.

        Example
        =======
        Returns inspection data in zip code 98101 since 1/1/2016
        >>> get_xml(Zip_Code='98101', Inspection_Start='1/1/2016')

        Notes
        =====
        The API appears to break using parameter Inspection_Closed_Business
    '''
    # **kwargs is always a dict (never None), so check for emptiness instead.
    if not kwargs:
        print('Oh no! You forgot to select at least one parameter')
        return None

    url = 'http://info.kingcounty.gov/health/ehs/foodsafety/inspections/XmlRest.aspx'

    # Let requests build the query string: it URL-encodes the values (business
    # names and addresses may contain spaces or '&') and avoids the stray '?&'
    # that manual joining produced.
    response = requests.get(url, params=kwargs)
    # Fail on HTTP errors here rather than with a confusing XML parse error.
    response.raise_for_status()

    root = ET.fromstring(response.content)

    # Remove the King County data disclaimer, if the response carries one.
    disclaimer_element = root.find('Disclaimer')
    if disclaimer_element is not None:
        root.remove(disclaimer_element)
    return root

In [543]:
# Fetch the 2016 inspections for zip code 98109; this query should include Toulouse Petit.
root = get_xml(
    Zip_Code='98109',
    Inspection_Start='1/1/2016',
    Inspection_End='12/31/2016',
)

In [620]:
# Select Toulouse Petit by name instead of by position: a positional index
# (previously root[339]) points at a different business as soon as the API
# returns a different result set.
TARGET_BUSINESS_NAME = 'TOULOUSE PETIT KITCHEN & LOUNGE'
business = next(
    (b for b in root if (b.findtext('Name') or '').strip() == TARGET_BUSINESS_NAME),
    None,
)
if business is None:
    raise LookupError('No business named %r in the API response' % TARGET_BUSINESS_NAME)

In [544]:
def get_attribute(node, attribute):
    ''' Returns the text of the first child of `node` matching `attribute`

        Returns None when no matching child exists (and also when the
        matching child has no text).
    '''
    child = node.find(attribute)
    return None if child is None else child.text

In [633]:
# Print one line per violation of the selected business:
# business name, inspection date, violation type.
business_name = get_attribute(business, 'Name')
for inspection in business.findall('Inspection'):
    inspection_date = get_attribute(inspection, 'Inspection_Date')
    for violation in inspection.findall('Violation'):
        violation_type = get_attribute(violation, 'Violation_Type')
        print(business_name, inspection_date, violation_type)

TOULOUSE PETIT KITCHEN & LOUNGE 09/27/2016 None
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 RED
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 RED
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 RED
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 RED
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 RED
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 BLUE
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 BLUE
TOULOUSE PETIT KITCHEN & LOUNGE 09/22/2016 BLUE
TOULOUSE PETIT KITCHEN & LOUNGE 06/20/2016 None
TOULOUSE PETIT KITCHEN & LOUNGE 01/26/2016 RED
TOULOUSE PETIT KITCHEN & LOUNGE 01/26/2016 BLUE
TOULOUSE PETIT KITCHEN & LOUNGE 01/26/2016 BLUE


Looks good. Now try the same thing for multiple businesses.

In [634]:
businesses = root[0:5]

for business in businesses:
    # Not every business has inspections in the data, oddly. findall() then
    # returns an empty list, so no explicit guard is needed. The old
    # `if business.find('Inspection'):` guard tested Element truthiness, which
    # reflects "has child elements" rather than "was found" and is deprecated.
    for inspection in business.findall('Inspection'):
        violations = inspection.findall('Violation')
        if violations:
            for violation in violations:
                print(get_attribute(business, 'Name'),
                      get_attribute(inspection, 'Inspection_Date'),
                      get_attribute(violation, 'Violation_Type'))
        else:
            # Not every inspection has violations; still report the inspection.
            print(get_attribute(business, 'Name'),
                  get_attribute(inspection, 'Inspection_Date'))

10 MERCER RESTAURANT 07/07/2016 None
10 MERCER RESTAURANT 02/24/2016 RED
10 MERCER RESTAURANT 02/24/2016 BLUE
100 LB CLAM 08/31/2016 None
13 COINS 09/19/2016 RED
13 COINS 09/19/2016 BLUE
13 COINS 06/01/2016 BLUE
13 COINS 05/18/2016 RED
13 COINS 05/18/2016 RED
13 COINS 05/18/2016 RED
13 COINS 05/18/2016 BLUE
13 COINS 01/12/2016 BLUE
5-SPOT 07/29/2016 RED
5-SPOT 07/29/2016 BLUE
5-SPOT 07/29/2016 BLUE
5-SPOT 07/29/2016 BLUE
7-ELEVEN STORE #14368A 02/09/2016 None


In [None]:
# TODO: eventually collect the parsed fields into a DataFrame, along the lines
# of the sketch below. The sketch is kept inside a string literal, so it is
# not executed; the names it references are not defined in the cells above.
'''
business_attributes = {'Name': name, 'Lat': lat, 'Long': long, 
                       'inspect_date': inspect_date, 
                       'vtype': vtype, 'vdesc': vdesc, 'vscore': vscore}
df = pd.DataFrame(business_attributes)
'''