In [1]:
# Run this cell!
import requests
import re
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [50]:
base_url = 'https://kpx-service-bus.kp.org/service/hp/mhpo/healthplanproviderv1rc/'
location = 'Location?'
practitionerRole = 'PractitionerRole?'
organization = 'Organization?'
search_param = 'location.address-city=Manhattan Beach'
full_query = base_url + practitionerRole + search_param

In [3]:
request = requests.get(full_query)
display(request)
json_object = request.json()
total = json_object.get('total')
print(f'Total Entries: {total}')
print(f'Entries per Page: {len(json_object.get("entry"))}')

# Get next url
next_dict = json_object.get('link')[1]
next_url = next_dict.get('url')

<Response [200]>

Total Entries: 362
Entries per Page: 50


In [55]:
requests.get(full_query).json().get('link')

[{'relation': 'self',
  'url': 'https://kpx-service-bus.kp.org/service/hp/mhpo/healthplanproviderv1rc/PractitionerRole?location.address-city=Manhattan%20Beach'}]

In [49]:
base_url = 'https://kpx-service-bus.kp.org/service/hp/mhpo/healthplanproviderv1rc/'
practitionerRole = 'PractitionerRole?'
search_param = 'location.address-city=Berkeley'
full_query = base_url + practitionerRole + search_param
request = requests.get(full_query)
json_object = request.json()
total = json_object.get('total')
entries_per_page = len(json_object.get('entry'))
print(f'Total Entries: {total}')
print(f'Entries per Page: {entries_per_page}')

provider_list = json_object.get('entry')
healthcareService_refs = [provider.get('resource').get('healthcareService')[0].get('reference') for provider in provider_list]
healthcareServiceUrls = [base_url + ref for ref in healthcareService_refs]
location_refs = [provider.get('resource').get('location')[0].get('reference') for provider in provider_list]
location_urls = [base_url + ref for ref in location_refs]

specialties = get_specialties(healthcareServiceUrls)
names, addresses = get_names_and_addresses(location_urls)
coordinates = get_coordinates(addresses)

kaiser = [[names[i]] + [addresses[i]] + coordinates[i] + specialties[i]
          if addresses [i] is not None
          else [None, None, None, None, None, None]
          for i in range(len(coordinates))]

kaiser_providers = pd.DataFrame(data=kaiser, columns=['Name', 'Address', 'Latitude', 'Longitude',
                                                      'Provider Taxonomy Code', 'Specialty'])

next_dict = json_object.get('link')[1]
next_url = next_dict.get('url')
while True:
    next_json_object = requests.get(next_url).json()
    provider_list = next_json_object.get('entry')
    try:
        healthcareService_refs = [provider.get('resource').get('healthcareService')[0].get('reference') for provider in provider_list]
    except 
    healthcareServiceUrls = [base_url + ref for ref in healthcareService_refs]
    location_refs = [provider.get('resource').get('location')[0].get('reference') for provider in provider_list]
    location_urls = [base_url + ref for ref in location_refs]

    specialties = get_specialties(healthcareServiceUrls)
    names, addresses = get_names_and_addresses(location_urls)
    coordinates = get_coordinates(addresses)

    kaiser = [[names[i]] + [addresses[i]] + coordinates[i] + specialties[i]
              if addresses [i] is not None
              else [None, None, None, None, None, None]
              for i in range(len(coordinates))]
    kaiser_df = pd.DataFrame(data=kaiser, columns=['Name', 'Address', 'Latitude', 'Longitude',
                                                      'Provider Taxonomy Code', 'Specialty'])
    
    kaiser_providers = pd.concat([kaiser_providers, kaiser_df])
    
    try:
        next_dict = next_json_object.get('link')[1]
        next_url = next_dict.get('url')
    except TypeError:
        break
    
kaiser_providers    

Total Entries: 362
Entries per Page: 50
https://nominatim.openstreetmap.org/search/919%20The%20Alameda%20Rm%201%20Berkeley%20CA%2094707?format=json
https://nominatim.openstreetmap.org/search/3021%20Telegraph%20Ave%20Ste%20A%20Berkeley%20CA%2094705?format=json
https://nominatim.openstreetmap.org/search/1428%20Bonita%20Ave%20Ste%20B%20Berkeley%20CA%2094709?format=json
https://nominatim.openstreetmap.org/search/4701%20Stoddard%20Rd%20Modesto%20CA%2095356?format=json
https://nominatim.openstreetmap.org/search/3120%20Telegraph%20Ave%20Ste%2011B%20Berkeley%20CA%2094705?format=json


TypeError: 'NoneType' object is not iterable

In [44]:
json_object.keys()

dict_keys(['resourceType', 'id', 'meta', 'type', 'total', 'link', 'entry'])

In [4]:
test_provider_list = json_object.get('entry')

In [5]:
test_provider_refs = [provider.get('resource').get('healthcareService')[0].get('reference') for provider in test_provider_list]

In [6]:
urls = [base_url + ref for ref in test_provider_refs]

In [None]:
#specialties = [[requests.get(urls[i]).json().get('specialty')[0].get('coding')[0].get('code'),
#                requests.get(urls[i]).json().get('specialty')[0].get('coding')[0].get('display')
#               ] if i != 34 else (None, None) for i in range(len(urls))]
def get_names(provider_list):
    '''
    Function that extracts the name of each provider from provider_list
    -----
    Input:
    
    provider_list (list) - List of json objects each corresponding to a single provider
    -----
    Output:
    
    names (list) - List of strings containing the names of each provider in
                   provider_list
    '''
    names = []
    for provider in provider_list:
        name = provider.get('name')
        names.append(name)
    return names

def get_specialties(healthcareServiceUrls):
    '''
    Function that takes a HealthcareService API call and extracts the provider taxonomy code
    and the corresponding plain English specialty
    -----
    Input:
    
    healthcareServiceUrls (list) - List of strings corresponding to HealthcareService resource API call
    -----
    Output:
    
    specialties (list) - 2D list of provider taxonomy codes and corresponding plain English
                         specialty
                         
            [[taxonomy_code, specialty],
             [taxonomy_code, specialty],
                   ...          ...
             [taxonomy_code, specialty]]
    '''
    specialties = []
    for url in healthcareServiceUrls:
        request = requests.get(url)
        json_object = request.json()
        specialty_object = json_object.get('specialty')
        try:
            specialty_dict = specialty_object[0].get('coding')[0]
            code = specialty_dict.get('code')
            display_name = specialty_dict.get('display')
            specialty = [code, display_name]
        except TypeError:
            specialty = [None, None]
        specialties.append(specialty)
    return specialties

def get_names_and_addresses(location_urls):
    '''
    Function that extracts the street addresses from Location API calls
    -----
    Input:
    
    location_urls (list) - List of strings corresponding to Location resource API calls
    -----
    Outputs:
    
    names (list) - List of provider names as strings
    
    addresses (list) - List of street addresses as strings
    '''
    names = []
    addresses = []
    for url in location_urls:
        request = requests.get(url)
        json_object = request.json()
        name = json_object.get('name')
        address_object = json_object.get('address')
        street_address = address_object.get('text')
        names.append(name)
        addresses.append(street_address)
    return names, addresses

def clean_address(address):
    '''
    Helper function that cleans street address to allow for Nominatim 
    seach engine API call
    -----
    Input:
    
    address (string) - Street address (includes whitespaces and suite, 
                       unit, or floor)
    -----
    Output:
    
    cleaned_address (string) - Street address prepared for Nominatim
                               search engine API call
    '''
    unit_pattern = 'Fl\s[\w\d]+\s|Ste\s[\w\d]+\s|Unit\s[\w\d]+\s|Unit\s[\w\d]+\s|Rm\s[\w\d]+\s'
    try:
        clean_w_spaces = re.sub(unit_pattern, '', address)
        cleaned_address = re.sub('\s', '%20', clean_w_spaces)
        return cleaned_address
    except TypeError:
        return None

def nominatim_lookup(address):
    '''
    Helper function that makes a call to Nominatim search engine API
    -----
    Input:
    
    address (string) - Street address
    -----
    Output:
    
    coordinate (list) - List of strings corresponding to coordinates ([latitude, longitude])
    '''
    nominatim_search = 'https://nominatim.openstreetmap.org/search/'
    set_json_format = '?format=json'
    cleaned_address = clean_address(address)
    try:
        nominatim_call = nominatim_search + cleaned_address + set_json_format
        try:
            search_object = requests.get(nominatim_call).json()[0]
            latitude = search_object.get('lat')
            longitude = search_object.get('lon')
            coordinate = [latitude, longitude]
            return coordinate
        except IndexError:
            print(nominatim_call)
            return [None, None]
    except TypeError:
        return [None, None]

def get_coordinates(addresses):
    '''
    Function that uses Nominatim search engine API to extract coordinates from
    a list of street addresses
    -----
    Input:
    
    addresses (list) - List of street addresses as strings
    -----
    Output:
    
    coordinates (list) - 2D list of coordinates (latitude, longitude)
    
            [[latitude, longitude],
             [latitude, longitude],
                ...        ...
             [latitude, longitude]]
    '''
    coordinates = []
    for address in addresses:
        coordinate = nominatim_lookup(address)
        coordinates.append(coordinate)
    return coordinates

In [10]:
location_refs = [provider.get('resource').get('location')[0].get('reference') for provider in test_provider_list]
location_urls = [base_url + ref for ref in location_refs]
addresses = [requests.get(location_urls[i]).json().get('address').get('text')
             if i != 34 else None for i in range(len(location_urls))]

In [None]:
coordinates = [[requests.get(re.sub('Fl%20\d+%20|Ste%20\d+%20|Unit%20\w+%20|Unit%20\d+%20', '', re.sub('\s', '%20', f'https://nominatim.openstreetmap.org/search/{addresses[i]}?format=json'))).json()[0].get('lat'),
                requests.get(re.sub('Fl%20\d+%20|Ste%20\d+%20|Unit%20\w+%20|Unit%20\d+%20', '', re.sub('\s', '%20', f'https://nominatim.openstreetmap.org/search/{addresses[i]}?format=json'))).json()[0].get('lon')]
               if i != 34 else [None, None] for i in range(len(addresses))]

In [None]:
(coordinates[0] + specialties[0]).append(addresses[0])

In [None]:
data = [[addresses[i]] + coordinates[i] + specialties[i] if addresses[i] is not None else [None, None, None, None, None] for i in range(len(coordinates))]

In [None]:
pd.DataFrame(data=data, columns=['Address', 'Latitude', 'Longitude', 'Provider Taxonomy Code', 'Specialty'])

In [11]:
addresses[0]

'2228 6TH St Berkeley CA 94710'

In [29]:
requests.get(location_urls[13]).json().get('name')

'Bay Psychiatric Associates - Berkeley'