In [8]:
# Run this cell!
import re
from urllib.parse import quote

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns

%matplotlib inline

In [2]:
# Root of the provider-directory API (hosted at kp.org).
base_url = 'https://kpx-service-bus.kp.org/service/hp/mhpo/healthplanproviderv1rc/'

# Resource paths; each ends in '?' so a search parameter can be appended directly.
location, practitionerRole, organization = 'Location?', 'PractitionerRole?', 'Organization?'

# Search filter and the assembled PractitionerRole query URL.
search_param = 'location.address-city=Torrance'
full_query = f'{base_url}{practitionerRole}{search_param}'

In [7]:
def get_healthcareService_refs(provider_list):
    '''
    Function that extracts the first HealthcareService resource reference from
    each entry of a provider list (taken from the PractitionerRole resource)
    -----
    Input:
    
    provider_list (list) - List of json objects each referring to a single provider
    -----
    Output:
    
    healthcareService_refs (list) - List of healthcareService resource references
                                    for the corresponding providers. The string
                                    'None' is used as a placeholder whenever an
                                    entry has no usable reference, so the result
                                    always has one string-like item per provider.
    -----
    Raises:
    
    TypeError - If provider_list itself is not iterable (e.g. None)
    '''
    healthcareService_refs = []
    for provider in provider_list:
        try:
            provider_entry = provider.get('resource')
            provider_service = provider_entry.get('healthcareService')[0]
            healthcareService_ref = provider_service.get('reference')
        except (TypeError, AttributeError, IndexError, KeyError):
            # Missing 'resource', missing/empty 'healthcareService', or an
            # entry that is not shaped like a dict: treat as "no reference".
            healthcareService_ref = None
        if healthcareService_ref is None:
            # Keep the string placeholder so callers can still concatenate
            # every item onto a base URL.
            healthcareService_ref = 'None'
        healthcareService_refs.append(healthcareService_ref)
    return healthcareService_refs

def _extract_specialty(json_object):
    '''
    Helper that pulls [taxonomy_code, specialty] out of one HealthcareService
    json object, returning [None, None] when the expected fields are absent.
    '''
    try:
        specialty_dict = json_object.get('specialty')[0].get('coding')[0]
        return [specialty_dict.get('code'), specialty_dict.get('display')]
    except (TypeError, AttributeError, IndexError, KeyError):
        # Covers a missing 'specialty'/'coding' key, an empty list, or a
        # payload that is not a dict at all.
        return [None, None]


def get_specialties(healthcareServiceUrls, timeout=30):
    '''
    Function that takes HealthcareService API calls and extracts the provider taxonomy code
    and the corresponding plain English specialty
    -----
    Input:
    
    healthcareServiceUrls (list) - List of strings corresponding to HealthcareService resource API call
    
    timeout (number) - Seconds to wait for each HTTP response (default 30)
    -----
    Output:
    
    specialties (list) - 2D list of provider taxonomy codes and corresponding plain English
                         specialty; [None, None] is used when a response carries
                         no usable specialty or is not valid JSON
                         
            [[taxonomy_code, specialty],
             [taxonomy_code, specialty],
                   ...          ...
             [taxonomy_code, specialty]]
    '''
    specialties = []
    for url in healthcareServiceUrls:
        response = requests.get(url, timeout=timeout)
        try:
            json_object = response.json()
        except ValueError:
            # Body was not JSON (e.g. an HTML error page).
            json_object = None
        specialties.append(_extract_specialty(json_object))
    return specialties

def get_names_and_addresses(location_urls, timeout=30):
    '''
    Function that extracts the location names and street addresses from
    Location API calls
    -----
    Input:
    
    location_urls (list) - List of strings corresponding to Location resource API calls
    
    timeout (number) - Seconds to wait for each HTTP response (default 30)
    -----
    Outputs:
    
    names (list) - List of names as strings (None when the response has no name)
    
    addresses (list) - List of street addresses as strings (None when the
                       response has no address text)
    '''
    names = []
    addresses = []
    for url in location_urls:
        response = requests.get(url, timeout=timeout)
        try:
            json_object = response.json()
        except ValueError:
            # Body was not JSON (e.g. an HTML error page).
            json_object = None

        name = None
        street_address = None
        if isinstance(json_object, dict):
            name = json_object.get('name')
            address_object = json_object.get('address')
            # A Location without an 'address' object yields a None address
            # instead of raising AttributeError.
            if isinstance(address_object, dict):
                street_address = address_object.get('text')

        names.append(name)
        addresses.append(street_address)
    return names, addresses

def clean_address(address):
    '''
    Helper function that cleans street address to allow for Nominatim 
    search engine API call
    -----
    Input:
    
    address (string) - Street address (includes whitespaces and suite, 
                       unit, room or floor)
    -----
    Output:
    
    cleaned_address (string) - Percent-encoded street address with any
                               "Fl/Ste/Unit/Rm <id>" component removed,
                               ready to be placed in a URL. None is
                               returned when address is not a string.
    '''
    if not isinstance(address, str):
        return None
    # \b keeps the keywords from matching in the middle of a longer word.
    unit_pattern = r'\b(?:Fl|Ste|Unit|Rm)\s\w+\s'
    without_unit = re.sub(unit_pattern, '', address)
    # Turn every whitespace character into a plain space so each one is
    # encoded as %20, then escape everything else that is URL-significant
    # ('#', '&', '?', '/', ...).
    single_spaced = re.sub(r'\s', ' ', without_unit)
    return quote(single_spaced, safe='')

def nominatim_lookup(address, user_agent='kaiser-provider-notebook', timeout=30):
    '''
    Helper function that makes a call to Nominatim search engine API
    -----
    Input:
    
    address (string) - Street address
    
    user_agent (string) - Value sent as the User-Agent header to identify
                          this client to the Nominatim server
    
    timeout (number) - Seconds to wait for the HTTP response (default 30)
    -----
    Output:
    
    coordinate (list) - List of strings corresponding to coordinates ([latitude, longitude]);
                        [None, None] when the address is unusable or nothing is found
    '''
    nominatim_search = 'https://nominatim.openstreetmap.org/search'
    cleaned_address = clean_address(address)
    if not cleaned_address:
        # Nothing to search for (address was None / not a string / empty).
        return [None, None]

    # Query-string form (?q=...) rather than the deprecated /search/<query> path.
    # cleaned_address is already percent-encoded, so it is concatenated as-is.
    nominatim_call = f'{nominatim_search}?q={cleaned_address}&format=json'
    response = requests.get(nominatim_call,
                            headers={'User-Agent': user_agent},
                            timeout=timeout)
    try:
        search_object = response.json()[0]
        return [search_object.get('lat'), search_object.get('lon')]
    except (IndexError, KeyError, TypeError, AttributeError, ValueError):
        # Empty result list, an error object instead of a list, or a body
        # that is not JSON at all.
        print(f'Unable to find coordinates for {address}')
        return [None, None]

def get_coordinates(addresses):
    '''
    Function that uses Nominatim search engine API to extract coordinates from
    a list of street addresses. Each distinct address is looked up only once
    per call; repeated addresses reuse the first result.
    -----
    Input:
    
    addresses (list) - List of street addresses as strings (None entries allowed)
    -----
    Output:
    
    coordinates (list) - 2D list of coordinates (latitude, longitude), one
                         entry per input address, in input order
    
            [[latitude, longitude],
             [latitude, longitude],
                ...        ...
             [latitude, longitude]]
    '''
    lookup_cache = {}
    coordinates = []
    for address in addresses:
        if address not in lookup_cache:
            lookup_cache[address] = nominatim_lookup(address)
        # Hand out a fresh list each time so rows never share one object.
        coordinates.append(list(lookup_cache[address]))
    return coordinates

In [4]:
# Query every PractitionerRole entry for one city, page by page, and build a
# table of provider name, address, coordinates and specialty.
base_url = 'https://kpx-service-bus.kp.org/service/hp/mhpo/healthplanproviderv1rc/'
practitionerRole = 'PractitionerRole?'
search_param = 'location.address-city=Berkeley'
full_query = base_url + practitionerRole + search_param

provider_columns = ['Name', 'Address', 'Latitude', 'Longitude',
                    'Provider Taxonomy Code', 'Specialty']
request_timeout = 30  # seconds to wait for each page of results


def collect_provider_rows(provider_list):
    '''
    Helper that turns one page of PractitionerRole entries into table rows
    -----
    Input:
    
    provider_list (list) - List of json objects each referring to a single provider
    -----
    Output:
    
    rows (list) - 2D list with one row per provider, ordered like
                  provider_columns; a row is all None when the provider's
                  location has no street address
    '''
    healthcareService_refs = get_healthcareService_refs(provider_list)
    healthcareServiceUrls = [base_url + ref for ref in healthcareService_refs]
    location_refs = [provider.get('resource').get('location')[0].get('reference')
                     for provider in provider_list]
    location_urls = [base_url + ref for ref in location_refs]

    specialties = get_specialties(healthcareServiceUrls)
    names, addresses = get_names_and_addresses(location_urls)
    coordinates = get_coordinates(addresses)

    return [([name, address] + coordinate + specialty)
            if address is not None
            else [None] * len(provider_columns)
            for name, address, coordinate, specialty
            in zip(names, addresses, coordinates, specialties)]


def find_next_page_url(bundle):
    '''
    Helper that returns the url of the link whose relation is 'next' in a
    search result page, or None when the page has no such link
    '''
    for link in bundle.get('link') or []:
        if link.get('relation') == 'next':
            return link.get('url')
    return None


request = requests.get(full_query, timeout=request_timeout)
json_object = request.json()
total = json_object.get('total')
entries_per_page = len(json_object.get('entry') or [])
# Ceiling division using the page size the server actually returned, so an
# exact multiple of the page size does not produce a phantom extra page.
total_pages = -(-total // entries_per_page) if entries_per_page else 0
print(f'Total Entries: {total}')
print(f'Entries per Page: {entries_per_page}')

# Accumulate plain rows and build the DataFrame once at the end instead of
# concatenating a new frame for every page.
kaiser = []
page = 1
next_json_object = json_object
while True:
    print(f'Working on Page {page}!')
    # A page without an 'entry' list simply contributes no rows.
    kaiser.extend(collect_provider_rows(next_json_object.get('entry') or []))

    next_url = find_next_page_url(next_json_object)
    # Stop when the server offers no further page; total_pages is kept as an
    # upper bound so a misbehaving 'next' link cannot loop forever.
    if next_url is None or page >= total_pages:
        break
    next_json_object = requests.get(next_url, timeout=request_timeout).json()
    page += 1

kaiser_providers = pd.DataFrame(data=kaiser, columns=provider_columns)
kaiser_providers

Total Entries: 367
Entries per Page: 50
Working on Page 1!
Working on Page 2!
Working on Page 3!
Working on Page 4!
Unable to find coordinates for 4701 Stoddard Rd Modesto CA 95356
Working on Page 5!
Working on Page 6!
Working on Page 7!
Working on Page 8!


Unnamed: 0,Name,Address,Latitude,Longitude,Provider Taxonomy Code,Specialty
0,"Nancy Rakela, Omd, Lac",2228 6TH St Berkeley CA 94710,37.8642515,-122.2967957522431,171100000X,Acupuncturist
1,"Nancy Rakela, Omd, Lac",2228 6TH St Berkeley CA 94710,37.8642515,-122.2967957522431,171100000X,Acupuncturist
2,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0804X,Child & Adolescent Psychiatry Physician
3,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0800X,Psychiatry Physician
4,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0800X,Psychiatry Physician
...,...,...,...,...,...,...
323,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,207Q00000X,Family Medicine Physician
324,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,207Q00000X,Family Medicine Physician
325,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,207V00000X,Obstetrics & Gynecology Physician
326,Oakland Medical Center,275 W Macarthur Blvd Oakland CA 94611,37.823935703636046,-122.25792091892124,,


In [6]:
# Collapse rows that repeat the same name / taxonomy code / specialty
# combination, keeping the first occurrence, then renumber the rows from 0.
berkeley_kaiser_providers = (
    kaiser_providers
    .drop_duplicates(subset=['Name', 'Provider Taxonomy Code', 'Specialty'])
    .reset_index(drop=True)
)
berkeley_kaiser_providers

Unnamed: 0,Name,Address,Latitude,Longitude,Provider Taxonomy Code,Specialty
0,"Nancy Rakela, Omd, Lac",2228 6TH St Berkeley CA 94710,37.8642515,-122.2967957522431,171100000X,Acupuncturist
1,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0804X,Child & Adolescent Psychiatry Physician
2,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0800X,Psychiatry Physician
3,Bay Psychiatric Associates - Herrick Campus,2001 Dwight Way Ste 4190 Berkeley CA 94704,37.8630899,-122.270353,2084P0800X,Psychiatry Physician
4,Bay Psychiatric Associates - Parnassus,350 Parnassus Ave Ste 900 San Francisco CA 94117,37.7641224,-122.456351,2084P0800X,Psychiatry Physician
...,...,...,...,...,...,...
109,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,207V00000X,Obstetrics & Gynecology Physician
110,Pinole Medical Offices,1301 Pinole Valley Road Pinole CA 94564,37.99905495,-122.28591184980776,367A00000X,Advanced Practice Midwife
111,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,208000000X,Pediatrics Physician
112,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,103T00000X,Psychologist


In [None]:
# GeoDataFrame of the de-duplicated providers with a point geometry built
# from the Longitude/Latitude columns. Uses the `gpd` alias under which
# geopandas is imported and the provider frame created in the cell above.
gdf = gpd.GeoDataFrame(
    berkeley_kaiser_providers,
    geometry=gpd.points_from_xy(berkeley_kaiser_providers.Longitude,
                                berkeley_kaiser_providers.Latitude)
)

In [23]:
# Keep only providers that were successfully geocoded, renumbered from 0,
# before building any geometry.
located_providers = (
    berkeley_kaiser_providers
    .dropna(subset=['Latitude', 'Longitude'])
    .reset_index(drop=True)
)

# Longitude is x and latitude is y. The coordinates come from Nominatim as
# plain latitude/longitude values, so the frame is tagged as EPSG:4326.
geo_berkeley_kaiser_providers = gpd.GeoDataFrame(
    located_providers,
    geometry=gpd.points_from_xy(located_providers.Longitude,
                                located_providers.Latitude),
    crs='EPSG:4326',
)
geo_berkeley_kaiser_providers

Unnamed: 0,Name,Address,Latitude,Longitude,Provider Taxonomy Code,Specialty,geometry
0,"Nancy Rakela, Omd, Lac",2228 6TH St Berkeley CA 94710,37.8642515,-122.2967957522431,171100000X,Acupuncturist,POINT (-122.29680 37.86425)
1,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0804X,Child & Adolescent Psychiatry Physician,POINT (-122.27081 37.87123)
2,Bay Psychiatric Associates - Berkeley,2020 Milvia St Fl 3 Berkeley CA 94704,37.871225465116275,-122.27080786046511,2084P0800X,Psychiatry Physician,POINT (-122.27081 37.87123)
3,Bay Psychiatric Associates - Herrick Campus,2001 Dwight Way Ste 4190 Berkeley CA 94704,37.8630899,-122.270353,2084P0800X,Psychiatry Physician,POINT (-122.27035 37.86309)
4,Bay Psychiatric Associates - Parnassus,350 Parnassus Ave Ste 900 San Francisco CA 94117,37.7641224,-122.456351,2084P0800X,Psychiatry Physician,POINT (-122.45635 37.76412)
...,...,...,...,...,...,...,...
108,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,207V00000X,Obstetrics & Gynecology Physician,POINT (-122.28999 37.85879)
109,Pinole Medical Offices,1301 Pinole Valley Road Pinole CA 94564,37.99905495,-122.28591184980776,367A00000X,Advanced Practice Midwife,POINT (-122.28591 37.99905)
110,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,208000000X,Pediatrics Physician,POINT (-122.28999 37.85879)
111,Berkeley Medical Offices,2621 Tenth Street Berkeley CA 94710,37.85878703644435,-122.28999123655014,103T00000X,Psychologist,POINT (-122.28999 37.85879)


In [52]:
# Map centre as (latitude, longitude) - presumably the UC Berkeley campus,
# going by the variable name.
uc_location = (37.8719, -122.2585)
base_map = folium.Map(zoom_start=14, location=uc_location)

# TODO: a custom marker icon was sketched here but never wired in:
#   kaiser_icon = folium.features.CustomIcon('Kaiser Logo.png', icon_size=(350, 575))
#   kaiser_marker = folium.Marker(icon=kaiser_icon)

# Draw one marker per provider onto the existing base map.
kaiser_berkeley = geo_berkeley_kaiser_providers.explore(
    marker_type='marker',
    m=base_map,
)

In [53]:
# Write the provider map to 'kaiser_test.html' (path is relative to the
# notebook's working directory).
kaiser_berkeley.save('kaiser_test.html')