In [1]:
import requests
import pandas as pd
import numpy as np
from cities import *
from specialties import *

We're starting with Kaiser because Kaiser really slays with this. I also have much more familiarity with Kaiser so we're starting here.

In [2]:
# Provider taxonomy codes
psychology = '103T00000X'
psychiatry = '2084P0800X'
lcsw = '1041C0700X'

In [3]:
# KP Region Organization refs fd9c0ae4-e05f-bf25-aadf-219aff63590b
kpnw = '6195d4cb-11d0-6bc8-6833-9233b7361939'
ncal = 'ef11a240-e52e-8dd4-52db-6a9256d52217'
scal = '5fdd4a0e-7fc3-970a-7b52-bb9198666a22'
kpco = '0ed5a029-9844-c608-ee6a-0bc2a35a0123'
kphi = '493944e8-3364-4923-a8b2-a3044d5269a2'
masr = 'b65fbd8f-abc3-8471-55b3-56a1f6d5d134'
kpga = 'db417117-c802-aa23-cc9a-ab53aedd786e'
kpwa = '3befcdee-20ab-b3bc-d4d2-b3fdf6dc288f'

In [4]:
# KP API variables
base = 'https://kpx-service-bus.kp.org/service/hp/mhpo/healthplanproviderv1rc/'
hcs = 'HealthcareService'
loc = 'Location'
org = 'Organization'
dr = 'Practitioner'
role = 'PractitionerRole'

In [5]:
# Bay Area Cities
bay_area = np.array([east_bay])
bay_area = np.append(bay_area, south_bay).flatten()
bay_area = np.append(bay_area, peninsula).flatten()
bay_area = np.append(bay_area, north_bay).flatten()

bay_area.shape

(196,)

Keep these code snippets for reference. These three resources can successfully be queried by location in the following manner:
* `#requests.get(base + loc + '?address-city=Dublin&address-state=CA').json()`
* `#requests.get(base + hcs + '?location.address-city=Dublin&location.address-state=CA').json()`
* `#requests.get(base + role + '?location.address-city=Dublin&location.address-state=Ca').json()`

For now, we will start with creating a small version of the KP product. We'll start, of course, with the city of Berkeley

In [6]:
berk_loc_bundle = requests.get(base + loc + '?address-city=Berkeley&address-state=CA&_count=100').json()

In [7]:
# Create useful variables
total_items = berk_loc_bundle['total']
total_iterations = (total_items // 100) + 1
obj = berk_loc_bundle['entry']
iteration = 1
loc_refs = np.array([])
loc_names = np.array([])
loc_addresses = np.array([])
loc_statuses = np.array([])

# For loop
while iteration <= total_iterations:
    next_url = berk_loc_bundle['link'][1]['url']
    for item in obj:
        resource = item['resource']
        # Find values in Location resource item
        ref = resource['id']
        name = resource['identifier'][0]['value']
        address = resource['address']['text']
        status = resource['status']
        
        # Append to array
        loc_refs = np.append(loc_refs, ref)
        loc_names = np.append(loc_names, name)
        loc_addresses = np.append(loc_addresses, address)
        loc_statuses = np.append(loc_statuses, status)
    
    # Finished with items in obj; reinitialize variables
    berk_loc_bundle = requests.get(next_url).json()
    obj = berk_loc_bundle['entry']
    iteration += 1
        
# Create Location DataFrame
berk_loc_df = pd.DataFrame(data={'id':loc_refs,
                                 'name':loc_names,
                                 'address':loc_addresses,
                                 'status':loc_statuses})

In [8]:
berk_loc_df.head()

Unnamed: 0,id,name,address,status
0,505ece60-3e81-b340-236a-30c4b1a1eb6c,Clifford_Feldman2855_Telegraph_Ave_Ste_204,"2855 Telegraph Ave Ste 204 Berkeley, CA 94705",active
1,36aaf6d9-7ba6-8994-c1a3-bbc3ac004565,Peter_Freedman3031_Telegraph_Ave_Ste_103,"3031 Telegraph Ave Ste 103 Berkeley, CA 94705",active
2,c6a361af-d878-3a12-28ce-3631c09a1985,Yelena_Sirbiladze1942_University_Ave_Ste_208,"1942 University Ave Ste 208 Berkeley, CA 94704",active
3,6ff0fd5a-2116-3179-7ed7-20ef0b3324b3,Richard_Unger2607_Alcatraz_Ave,"2607 Alcatraz Ave Berkeley, CA 94705",active
4,de104892-e14b-014d-a3d1-156fe9149aa2,Haleh_Nekoorad*Long1942_University_Ave_Ste_208,"1942 University Ave Ste 208 Berkeley, CA 94704",active


Now, let's validate this DataFrame

In [9]:
berk_loc_df.shape[0] == total_items

True

In [10]:
len(berk_loc_df['id'].unique()) == berk_loc_df.shape[0]

True

In [11]:
len(berk_loc_df['name'].unique()) == berk_loc_df.shape[0]

True

Keep these code snippets for reference. These three resources can successfully be queried by location in the following manner:
* `#requests.get(base + loc + '?address-city=Dublin&address-state=CA').json()`
* `#requests.get(base + hcs + '?location.address-city=Dublin&location.address-state=CA').json()`
* `#requests.get(base + role + '?location.address-city=Dublin&location.address-state=Ca').json()`

The Location DataFrame looks good!! Now, let's focus on creating the Provider DataFrame. Again, we'll start with a small subset by looking at the City of Berkeley.

In [12]:
berk_psychology_bundle = requests.get(base + hcs + f'?location.address-city=Berkeley&location.address-state=CA&specialty={lcsw}&_count=100').json()

In [13]:
# Create useful variables
total_items = berk_psychology_bundle['total']
total_iterations = (total_items // 100) + 1
obj = berk_psychology_bundle['entry']
iteration = 1
hcs_refs = np.array([])
hcs_loc_refs = np.array([])
hcs_names = np.array([])
hcs_specialties = np.array([])
hcs_statuses = np.array([])
hcs_phones = np.array([])
hcs_languages = np.array([])

# For loop
while iteration <= total_iterations:
    if total_iterations > 1:
        next_url = berk_psychology_bundle['link'][1]['url']
    else:
        next_url = None
    for item in obj:
        resource = item['resource']
        # Find values in Location resource item
        ref = resource['id']
        loc_ref = resource['location'][0]['reference'][9:]
        name = resource['name']
        specialty = resource['specialty'][0]['coding'][0]['display']
        status = resource['active']
        phone = resource['telecom'][0]['value']
        language = resource['language']
        
        # Append to array
        hcs_refs = np.append(hcs_refs, ref)
        hcs_loc_refs = np.append(hcs_loc_refs, loc_ref)
        hcs_names = np.append(hcs_names, name)
        hcs_specialties = np.append(hcs_specialties, specialty)
        hcs_statuses = np.append(hcs_statuses, status)
        hcs_phones = np.append(hcs_phones, phone)
        hcs_languages = np.append(hcs_languages, language)
    
    # Finished with items in obj; reinitialize variables
    if total_iterations > 1:
        berk_psychology_bundle = requests.get(next_url).json()
        obj = berk_psychology_bundle['entry']
        iteration += 1
    else:
        break
        
# Create Location DataFrame
berk_psychology_df = pd.DataFrame(data={'id':hcs_refs,
                                        'location':hcs_loc_refs,
                                        'name':hcs_names,
                                        'specialty':hcs_specialties,
                                        'staus':hcs_statuses,
                                        'phone':hcs_phones,
                                        'language':hcs_languages})

In [14]:
berk_psychology_df.head()

Unnamed: 0,id,location,name,specialty,staus,phone,language
0,0810d773-67af-4aa9-aa31-3083991765b8,2a05d24c-70a9-4b80-8ac5-27e010df0bb5,Esther Martino LCSW,Clinical Social Worker,1.0,(510) 406-4166,en-US
1,8e4c30bf-0050-4dc8-8ec6-7f59ea473324,981f78e0-d797-45ca-aa26-2363b0c173ea,Family Spring Psychology,Clinical Social Worker,1.0,(510) 470-5777,en-US
2,c04e3025-9a69-49e0-9187-d7dd9e43ec83,981f78e0-d797-45ca-aa26-2363b0c173ea,Family Spring Psychology,Clinical Social Worker,1.0,(510) 470-5777,en-US
3,e472ac41-9321-4f53-9d7c-f54b35e4e1af,f676ca79-bfe9-4c47-a0ab-1b422c9cb4c2,Patricia B. Becker,Clinical Social Worker,1.0,(510) 704-0707,en-US
4,0c66d912-1661-4415-90d3-8867ee096d9b,1bf50e91-5c22-4a83-a99f-52bb62631c3c,Nicholas Rosenberg LCSW,Clinical Social Worker,1.0,(510) 868-5911,en-US


Now that we've successfully created a mini Location DataFrame and Providers DataFrame, let's repeat the process for multiple cities in the East Bay.

In [15]:
loc_refs = np.array([])
loc_names = np.array([])
loc_addresses = np.array([])
loc_cities = np.array([])
loc_zips = np.array([])
loc_statuses = np.array([])

for city in bay_area:
    city_loc_bundle = requests.get(base + loc + f'?address-city={city}&address-state=CA&_count=100').json()
    
    # Check if city has any Locations
    if not city_loc_bundle['total']:
        continue
    else: 
        # Create useful variables
        total_items = city_loc_bundle['total']
        total_iterations = (total_items // 100) + 1
        obj = city_loc_bundle['entry']
        iteration = 1

        # For loop
        while iteration <= total_iterations:
            # Check if no pagination required
            if total_items <= 100:
                next_url = None
            else:
                next_url = city_loc_bundle['link'][1]['url']
            for item in obj:
                resource = item['resource']
                # Find values in Location resource item
                ref = resource['id']
                name = resource['identifier'][0]['value']
                address = resource['address']['text']
                city_name = resource['address']['city']
                zip_code = resource['address']['postalCode']
                status = resource['status']

                # Append to array
                loc_refs = np.append(loc_refs, ref)
                loc_names = np.append(loc_names, name)
                loc_addresses = np.append(loc_addresses, address)
                loc_cities = np.append(loc_cities, city_name)
                loc_zips = np.append(loc_zips, zip_code)
                loc_statuses = np.append(loc_statuses, status)

            # Finished with items in obj; reinitialize variables
            if total_items > 100:
                city_loc_bundle = requests.get(next_url).json()
                obj = city_loc_bundle['entry']
            iteration += 1

# Create Location DataFrame
loc_df = pd.DataFrame(data={'id':loc_refs,
                            'name':loc_names,
                            'address':loc_addresses,
                            'city':loc_cities,
                            'zip':loc_zips,
                            'status':loc_statuses})

In [16]:
loc_df

Unnamed: 0,id,name,address,city,zip,status
0,27e47aee-88fd-8b36-fb64-a886f919bb5e,Michael_San_Jose2179_Harbor_Bay_Pkwy,"2179 Harbor Bay Pkwy Alameda, CA 94502",Alameda,94502,active
1,bc15e6af-0d67-cfcf-dee1-cda4edd906ca,Michael_Knoll_DDS_MD2242_Santa_Clara_Ave,"2242 Santa Clara Ave Alameda, CA 94501",Alameda,94501,active
2,622f3ab9-3128-a996-95bb-444b5c162ab8,Alameda_Landing_Dentistry_Dental_Group2660_5th...,"2660 5th St Ste C Alameda, CA 94501",Alameda,94501,active
3,8f574a93-ca0a-64c9-dec3-2055784e970c,Dennis_Robles2258_Santa_Clara_Ave_Ste_4,"2258 Santa Clara Ave Ste 4 Alameda, CA 94501",Alameda,94501,active
4,9123f200-db09-b591-a655-12828e0d77be,Bright_Now_Dental_Alameda2140_S_Shore_Ctr,"2140 S Shore Ctr Alameda, CA 94501",Alameda,94501,active
...,...,...,...,...,...,...
5985,87d1a900-56aa-4a74-a00e-f7e5b43bbd40,1466788699*STEPHEN_TESSLER_PHD*CN_310017461700...,1140 Pitt School Rd Ste F Dixon CA 95620,Dixon,95620,active
5986,8cf87283-da5a-4604-b063-d38680225971,339937,"1640 N Lincoln St , Dixon , CA , 95620",Dixon,95620,active
5987,b0e93f08-bd24-473f-ace3-3f34cf7bcea1,363608,"1057 N First St , Dixon , CA , 95620-0000",Dixon,95620-0000,active
5988,f07f06d0-7dc7-41e7-a74b-be2be81d5900,363094,"1235 Stratford Ave , Dixon , CA , 95620-2024",Dixon,95620-2024,active


In [17]:
loc_df['city'].value_counts()

San Jose         1454
San Francisco     615
Berkeley          287
Oakland           269
Santa Rosa        241
                 ... 
Deer Park           1
Atherton            1
San Martin          1
Bethel Island       1
Colma               1
Name: city, Length: 112, dtype: int64

In [18]:
hcs_refs = np.array([])
hcs_loc_refs = np.array([])
hcs_names = np.array([])
hcs_specialties = np.array([])
hcs_statuses = np.array([])
hcs_phones = np.array([])
hcs_languages = np.array([])

for city in bay_area:
    for specialty in mental_health:
        city_hcs_bundle = requests.get(base + hcs + f'?location.address-city={city}&location.address-state=CA&specialty={specialty}&_count=100').json()

        # Check if city has any Locations
        if not city_hcs_bundle['total']:
            continue
        else: 
            # Create useful variables
            total_items = city_hcs_bundle['total']
            total_iterations = (total_items // 100) + 1
            obj = city_hcs_bundle['entry']
            iteration = 1

            # For loop
            while iteration <= total_iterations:
                # Check if no pagination required
                if total_items <= 100:
                    next_url = None
                else:
                    next_url = city_hcs_bundle['link'][1]['url']
                for item in obj:
                    resource = item['resource']
                    # Find values in Location resource item
                    ref = resource['id']
                    loc_ref = resource['location'][0]['reference'][9:]
                    name = resource['name']
                    try:
                        specialty = resource['specialty'][0]['coding'][0]['display']
                    except KeyError:
                        specialty = 'N/A'
                    status = resource['active']
                    phone = resource['telecom'][0]['value']
                    language = resource['language']

                    # Append to array
                    hcs_refs = np.append(hcs_refs, ref)
                    hcs_loc_refs = np.append(hcs_loc_refs, loc_ref)
                    hcs_names = np.append(hcs_names, name)
                    hcs_specialties = np.append(hcs_specialties, specialty)
                    hcs_statuses = np.append(hcs_statuses, status)
                    hcs_phones = np.append(hcs_phones, phone)
                    hcs_languages = np.append(hcs_languages, language)

                # Finished with items in obj; reinitialize variables
                if total_items > 100:
                    city_hcs_bundle = requests.get(next_url).json()
                    obj = city_hcs_bundle['entry']
                iteration += 1

# Create Location DataFrame
providers_df = pd.DataFrame(data={'id':hcs_refs,
                                  'location':hcs_loc_refs,
                                  'name':hcs_names,
                                  'specialty':hcs_specialties,
                                  'status':hcs_statuses,
                                  'phone':hcs_phones,
                                  'language':hcs_languages})

In [19]:
providers_df

Unnamed: 0,id,location,name,specialty,status,phone,language
0,f8ea8862-ba0f-4905-b00c-a3ab28f3d819,3e281440-d05e-4be1-84b2-4466c65e9d04,Alameda Medical Offices,Clinical Social Worker,1.0,(866) 454-8855,en-US
1,f589fd77-9d20-457c-a264-a0dd89167469,d18fd0c2-6a6a-4a8c-8786-7d330e527a02,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US
2,df28b621-2f5b-41b4-a74d-ecd88966fe62,d18fd0c2-6a6a-4a8c-8786-7d330e527a02,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US
3,56809ca7-e288-496c-aa49-b7173efd66a0,40d40516-ab20-4bdb-beba-da3d68e8dd6e,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US
4,025bb7c0-1082-4ff7-9f16-97727604cdd0,c56a5310-a5e8-48c8-8fda-fc169257cc36,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US
...,...,...,...,...,...,...,...
2342,8890cf8b-c8a6-441f-98be-4301724a6cbf,c2e7733d-b7be-44e6-8fe7-2b5a23aa7ce7,Vallejo Medical Center,Clinical Social Worker,1.0,(866) 454-8855,en-US
2343,16cc2835-b8fa-4736-a5d8-6fe2cc985346,c2e7733d-b7be-44e6-8fe7-2b5a23aa7ce7,Vallejo Medical Center,Clinical Social Worker,1.0,(866) 454-8855,en-US
2344,88701929-d498-4edd-aacb-b38c443b19ca,9f47965a-5a75-4851-bc8e-e909d22722c3,Sandi Rene Gundersen,Clinical Social Worker,1.0,(707) 373-7987,en-US
2345,3dce5d7a-cdb4-4651-9e42-33c314a250dc,c2e7733d-b7be-44e6-8fe7-2b5a23aa7ce7,Vallejo Medical Center,Clinical Social Worker,1.0,(866) 454-8855,en-US


In [20]:
providers_df['specialty'].value_counts()

Psychologist              806
Clinical Social Worker    748
Psychiatry Physician      449
N/A                       344
Name: specialty, dtype: int64

In [21]:
providers_df['name'].value_counts()

Grow Healthcare Group - Norcal - This Location Offers Telehealth Services Only    364
Oakland Medical Center                                                            156
Walnut Creek Medical Center                                                       112
San Francisco Medical Center                                                      111
San Leandro Medical Center                                                         94
                                                                                 ... 
Vortex Psychiatry                                                                   1
Denise Jones-kazan LCSW                                                             1
Manda Selva LCSW                                                                    1
Minoo Mehdikhan - San Jose                                                          1
Sandi Rene Gundersen                                                                1
Name: name, Length: 283, dtype: int64

Now let's join the DataFrames together. Let's jot down some notes so that we ensure the join is conducted correctly:
* Location DataFrame includes Location items of all specialties. We only care about the ones who are referenced in the Providers DataFrame
* We want all the information from the Providers DataFrame and everything except `name` from the Location DataFrame
* We'll join on Location resource references; in the Providers DataFrame this is `location`, in the Location DataFrame, this is `id`
* We'll perform an inner left join

In [29]:
merged = providers_df.merge(right=loc_df,
                            how='left',
                            left_on='location',
                            right_on='id')
merged = merged.rename(columns={'id_x':'provider_id',
                                'name_x':'name',
                                'status_x':'provider_status',
                                'id_y':'location_id', # Drop
                                'name_y':'location_name', # Drop
                                'status_y':'location_status'}) \
               .drop(['location_id', 'location_name'], axis=1)
merged

Unnamed: 0,provider_id,location,name,specialty,provider_status,phone,language,address,city,zip,location_status
0,f8ea8862-ba0f-4905-b00c-a3ab28f3d819,3e281440-d05e-4be1-84b2-4466c65e9d04,Alameda Medical Offices,Clinical Social Worker,1.0,(866) 454-8855,en-US,2417 Central Avenue Alameda CA 94501,Alameda,94501,active
1,f589fd77-9d20-457c-a264-a0dd89167469,d18fd0c2-6a6a-4a8c-8786-7d330e527a02,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US,3900 Broadway Oakland CA 94611,Oakland,94611,active
2,df28b621-2f5b-41b4-a74d-ecd88966fe62,d18fd0c2-6a6a-4a8c-8786-7d330e527a02,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US,3900 Broadway Oakland CA 94611,Oakland,94611,active
3,56809ca7-e288-496c-aa49-b7173efd66a0,40d40516-ab20-4bdb-beba-da3d68e8dd6e,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US,3801 Howe Street Oakland CA 94611,Oakland,94611,active
4,025bb7c0-1082-4ff7-9f16-97727604cdd0,c56a5310-a5e8-48c8-8fda-fc169257cc36,Oakland Medical Center,Psychologist,1.0,(866) 454-8855,en-US,3505 Broadway Oakland CA 94611,Oakland,94611,active
...,...,...,...,...,...,...,...,...,...,...,...
2352,8890cf8b-c8a6-441f-98be-4301724a6cbf,c2e7733d-b7be-44e6-8fe7-2b5a23aa7ce7,Vallejo Medical Center,Clinical Social Worker,1.0,(866) 454-8855,en-US,1761 Broadway Street Vallejo CA 94589,Vallejo,94589,active
2353,16cc2835-b8fa-4736-a5d8-6fe2cc985346,c2e7733d-b7be-44e6-8fe7-2b5a23aa7ce7,Vallejo Medical Center,Clinical Social Worker,1.0,(866) 454-8855,en-US,1761 Broadway Street Vallejo CA 94589,Vallejo,94589,active
2354,88701929-d498-4edd-aacb-b38c443b19ca,9f47965a-5a75-4851-bc8e-e909d22722c3,Sandi Rene Gundersen,Clinical Social Worker,1.0,(707) 373-7987,en-US,631 Tennessee St Ste 201 Vallejo CA 94590,Vallejo,94590,active
2355,3dce5d7a-cdb4-4651-9e42-33c314a250dc,c2e7733d-b7be-44e6-8fe7-2b5a23aa7ce7,Vallejo Medical Center,Clinical Social Worker,1.0,(866) 454-8855,en-US,1761 Broadway Street Vallejo CA 94589,Vallejo,94589,active


In [30]:
merged['location'].value_counts()

6690f29c-fe37-4fed-af8e-2d9ea3384c25    104
d18fd0c2-6a6a-4a8c-8786-7d330e527a02     76
fc66ad11-718f-4142-9457-c599a7d7db5f     68
2f726f35-0d11-4d35-809c-43a49a70cd69     65
b3efffbe-be6c-4ca4-aafa-b37622ed3a6e     56
                                       ... 
dd5639f1-67c0-4e99-ae58-2e02713af259      1
15d585d6-4406-49df-85f2-ed8f612c141a      1
f16d31dd-358f-43b0-9f48-e71d19f847c6      1
23f56233-6ecf-4d40-a734-d2f0b94d9739      1
9f47965a-5a75-4851-bc8e-e909d22722c3      1
Name: location, Length: 709, dtype: int64