# Import

In [1]:
import pandas as pd
import numpy as np
import sqlite3

# Neo4J Queries

    - provider_nodes
        - organization_nodes
        - taxonomy_nodes
    - hospital_nodes
        - organization_nodes
        - taxonomy_nodes
    - referral_relationships

## provider_nodes
    - entity type 1
    - any cbsa
    - referred to an entity type 2 in cbsa 34980
        - with at least 50 transactions and
        - with less than 50 avg wait time

In [7]:
%%time
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT *
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT npi
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT 
            to_npi,
            referral_id,
            qf.entity_type_code,
            qf.taxonomy_code,
            classification,
            specialization,
            qf."provider_organization_name_(legal_business_name)" AS organization,
            qf.provider_first_line_business_practice_location_address AS address_line1,
            qf.provider_second_line_business_practice_location_address AS address_line2,
            qf.provider_business_practice_location_address_city_name AS city,
            qf.provider_business_practice_location_address_state_name AS state,
            qf.provider_business_practice_location_address_postal_code AS zip
        FROM qualifying_refs AS qf
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""
with sqlite3.connect('../data/hopteam.sqlite') as db:
    groups = pd.read_sql(query, db)

Wall time: 22.8 s


In [3]:
%%time
# profiles of all providers
# with referrals to metro nashville groups (entity 2) having
# referrals of over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT 
                referral_id
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            referral_id,
            entity_type_code,
            "provider_last_name_(legal_name)" || ' ' || provider_first_name AS name,
            taxonomy_code,
            classification,
            specialization,
            "provider_organization_name_(legal_business_name)" AS organization,
            provider_first_line_business_practice_location_address AS address_line1,
            provider_second_line_business_practice_location_address AS address_line2,
            provider_business_practice_location_address_city_name AS city,
            provider_business_practice_location_address_state_name AS state,
            provider_business_practice_location_address_postal_code AS zip     
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('../data/hopteam.sqlite') as db:
    providers = pd.read_sql(query, db)

Wall time: 23.8 s


In [8]:
%%time
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT
                to_npi,
                referral_id,
                patient_count,
                transaction_count,
                average_day_wait,
                std_day_wait  
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            to_npi,
            referral_id,
            patient_count,
            transaction_count,
            average_day_wait,
            std_day_wait
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('../data/hopteam.sqlite') as db:
    referrals = pd.read_sql(query, db)

Wall time: 23.9 s


## Explore the dfs

### Count unique npis and organizations

In [20]:
# Report how many distinct NPIs and organization names appear on each side of
# the referral relationship (referring providers vs. receiving groups).
for column, label in (
    (providers['from_npi'], 'unique recommending providers.'),
    (providers['organization'], 'unique recommending provider organizations.'),
    (groups['to_npi'], 'unique entity 2 npis referred to.'),
    (groups['organization'], 'unique entity 2 organizations referred to.'),
):
    print('There are ', column.nunique(), label)

There are  17773 unique recommending providers.
There are  0 unique recommending provider organizations.
There are  1495 unique entity 2 npis referred to.
There are  1262 unique entity 2 organizations referred to.


### Look at General Acute Care Hospitals in Nashville
VANDERBILT UNIVERSITY MEDICAL CENTER 1601 23RD AVE S is for adult psychiatric outpatient services.

HCA HEALTH SERVICES OF TENNESSEE, INC. 2300 PATTERSON ST is TriStar Centennial.

HCA HEALTH SERVICES OF TENNESSEE, INC. 391 WALLACE RD is TriStar Southern Hills.

SAINT THOMAS WEST HOSPITAL 4220 HARDING RD is Ascension Saint Thomas West on West End.

SAINT THOMAS WEST HOSPITAL 2000 CHURCH ST is Ascension Saint Thomas Midtown.

SETON CORPORATION has the same address (2000 CHURCH ST) as Ascension Saint Thomas Midtown. It is a subsidiary (and a parent of others):

    - Parent organizations: Saint Thomas Health, Brady Corporation
    - Subsidiaries: Baptist Home Care, Baptist Healthcare Group

In [34]:
# Check out General Acute Care Hospitals in Nashville
(groups
     .loc[(groups['classification'].str.contains('General Acute Care Hospital', na = False)) 
           & 
           (groups['city'].str.contains('NASHVILLE'))
           ,
          ['organization', 'classification', 'specialization','address_line1', 'city', 'zip']]
     .drop_duplicates()
     .sort_values('organization')
     .reset_index()
)

Unnamed: 0,index,organization,classification,specialization,address_line1,city,zip
0,33509,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,2300 PATTERSON ST,NASHVILLE,37203
1,49937,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,391 WALLACE RD,NASHVILLE,37211
2,45127,HTI MEMORIAL HOSPITAL CORPORATION,General Acute Care Hospital,,3441 DICKERSON PIKE,NASHVILLE,37207
3,46760,NASHVILLE GENERAL HOSPITAL,General Acute Care Hospital,,1818 ALBION ST,NASHVILLE,37208
4,43782,SAINT THOMAS WEST HOSPITAL,General Acute Care Hospital,,4220 HARDING RD,NASHVILLE,37205
5,65635,SAINT THOMAS WEST HOSPITAL,General Acute Care Hospital,,2000 CHURCH ST,NASHVILLE,37236
6,66249,SETON CORPORATION,General Acute Care Hospital,,2000 CHURCH ST,NASHVILLE,37236
7,51105,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,1601 23RD AVE S,NASHVILLE,37212
8,59044,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,1211 MEDICAL CENTER DRIVE,NASHVILLE,37232


### Look at General Acute Care Hospitals in Nashville CBSA (34 total)


In [31]:
# Candidate city filter. It is defined here but NOT applied to the selection
# below; to apply it, AND the classification mask with
# df['city'].isin(cities).
cities = [
    'NASHVILLE',
    'MURFREESBORO',
    'BRENTWOOD',
    'FRANKLIN',
    'HENDERSONVILLE',
    'GOODLETTSVILLE',
    'LA VERGNE',
    'SMYRNA',
    'MT. JULIET',
    'GALLATIN',
]

# Every General Acute Care Hospital row in `groups`, de-duplicated on the
# displayed columns and ordered by organization name.
(
    groups
    .loc[
        lambda df: df['classification'].str.contains('General Acute Care Hospital', na=False),
        ['organization', 'classification', 'specialization', 'address_line1', 'city', 'zip'],
    ]
    .drop_duplicates()
    .sort_values('organization')
    .reset_index()
)

Unnamed: 0,index,organization,classification,specialization,address_line1,city,zip
0,7814,CENTRAL TENNESSEE HOSPITAL CORPORATION,General Acute Care Hospital,,111 HIGHWAY 70 E,DICKSON,37055
1,185,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,Critical Access,313 N MAIN ST,ASHLAND CITY,37015
2,49937,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,391 WALLACE RD,NASHVILLE,37211
3,33509,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,2300 PATTERSON ST,NASHVILLE,37203
4,24697,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,200 STONECREST BLVD,SMYRNA,37167
5,16426,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,5655 FRIST BLVD,HERMITAGE,37076
6,14663,HENDERSONVILLE HOSPITAL CORPORATION,General Acute Care Hospital,,355 NEW SHACKLE ISLAND RD,HENDERSONVILLE,37075
7,45127,HTI MEMORIAL HOSPITAL CORPORATION,General Acute Care Hospital,,3441 DICKERSON PIKE,NASHVILLE,37207
8,17729,"MACON COUNTY GENERAL HOSPITAL, INC.",General Acute Care Hospital,Critical Access,204 MEDICAL DRIVE,LAFAYETTE,37083
9,19283,MAURY REGIONAL HOSPITAL,General Acute Care Hospital,Critical Access,1080 N ELLINGTON PKWY,LEWISBURG,37091


### Look at all Vanderbilt organizations

In [16]:
# Distinct (organization, classification, specialization, zip) combinations
# for every group whose organization name contains 'VANDERBILT'.
(
    groups
    .loc[
        lambda df: df['organization'].str.contains('VANDERBILT', na=False),
        ['organization', 'classification', 'specialization', 'zip'],
    ]
    .drop_duplicates()
)

Unnamed: 0,organization,classification,specialization,zip
8700,VANDERBILT HEALTH AND WILLIAMSON MEDICAL CENTE...,Internal Medicine,,37064
18178,VANDERBILT UNIVERSITY MEDICAL CENTER,Psychiatric Unit,,37087
18182,VANDERBILT UNIVERSITY MEDICAL CENTER,Rehabilitation Unit,,37087
18185,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,37087
26037,"VANDERBILT MAURY RADIATION ONCOLOGY, LLC",Clinic/Center,"Oncology, Radiation",37174
41143,VANDERBILT UNIVERSITY MEDICAL CENTER,Clinic/Center,End-Stage Renal Disease (ESRD) Treatment,37204
44951,"VANDERBILT IMAGING SERVICES, LLC",Clinic/Center,Radiology,37205
50958,VANDERBILT STALLWORTH REHABILITATION HOSPITAL LP,Rehabilitation Hospital,,37212
51038,"VANDERBILT IMAGING SERVICES, LLC",Radiology,Diagnostic Radiology,37212
51105,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,37212


### Look at all organizations with 'Hospital' in the classification

In [27]:
# Distinct organizations whose taxonomy *classification* contains 'Hospital'.
# na=False treats a missing classification as "no match"; without it a NaN in
# the mask makes .loc raise instead of filtering.
(groups
     .loc[groups['classification'].str.contains('Hospital', na = False), 
          ['organization', 'classification', 'specialization']]
     .drop_duplicates()
     .sort_values('classification')
     # reset_index() keeps the original row label as an 'index' column
     .reset_index()
)

Unnamed: 0,index,organization,classification,specialization
0,185,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,Critical Access
1,45127,HTI MEMORIAL HOSPITAL CORPORATION,General Acute Care Hospital,
2,43782,SAINT THOMAS WEST HOSPITAL,General Acute Care Hospital,
3,26270,"SAINT THOMAS STONES RIVER HOSPITAL, LLC",General Acute Care Hospital,
4,25460,NORTHCREST MEDICAL CENTER,General Acute Care Hospital,
5,24615,"SAINT THOMAS DEKALB HOSPITAL, LLC",General Acute Care Hospital,
6,24000,SHELBYVILLE HOSPITAL COMPANY LLC,General Acute Care Hospital,
7,22153,SAINT THOMAS RUTHERFORD HOSPITAL,General Acute Care Hospital,
8,20701,"TRUSTPOINT HOSPITAL, LLC",General Acute Care Hospital,
9,66649,MAURY REGIONAL HOSPITAL,General Acute Care Hospital,


### Look at the referrals data

In [35]:
# Show the referrals frame built by the referrals query cell
# (one row per from_npi -> to_npi referral_id with its count and wait columns).
referrals

Unnamed: 0,from_npi,to_npi,referral_id,patient_count,transaction_count,average_day_wait,std_day_wait
0,1821080961,1013012616,18854880,91,238,9.924,40.266
1,1841282779,1013012616,131966153,94,236,7.695,36.447
2,1770575607,1013012616,146169993,115,320,2.725,18.756
3,1508804337,1902804271,320488,157,163,27.583,51.939
4,1538109103,1902804271,6754370,312,320,12.709,40.583
...,...,...,...,...,...,...,...
69422,1558355941,1669872735,30083148,31,89,23.101,31.695
69423,1023099074,1669872735,114150220,256,1309,0.214,5.207
69424,1184619124,1669872735,178563129,40,90,23.089,36.995
69425,1205931565,1669872735,184096138,35,54,1.315,6.815


### Look at all providers referring to the Nashville CBSA from out of state

In [37]:
# Referring providers whose practice-location state is anything other than TN.
providers.loc[lambda df: df['state'] != 'TN']

Unnamed: 0,from_npi,referral_id,entity_type_code,name,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
22,1427591627,44512612,1.0,BECKMAN LAUREN,363L00000X,Nurse Practitioner,,,18 ELMS CT,,HATTIESBURG,MS,39402
41,1457888976,70281243,1.0,MOLINA DANIELLE,363A00000X,Physician Assistant,,,400 COURT ST STE 100,,CHARLESTON,WV,25301
52,1669456307,104646182,1.0,JOHNSTON EVAN,367500000X,"Nurse Anesthetist, Certified Registered",,,3600 RIVERS AVE,,CHARLESTON,SC,29405
59,1043228935,118660569,1.0,D'ANGELO PAUL,2085R0202X,Radiology,Diagnostic Radiology,,5655 HUDSON DR STE 210,ARIS RADIOLOGY,HUDSON,OH,44236
110,1669456307,103864784,1.0,JOHNSTON EVAN,367500000X,"Nurse Anesthetist, Certified Registered",,,3600 RIVERS AVE,,CHARLESTON,SC,29405
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69399,1598994147,80210852,1.0,HARRIS JIM,207P00000X,Emergency Medicine,,,2500 N STATE ST,UMC DEPART MENT OF EMERGENCY MEDICINE,JACKSON,MS,39216
69409,1063598373,25237153,1.0,BURGESS BERNARD,208600000X,Surgery,,,3601 SW 160TH AVE,SUITE 250,MIRAMAR,FL,33027
69411,1598994147,81057240,1.0,HARRIS JIM,207P00000X,Emergency Medicine,,,2500 N STATE ST,UMC DEPART MENT OF EMERGENCY MEDICINE,JACKSON,MS,39216
69419,1386976207,211436182,1.0,ROTH JASON,2085R0202X,Radiology,Diagnostic Radiology,,INTEGRA IMAGING PS,1200 WESTWOOD DRIVE,HAMILTON,MT,59840
