# Import

In [1]:
import pandas as pd
import numpy as np
import sqlite3

# Neo4J Queries

    - provider_nodes
        - organization_nodes
        - taxonomy_nodes
    - hospital_nodes
        - organization_nodes
        - taxonomy_nodes
    - referral_relationships

## provider_nodes
    - entity type 1
    - any cbsa
    - referred to an entity type 2 in cbsa 34980
        - with at least 50 transactions and
        - with less than 50 avg wait time

In [2]:
%%time
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT *
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT npi
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT 
            to_npi,
            referral_id,
            qf.entity_type_code,
            qf.taxonomy_code,
            classification,
            specialization,
            qf."provider_organization_name_(legal_business_name)" AS organization,
            qf.provider_first_line_business_practice_location_address AS address_line1,
            qf.provider_second_line_business_practice_location_address AS address_line2,
            qf.provider_business_practice_location_address_city_name AS city,
            qf.provider_business_practice_location_address_state_name AS state,
            qf.provider_business_practice_location_address_postal_code AS zip
        FROM qualifying_refs AS qf
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""
with sqlite3.connect('../data/hopteam.sqlite') as db:
    groups = pd.read_sql(query, db)

Wall time: 9.7 s


In [3]:
%%time
# profiles of all providers
# with referrals to metro nashville groups (entity 2) having
# referrals of over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT 
                referral_id
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            referral_id,
            entity_type_code,
            "provider_last_name_(legal_name)" || ', ' || provider_first_name AS name,
            taxonomy_code,
            classification,
            specialization,
            "provider_organization_name_(legal_business_name)" AS organization,
            provider_first_line_business_practice_location_address AS address_line1,
            provider_second_line_business_practice_location_address AS address_line2,
            provider_business_practice_location_address_city_name AS city,
            provider_business_practice_location_address_state_name AS state,
            provider_business_practice_location_address_postal_code AS zip     
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('../data/hopteam.sqlite') as db:
    providers = pd.read_sql(query, db)

Wall time: 9.57 s


In [4]:
%%time
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time
query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT
                to_npi,
                referral_id,
                patient_count,
                transaction_count,
                average_day_wait,
                std_day_wait  
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            to_npi,
            referral_id,
            patient_count,
            transaction_count,
            average_day_wait,
            std_day_wait
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('../data/hopteam.sqlite') as db:
    referrals = pd.read_sql(query, db)

Wall time: 9.18 s


In [5]:
# Sanity check: number of rows in the zip -> CBSA lookup table used by the queries above.
query = """

SELECT COUNT(*)
FROM zip_cbsa

"""

# `with sqlite3.connect(...)` does not close the connection on exit, so close it explicitly.
db = sqlite3.connect('../data/hopteam.sqlite')
try:
    c = pd.read_sql(query, db)
finally:
    db.close()
c

Unnamed: 0,COUNT(*)
0,47484


## Explore the dfs

### Look at info of various tables

In [6]:
# (rows, columns) of the referral edge frame loaded from SQLite above.
referrals.shape

(69427, 7)

In [7]:
# Group rows labelled state 'TX' even though they passed the CBSA 34980 zip filter
# in the query -- NOTE(review): looks like a state typo in the source profile data.
groups[groups['state'].eq('TX')]

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
19129,1881196376,67055500,2.0,103TC0700X,Psychologist,Clinical,ALLIED HEALTH PROFESSIONALS,3056 HORN SPRINGS ROAD,,LEBANON,TX,37087
19130,1881196376,170610474,2.0,103TC0700X,Psychologist,Clinical,ALLIED HEALTH PROFESSIONALS,3056 HORN SPRINGS ROAD,,LEBANON,TX,37087
19131,1881196376,203902639,2.0,103TC0700X,Psychologist,Clinical,ALLIED HEALTH PROFESSIONALS,3056 HORN SPRINGS ROAD,,LEBANON,TX,37087


### Count unique npis and organizations

In [8]:
# print() already inserts a space between its arguments, so the literals carry no
# trailing space of their own (the original emitted "There are  <n>").
# NOTE(review): the provider organization count shows as 0 in the recorded output --
# entity type 1 profiles appear to have no organization name; confirm before relying on it.
print('There are', providers['from_npi'].nunique(), 'unique recommending providers.')
print('There are', providers['organization'].nunique(), 'unique recommending provider organizations.')
print('There are', groups['to_npi'].nunique(), 'unique entity 2 npis referred to.')
print('There are', groups['organization'].nunique(), 'unique entity 2 organizations referred to.')

There are  17773 unique recommending providers.
There are  0 unique recommending provider organizations.
There are  1495 unique entity 2 npis referred to.
There are  1262 unique entity 2 organizations referred to.


### Look at General Acute Care Hospitals in Nashville
VANDERBILT UNIVERSITY MEDICAL CENTER 1601 23RD AVE S is for adult psychiatric outpatient services.

HCA HEALTH SERVICES OF TENNESSEE, INC. 2300 PATTERSON ST is TriStar Centennial.

HCA HEALTH SERVICES OF TENNESSEE, INC. 391 WALLACE RD is TriStar Southern Hills.

SAINT THOMAS WEST HOSPITAL 4220 HARDING RD is Ascension Saint Thomas West on West End.

SAINT THOMAS WEST HOSPITAL 2000 CHURCH ST is Ascension Saint Thomas Midtown.

SETON CORPORATION has the same address as Saint Thomas. It's a subsidiary (and a parent of others):

    - Parent organizations: Saint Thomas Health, Brady Corporation
    - Subsidiaries: Baptist Home Care, Baptist Healthcare Group

In [9]:
# Distinct General Acute Care Hospital locations whose city contains 'NASHVILLE'.
# Both text masks pass na=False so a missing classification or city counts as
# "no match" instead of putting NaN into the boolean mask (which .loc rejects).
(groups
     .loc[(groups['classification'].str.contains('General Acute Care Hospital', na = False)) 
           & 
           (groups['city'].str.contains('NASHVILLE', na = False))
           ,
          ['organization', 'classification', 'specialization','address_line1', 'city', 'zip']]
     .drop_duplicates()
     .sort_values('organization')
     # keeps the original row label as an 'index' column for tracing back into `groups`
     .reset_index()
)

Unnamed: 0,index,organization,classification,specialization,address_line1,city,zip
0,33509,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,2300 PATTERSON ST,NASHVILLE,37203
1,49937,"HCA HEALTH SERVICES OF TENNESSEE, INC.",General Acute Care Hospital,,391 WALLACE RD,NASHVILLE,37211
2,45127,HTI MEMORIAL HOSPITAL CORPORATION,General Acute Care Hospital,,3441 DICKERSON PIKE,NASHVILLE,37207
3,46760,NASHVILLE GENERAL HOSPITAL,General Acute Care Hospital,,1818 ALBION ST,NASHVILLE,37208
4,43782,SAINT THOMAS WEST HOSPITAL,General Acute Care Hospital,,4220 HARDING RD,NASHVILLE,37205
5,65635,SAINT THOMAS WEST HOSPITAL,General Acute Care Hospital,,2000 CHURCH ST,NASHVILLE,37236
6,66249,SETON CORPORATION,General Acute Care Hospital,,2000 CHURCH ST,NASHVILLE,37236
7,51105,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,1601 23RD AVE S,NASHVILLE,37212
8,59044,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,1211 MEDICAL CENTER DRIVE,NASHVILLE,37232


### Look at General Acute Care Hospitals in Nashville CBSA (34 total)


In [10]:
cities = [
    'NASHVILLE', 'MURFREESBORO', 'BRENTWOOD', 'FRANKLIN', 'HENDERSONVILLE',
    'GOODLETTSVILLE', 'LA VERGNE', 'SMYRNA', 'MT. JULIET', 'GALLATIN',
]
# `cities` is kept for reference only: the city filter is intentionally not applied,
# so every General Acute Care Hospital row returned by the groups query is retained.
gach = (
    groups[groups['classification'].str.contains('General Acute Care Hospital', na=False)]
    .drop_duplicates()
    .sort_values('organization')
)

gach

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
7974,1871530832,159858289,2.0,282N00000X,General Acute Care Hospital,,CENTRAL TENNESSEE HOSPITAL CORPORATION,111 HIGHWAY 70 E,,DICKSON,TN,37055
7927,1871530832,108311749,2.0,282N00000X,General Acute Care Hospital,,CENTRAL TENNESSEE HOSPITAL CORPORATION,111 HIGHWAY 70 E,,DICKSON,TN,37055
7926,1871530832,108311748,2.0,282N00000X,General Acute Care Hospital,,CENTRAL TENNESSEE HOSPITAL CORPORATION,111 HIGHWAY 70 E,,DICKSON,TN,37055
7925,1871530832,106475420,2.0,282N00000X,General Acute Care Hospital,,CENTRAL TENNESSEE HOSPITAL CORPORATION,111 HIGHWAY 70 E,,DICKSON,TN,37055
7924,1871530832,104580694,2.0,282N00000X,General Acute Care Hospital,,CENTRAL TENNESSEE HOSPITAL CORPORATION,111 HIGHWAY 70 E,,DICKSON,TN,37055
...,...,...,...,...,...,...,...,...,...,...,...,...
10878,1265445506,7541090,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10877,1265445506,5204129,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10876,1265445506,5204128,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
11066,1265445506,132420210,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067


In [11]:
# One row per distinct hospital location (organization + street + city/state/zip);
# reset_index() keeps the original row label as an 'index' column.
(
    gach
    .drop_duplicates(subset=['organization', 'address_line1', 'city', 'state', 'zip'])
    .reset_index()
    .sort_values('organization')
)

Unnamed: 0,index,to_npi,referral_id,entity_type_code,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
0,7974,1871530832,159858289,2.0,282N00000X,General Acute Care Hospital,,CENTRAL TENNESSEE HOSPITAL CORPORATION,111 HIGHWAY 70 E,,DICKSON,TN,37055
1,34284,1932146032,21680691,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
2,185,1265487193,13418843,2.0,282NC0060X,General Acute Care Hospital,Critical Access,"HCA HEALTH SERVICES OF TENNESSEE, INC.",313 N MAIN ST,,ASHLAND CITY,TN,37015
3,49958,1720032345,23140295,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",391 WALLACE RD,,NASHVILLE,TN,37211
4,16723,1982650024,174694201,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",5655 FRIST BLVD,,HERMITAGE,TN,37076
5,24887,1992776405,203872372,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",200 STONECREST BLVD,,SMYRNA,TN,37167
6,14815,1538114434,115935786,2.0,282N00000X,General Acute Care Hospital,,HENDERSONVILLE HOSPITAL CORPORATION,355 NEW SHACKLE ISLAND RD,,HENDERSONVILLE,TN,37075
7,45389,1295780476,156776284,2.0,282N00000X,General Acute Care Hospital,,HTI MEMORIAL HOSPITAL CORPORATION,3441 DICKERSON PIKE,,NASHVILLE,TN,37207
8,17754,1417938846,67910007,2.0,282NC0060X,General Acute Care Hospital,Critical Access,"MACON COUNTY GENERAL HOSPITAL, INC.",204 MEDICAL DRIVE,,LAFAYETTE,TN,37083
9,67056,1861479545,150018678,2.0,282N00000X,General Acute Care Hospital,,MAURY REGIONAL HOSPITAL,1224 TROTWOOD AVE,,COLUMBIA,TN,38401


### Look at all Vanderbilt organizations

In [12]:
# Distinct (organization, classification, specialization, zip) combinations for
# every group whose organization name contains 'VANDERBILT'.
groups[groups['organization'].str.contains('VANDERBILT', na=False)][
    ['organization', 'classification', 'specialization', 'zip']
].drop_duplicates()

Unnamed: 0,organization,classification,specialization,zip
8700,VANDERBILT HEALTH AND WILLIAMSON MEDICAL CENTE...,Internal Medicine,,37064
18178,VANDERBILT UNIVERSITY MEDICAL CENTER,Psychiatric Unit,,37087
18182,VANDERBILT UNIVERSITY MEDICAL CENTER,Rehabilitation Unit,,37087
18185,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,37087
26037,"VANDERBILT MAURY RADIATION ONCOLOGY, LLC",Clinic/Center,"Oncology, Radiation",37174
41143,VANDERBILT UNIVERSITY MEDICAL CENTER,Clinic/Center,End-Stage Renal Disease (ESRD) Treatment,37204
44951,"VANDERBILT IMAGING SERVICES, LLC",Clinic/Center,Radiology,37205
50958,VANDERBILT STALLWORTH REHABILITATION HOSPITAL LP,Rehabilitation Hospital,,37212
51038,"VANDERBILT IMAGING SERVICES, LLC",Radiology,Diagnostic Radiology,37212
51105,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,37212


### Look at all classifications containing 'Hospital'

In [13]:
cities = [
    'NASHVILLE', 'MURFREESBORO', 'BRENTWOOD', 'FRANKLIN', 'HENDERSONVILLE',
    'GOODLETTSVILLE', 'LA VERGNE', 'SMYRNA', 'MT. JULIET', 'GALLATIN',
]
# `cities` is kept for reference only: the city filter is intentionally not applied.
# NOTE(review): this is a substring match, so classifications such as 'Hospitalist'
# are included alongside actual hospital types -- confirm that is intended.
hospitals = (
    groups[groups['classification'].str.contains('Hospital', na=False)]
    .drop_duplicates()
    .sort_values('organization')
)

hospitals

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
24396,1396160768,152767826,2.0,208M00000X,Hospitalist,,"APP OF TENNESSEE HM, PLLC",2835 HIGHWAY 231 N,,SHELBYVILLE,TN,37160
24374,1396160768,109754719,2.0,208M00000X,Hospitalist,,"APP OF TENNESSEE HM, PLLC",2835 HIGHWAY 231 N,,SHELBYVILLE,TN,37160
24375,1396160768,113517203,2.0,208M00000X,Hospitalist,,"APP OF TENNESSEE HM, PLLC",2835 HIGHWAY 231 N,,SHELBYVILLE,TN,37160
24376,1396160768,117634620,2.0,208M00000X,Hospitalist,,"APP OF TENNESSEE HM, PLLC",2835 HIGHWAY 231 N,,SHELBYVILLE,TN,37160
24377,1396160768,119158843,2.0,208M00000X,Hospitalist,,"APP OF TENNESSEE HM, PLLC",2835 HIGHWAY 231 N,,SHELBYVILLE,TN,37160
...,...,...,...,...,...,...,...,...,...,...,...,...
11097,1265445506,144711215,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
11098,1265445506,144711218,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
11099,1265445506,144711223,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
11091,1265445506,140783036,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067


### Look at the referrals data

In [14]:
# Display the referral edges (from_npi -> to_npi) with their count and wait-time columns.
referrals

Unnamed: 0,from_npi,to_npi,referral_id,patient_count,transaction_count,average_day_wait,std_day_wait
0,1821080961,1013012616,18854880,91,238,9.924,40.266
1,1841282779,1013012616,131966153,94,236,7.695,36.447
2,1770575607,1013012616,146169993,115,320,2.725,18.756
3,1508804337,1902804271,320488,157,163,27.583,51.939
4,1538109103,1902804271,6754370,312,320,12.709,40.583
...,...,...,...,...,...,...,...
69422,1558355941,1669872735,30083148,31,89,23.101,31.695
69423,1023099074,1669872735,114150220,256,1309,0.214,5.207
69424,1184619124,1669872735,178563129,40,90,23.089,36.995
69425,1205931565,1669872735,184096138,35,54,1.315,6.815


### Look at all providers referring to the Nashville CBSA from out of state

In [15]:
# Referring providers whose practice-location state is anything other than 'TN'.
providers.loc[providers['state'].ne('TN')]

Unnamed: 0,from_npi,referral_id,entity_type_code,name,taxonomy_code,classification,specialization,organization,address_line1,address_line2,city,state,zip
22,1427591627,44512612,1.0,"BECKMAN, LAUREN",363L00000X,Nurse Practitioner,,,18 ELMS CT,,HATTIESBURG,MS,39402
41,1457888976,70281243,1.0,"MOLINA, DANIELLE",363A00000X,Physician Assistant,,,400 COURT ST STE 100,,CHARLESTON,WV,25301
52,1669456307,104646182,1.0,"JOHNSTON, EVAN",367500000X,"Nurse Anesthetist, Certified Registered",,,3600 RIVERS AVE,,CHARLESTON,SC,29405
59,1043228935,118660569,1.0,"D'ANGELO, PAUL",2085R0202X,Radiology,Diagnostic Radiology,,5655 HUDSON DR STE 210,ARIS RADIOLOGY,HUDSON,OH,44236
110,1669456307,103864784,1.0,"JOHNSTON, EVAN",367500000X,"Nurse Anesthetist, Certified Registered",,,3600 RIVERS AVE,,CHARLESTON,SC,29405
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69399,1598994147,80210852,1.0,"HARRIS, JIM",207P00000X,Emergency Medicine,,,2500 N STATE ST,UMC DEPART MENT OF EMERGENCY MEDICINE,JACKSON,MS,39216
69409,1063598373,25237153,1.0,"BURGESS, BERNARD",208600000X,Surgery,,,3601 SW 160TH AVE,SUITE 250,MIRAMAR,FL,33027
69411,1598994147,81057240,1.0,"HARRIS, JIM",207P00000X,Emergency Medicine,,,2500 N STATE ST,UMC DEPART MENT OF EMERGENCY MEDICINE,JACKSON,MS,39216
69419,1386976207,211436182,1.0,"ROTH, JASON",2085R0202X,Radiology,Diagnostic Radiology,,INTEGRA IMAGING PS,1200 WESTWOOD DRIVE,HAMILTON,MT,59840


# Build a function to cleanly assemble the full address, and add it to the dfs

In [16]:
def build_address(df):
    """Assemble a single-line mailing address for every row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the text columns ``address_line1``, ``address_line2``,
        ``city``, ``state`` and ``zip``.

    Returns
    -------
    pandas.Series
        Strings shaped like ``"<line1>[, <line2>], <city>, <state> <zip>"``,
        aligned with ``df``'s index. The second address line is included only
        when it is present and not blank. A row is missing (NaN) when its
        ``address_line1``, ``city``, ``state`` or ``zip`` is missing.
    """
    line1 = df['address_line1']
    line2 = df['address_line2']
    # A second line counts only when it is non-null and not just whitespace; checking
    # for NaN alone let '' through and produced addresses like "line1, , city".
    has_line2 = line2.notna() & line2.astype(str).str.strip().ne('')
    street = (line1 + ', ' + line2).where(has_line2, line1)
    return street + ', ' + df['city'] + ', ' + df['state'] + ' ' + df['zip']

In [17]:
# Preview the formatted addresses for the hospital frame before storing them as a column.
build_address(hospitals)

24396     2835 HIGHWAY 231 N, SHELBYVILLE, TN 37160
24374     2835 HIGHWAY 231 N, SHELBYVILLE, TN 37160
24375     2835 HIGHWAY 231 N, SHELBYVILLE, TN 37160
24376     2835 HIGHWAY 231 N, SHELBYVILLE, TN 37160
24377     2835 HIGHWAY 231 N, SHELBYVILLE, TN 37160
                            ...                    
11097    4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
11098    4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
11099    4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
11091    4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
10950    4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
Length: 9562, dtype: object

In [18]:
# Store the formatted one-line address as an 'address' column on each frame.
for frame in (providers, hospitals, gach):
    frame['address'] = build_address(frame)

# Combine relevant dfs to make full_refs using gach

In [19]:
# Provider profile + general acute care hospital profile + referral metrics, one row
# per referral. Overlapping profile columns get the _provider / _hospital suffixes.
full_refs = pd.merge(
    pd.merge(providers, gach, on='referral_id', suffixes=('_provider', '_hospital')),
    referrals,
    on=['referral_id', 'from_npi', 'to_npi'],
)

full_refs

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,classification_provider,specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
0,1790730448,13418843,1.0,"STAFFORD, JAMES",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",65,67,24.552,50.253
1,1790762219,13418844,1.0,"GOODIN, ELLIS",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",61,63,17.238,35.569
2,1801017116,13418846,1.0,"STEWART, BRANDY",363LF0000X,Nurse Practitioner,Family,,313 N MAIN ST,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",104,186,19.129,51.613
3,1811077712,17417315,1.0,"SHOEMAKER, BENJAMIN",207Q00000X,Family Medicine,,,302 N MAIN ST,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",50,90,17.611,47.253
4,1821060526,19402345,1.0,"WATERS, RONALD",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",62,66,12.697,29.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9063,1396713715,214911952,1.0,"ARTHUR, SCOTT",207XX0005X,Orthopaedic Surgery,Sports Medicine,,4323 CAROTHERS PKWY STE 201,,...,1224 TROTWOOD AVE,,COLUMBIA,TN,38401,"1224 TROTWOOD AVE, COLUMBIA, TN 38401",40,73,30.151,51.364
9064,1407141773,216796225,1.0,"KAYS, KELLY",207R00000X,Internal Medicine,,,1224 TROTWOOD AVE,,...,1224 TROTWOOD AVE,,COLUMBIA,TN,38401,"1224 TROTWOOD AVE, COLUMBIA, TN 38401",387,927,0.019,0.591
9065,1407141302,216796226,1.0,"SHEPHERD, JOANNA",207R00000X,Internal Medicine,,,1224 TROTWOOD AVE,,...,1224 TROTWOOD AVE,,COLUMBIA,TN,38401,"1224 TROTWOOD AVE, COLUMBIA, TN 38401",224,582,0.153,3.689
9066,1407384480,216796227,1.0,"WENINEGAR, BRITTANY",363LF0000X,Nurse Practitioner,Family,,44 HUGHES RD STE 100,,...,1224 TROTWOOD AVE,,COLUMBIA,TN,38401,"1224 TROTWOOD AVE, COLUMBIA, TN 38401",149,190,47.326,55.828


In [20]:
# Column dtypes and non-null counts of the merged provider/hospital/referral frame.
full_refs.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9068 entries, 0 to 9067
Data columns (total 30 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   from_npi                   9068 non-null   int64  
 1   referral_id                9068 non-null   int64  
 2   entity_type_code_provider  9068 non-null   float64
 3   name                       9066 non-null   object 
 4   taxonomy_code_provider     9068 non-null   object 
 5   classification_provider    9068 non-null   object 
 6   specialization_provider    4614 non-null   object 
 7   organization_provider      0 non-null      object 
 8   address_line1_provider     9068 non-null   object 
 9   address_line2_provider     3361 non-null   object 
 10  city_provider              9068 non-null   object 
 11  state_provider             9068 non-null   object 
 12  zip_provider               9068 non-null   object 
 13  address_provider           9068 non-null   objec

In [21]:
# Referrals whose provider name is missing. The name is built in SQL as
# last || ', ' || first -- presumably one of the two parts is NULL for these rows.
full_refs.loc[full_refs['name'].isnull()]

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,classification_provider,specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
2093,1215064852,183898559,1.0,,2084N0402X,Psychiatry & Neurology,Neurology with Special Qualifications in Child...,,1222 TROTWOOD AVE,SUITE 101,...,1080 N ELLINGTON PKWY,,LEWISBURG,TN,37091,"1080 N ELLINGTON PKWY, LEWISBURG, TN 37091",41,67,19.552,32.098
8973,1215064852,184505060,1.0,,2084N0402X,Psychiatry & Neurology,Neurology with Special Qualifications in Child...,,1222 TROTWOOD AVE,SUITE 101,...,1224 TROTWOOD AVE,,COLUMBIA,TN,38401,"1224 TROTWOOD AVE, COLUMBIA, TN 38401",301,480,27.86,41.48


## Write full_refs to CSV for Neo4J

In [22]:
# Export for the Neo4J import; index=False keeps the DataFrame row labels out of the CSV.
full_refs.to_csv('../data/neo4j/full_refs.csv', index = False)

# Combine relevant dfs to make full_refs_hosp using hospitals

In [23]:
# Same merge as full_refs, but against every group whose classification contains
# 'Hospital' instead of only the general acute care hospitals.
full_refs_hosp = pd.merge(
    pd.merge(providers, hospitals, on='referral_id', suffixes=('_provider', '_hospital')),
    referrals,
    on=['referral_id', 'from_npi', 'to_npi'],
)

full_refs_hosp

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,classification_provider,specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
0,1790730448,13418843,1.0,"STAFFORD, JAMES",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",65,67,24.552,50.253
1,1790762219,13418844,1.0,"GOODIN, ELLIS",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",61,63,17.238,35.569
2,1801017116,13418846,1.0,"STEWART, BRANDY",363LF0000X,Nurse Practitioner,Family,,313 N MAIN ST,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",104,186,19.129,51.613
3,1811077712,17417315,1.0,"SHOEMAKER, BENJAMIN",207Q00000X,Family Medicine,,,302 N MAIN ST,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",50,90,17.611,47.253
4,1821060526,19402345,1.0,"WATERS, RONALD",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N STE 1204,,...,313 N MAIN ST,,ASHLAND CITY,TN,37015,"313 N MAIN ST, ASHLAND CITY, TN 37015",62,66,12.697,29.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9557,1538153556,6720971,1.0,"POLING, RODNEY",2084P0800X,Psychiatry & Neurology,Psychiatry,,1402 ROSEWOOD DRIVE,,...,1400 ROSEWOOD DR,,COLUMBIA,TN,38401,"1400 ROSEWOOD DR, COLUMBIA, TN 38401",178,2307,0.125,4.677
9558,1467428680,70254221,1.0,"POWERS, RICHARD",207Q00000X,Family Medicine,,,1605 NASHVILLE HWY,SUITE 200,...,1400 ROSEWOOD DR,,COLUMBIA,TN,38401,"1400 ROSEWOOD DR, COLUMBIA, TN 38401",125,196,0.153,1.819
9559,1891708012,94988082,1.0,"GENTRY, SHAWN",207Q00000X,Family Medicine,,,1605 NASHVILLE HWY,SUITE 200,...,1400 ROSEWOOD DR,,COLUMBIA,TN,38401,"1400 ROSEWOOD DR, COLUMBIA, TN 38401",141,195,0.287,3.271
9560,1164748430,165344879,1.0,"NORTON, LINDSAY",2084P0800X,Psychiatry & Neurology,Psychiatry,,1405 BROWNS LN,,...,1400 ROSEWOOD DR,,COLUMBIA,TN,38401,"1400 ROSEWOOD DR, COLUMBIA, TN 38401",74,192,0.151,1.888


## Write full_refs_hosp to CSV for another notebook

In [24]:
# Export for use in another notebook; index=False keeps the DataFrame row labels out of the CSV.
full_refs_hosp.to_csv('../data/neo4j/full_refs_hosp.csv', index = False)