In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import sqlite3
import matplotlib.pyplot as plt

In [2]:
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time

query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT *
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT npi
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT 
            to_npi,
            referral_id,
            qf.entity_type_code,
            qf.taxonomy_code,
            classification,
            specialization,
            qf."provider_organization_name_(legal_business_name)" AS organization,
            qf.provider_first_line_business_practice_location_address AS address_line1,
            qf.provider_second_line_business_practice_location_address AS address_line2,
            qf.provider_business_practice_location_address_city_name AS city,
            qf.provider_business_practice_location_address_state_name AS state,
            qf.provider_business_practice_location_address_postal_code AS zip
        FROM qualifying_refs AS qf
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""
with sqlite3.connect('data/hopteam.sqlite') as db:
    groups = pd.read_sql(query, db)

In [3]:
groups

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
0,1013012616,18854880,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
1,1013012616,131966153,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
2,1013012616,146169993,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
3,1902804271,320488,2.0,208VP0014X,Pain Medicine,Interventional Pain Medicine,"PAIN MANAGEMENT GROUP, P.C.",5801 CROSSINGS BLVD,,ANTIOCH,TN,37013
4,1902804271,6754370,2.0,208VP0014X,Pain Medicine,Interventional Pain Medicine,"PAIN MANAGEMENT GROUP, P.C.",5801 CROSSINGS BLVD,,ANTIOCH,TN,37013
...,...,...,...,...,...,...,...,...,...,...,...,...
69422,1669872735,30083148,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69423,1669872735,114150220,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69424,1669872735,178563129,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69425,1669872735,184096138,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563


In [4]:
# profiles of all providers
# with referrals to metro nashville groups (entity 2) 
#having referrals of over 50 transactions and under 50 average wait time

query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT 
                referral_id
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            referral_id,
            entity_type_code,
            "provider_last_name_(legal_name)" || ', ' || provider_first_name AS name,
            taxonomy_code,
            classification,
            specialization,
            "provider_organization_name_(legal_business_name)" AS organization,
            provider_first_line_business_practice_location_address AS address_line1,
            provider_second_line_business_practice_location_address AS address_line2,
            provider_business_practice_location_address_city_name AS city,
            provider_business_practice_location_address_state_name AS state,
            provider_business_practice_location_address_postal_code AS zip     
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('data/hopteam.sqlite') as db:
    providers = pd.read_sql(query, db)

In [5]:
providers

Unnamed: 0,from_npi,referral_id,entity_type_code,name,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
0,1821080961,18854880,1.0,"SHERWOOD, WILLIAM",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
1,1841282779,131966153,1.0,"RHODY, KEVIN",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
2,1770575607,146169993,1.0,"COOPER, STEVEN",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
3,1508804337,320488,1.0,"SHARMA, VINEET",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
4,1538109103,6754370,1.0,"NAU, PAUL",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69422,1558355941,30083148,1.0,"JOSEPH, JOJU",207RN0300X,Internal Medicine,Nephrology,,270 EAST MAIN STREET,SUITE 200,GALLATIN,TN,37066
69423,1023099074,114150220,1.0,"LAMBERT, HEATHER",363LW0102X,Nurse Practitioner,Women's Health,,507 GORDONSVILLE HWY,SUITE 203,GORDONSVILLE,TN,38563
69424,1184619124,178563129,1.0,"RUTHERFORD, RICHARD",207Q00000X,Family Medicine,,,133 HOSPITAL DR,SUITE 500,CARTHAGE,TN,37030
69425,1205931565,184096138,1.0,"NOBLE, MICHAEL",207Q00000X,Family Medicine,,,37 PALMER ST,,CALAIS,ME,04619


### Top providers in terms of specialty 

In [None]:
providers['Classification'].value_counts().head(20)

In [None]:
providers['Classification'].value_counts().head(15).plot(kind='bar', figsize= (10,6))
plt.xlabel('Specialties')
plt.ylabel('Count of Referrals')
plt.title('Referrals by Specialty')


In [None]:
providers.groupby(['name','Classification']).sum()

In [9]:
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time

query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT
                to_npi,
                referral_id,
                patient_count,
                transaction_count,
                average_day_wait,
                std_day_wait  
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            to_npi,
            referral_id,
            patient_count,
            transaction_count,
            average_day_wait,
            std_day_wait
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('data/hopteam.sqlite') as db:
    referrals = pd.read_sql(query, db)

In [10]:
referrals

Unnamed: 0,from_npi,to_npi,referral_id,patient_count,transaction_count,average_day_wait,std_day_wait
0,1821080961,1013012616,18854880,91,238,9.924,40.266
1,1841282779,1013012616,131966153,94,236,7.695,36.447
2,1770575607,1013012616,146169993,115,320,2.725,18.756
3,1508804337,1902804271,320488,157,163,27.583,51.939
4,1538109103,1902804271,6754370,312,320,12.709,40.583
...,...,...,...,...,...,...,...
69422,1558355941,1669872735,30083148,31,89,23.101,31.695
69423,1023099074,1669872735,114150220,256,1309,0.214,5.207
69424,1184619124,1669872735,178563129,40,90,23.089,36.995
69425,1205931565,1669872735,184096138,35,54,1.315,6.815


In [None]:
density_patient_count = (referrals
    .groupby('from_npi')['transaction_count']
    .mean()
    .sort_values(ascending = False)
)
density_patient_count

In [None]:
#average count of patients by referring npi
density_patient_count = (referrals
    .groupby('from_npi')['patient_count']
    .mean()
    .sort_values(ascending = False)
)
density_patient_count

In [None]:
referrals[referrals['from_npi']==1457649139]

### Top 6 provider referrals are from Berntwood, TN

In [None]:
providers[providers['from_npi']==1457649139]

In [None]:
providers[providers['from_npi']==1891955688]

In [None]:
providers[providers['from_npi']==1720314248]

In [None]:
providers[providers['from_npi']==1609848563]

In [None]:
providers[providers['from_npi']==1518046796]

In [70]:
# General Acute Care Hospitals in Nashville and nearby 


cities = ['NASHVILLE',
          'MURFREESBORO',
          'BRENTWOOD',
          'FRANKLIN',
          'HENDERSONVILLE',
          'GOODLETTSVILLE',
          'LA VERGNE',
          'SMYRNA',
          'MT. JULIET',
          'GALLATIN']

gach = (groups
     .loc[(groups['Classification'].str.contains('General Acute Care Hospital', na = False)) 
            & 
           (groups['city'].isin(cities))
           ,
          ]
     .drop_duplicates()
     .sort_values('organization')
)

gach

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
33687,1023055126,48731386,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33707,1023055126,53473559,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33708,1023055126,55525652,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33709,1023055126,55525663,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33710,1023055126,55525676,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
...,...,...,...,...,...,...,...,...,...,...,...,...
10962,1265445506,53960238,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10961,1265445506,53960229,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10960,1265445506,52260376,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10967,1265445506,55966757,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067


In [20]:
gach.drop_duplicates(subset = ['organization', 'address_line1', 'city', 'state', 'zip']).reset_index().sort_values('organization')


Unnamed: 0,index,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
0,33687,1023055126,48731386,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
1,50076,1720032345,155535970,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",391 WALLACE RD,,NASHVILLE,TN,37211
2,24794,1992776405,100689576,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",200 STONECREST BLVD,,SMYRNA,TN,37167
3,14903,1538114434,169422729,2.0,282N00000X,General Acute Care Hospital,,HENDERSONVILLE HOSPITAL CORPORATION,355 NEW SHACKLE ISLAND RD,,HENDERSONVILLE,TN,37075
4,45127,1093769606,20019626,2.0,282N00000X,General Acute Care Hospital,,HTI MEMORIAL HOSPITAL CORPORATION,3441 DICKERSON PIKE,,NASHVILLE,TN,37207
5,46796,1568551455,132969432,2.0,282N00000X,General Acute Care Hospital,,NASHVILLE GENERAL HOSPITAL,1818 ALBION ST,ATTN: MARTHA LAMPLEY,NASHVILLE,TN,37208
6,22376,1164590386,117188077,2.0,282N00000X,General Acute Care Hospital,,SAINT THOMAS RUTHERFORD HOSPITAL,1700 MEDICAL CENTER PKWY,,MURFREESBORO,TN,37129
7,44234,1629025648,176011269,2.0,282N00000X,General Acute Care Hospital,,SAINT THOMAS WEST HOSPITAL,4220 HARDING RD,,NASHVILLE,TN,37205
8,66179,1780778969,194249596,2.0,282N00000X,General Acute Care Hospital,,SAINT THOMAS WEST HOSPITAL,2000 CHURCH ST,,NASHVILLE,TN,37236
9,66249,1992861314,106709285,2.0,282N00000X,General Acute Care Hospital,,SETON CORPORATION,2000 CHURCH ST,,NASHVILLE,TN,37236


In [21]:
#Vanderbilt organizations
(groups
     .loc[groups['organization'].str.contains('VANDERBILT', na = False),
          ['organization', 'Classification', 'Specialization', 'zip']]
     .drop_duplicates()
)

Unnamed: 0,organization,Classification,Specialization,zip
8700,VANDERBILT HEALTH AND WILLIAMSON MEDICAL CENTE...,Internal Medicine,,37064
18178,VANDERBILT UNIVERSITY MEDICAL CENTER,Psychiatric Unit,,37087
18182,VANDERBILT UNIVERSITY MEDICAL CENTER,Rehabilitation Unit,,37087
18185,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,37087
26037,"VANDERBILT MAURY RADIATION ONCOLOGY, LLC",Clinic/Center,"Oncology, Radiation",37174
41143,VANDERBILT UNIVERSITY MEDICAL CENTER,Clinic/Center,End-Stage Renal Disease (ESRD) Treatment,37204
44951,"VANDERBILT IMAGING SERVICES, LLC",Clinic/Center,Radiology,37205
50958,VANDERBILT STALLWORTH REHABILITATION HOSPITAL LP,Rehabilitation Hospital,,37212
51038,"VANDERBILT IMAGING SERVICES, LLC",Radiology,Diagnostic Radiology,37212
51105,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital,,37212


In [22]:
#all classifications containing 'Hospital'

cities = ['NASHVILLE',
          'MURFREESBORO',
          'BRENTWOOD',
          'FRANKLIN',
          'HENDERSONVILLE',
          'GOODLETTSVILLE',
          'LA VERGNE',
          'SMYRNA',
          'MT. JULIET',
          'GALLATIN']

hospitals = (groups
     .loc[(groups['Classification'].str.contains('Hospital', na = False)) 
           & 
            (groups['city'].isin(cities))
           ,
          ]
     .drop_duplicates()
     .sort_values('organization')
)

hospitals

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
30398,1336119627,150892114,2.0,284300000X,Special Hospital,,BAPTIST WOMEN'S HEALTH CENTER LLC,2011 MURPHY AVE STE 400,,NASHVILLE,TN,37203
30396,1336119627,140924362,2.0,284300000X,Special Hospital,,BAPTIST WOMEN'S HEALTH CENTER LLC,2011 MURPHY AVE STE 400,,NASHVILLE,TN,37203
30395,1336119627,132542700,2.0,284300000X,Special Hospital,,BAPTIST WOMEN'S HEALTH CENTER LLC,2011 MURPHY AVE STE 400,,NASHVILLE,TN,37203
30394,1336119627,125171740,2.0,284300000X,Special Hospital,,BAPTIST WOMEN'S HEALTH CENTER LLC,2011 MURPHY AVE STE 400,,NASHVILLE,TN,37203
30393,1336119627,122892853,2.0,284300000X,Special Hospital,,BAPTIST WOMEN'S HEALTH CENTER LLC,2011 MURPHY AVE STE 400,,NASHVILLE,TN,37203
...,...,...,...,...,...,...,...,...,...,...,...,...
10993,1265445506,74413116,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10992,1265445506,74413113,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10991,1265445506,72681241,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10989,1265445506,67595932,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067


In [23]:
#build full address, add to dfs

def build_address(df):
    address = ((df['address_line1'] + ', ' + df['address_line2']).mask(pd.isna, df['address_line1'])
         + ', ' 
         + df['city'] 
         + ', ' 
         + df['state']
         + ' '
         + df['zip']
    )
    return address

In [24]:
build_address(hospitals)

30398    2011 MURPHY AVE STE 400, NASHVILLE, TN 37203
30396    2011 MURPHY AVE STE 400, NASHVILLE, TN 37203
30395    2011 MURPHY AVE STE 400, NASHVILLE, TN 37203
30394    2011 MURPHY AVE STE 400, NASHVILLE, TN 37203
30393    2011 MURPHY AVE STE 400, NASHVILLE, TN 37203
                             ...                     
10993      4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
10992      4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
10991      4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
10989      4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
11069      4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067
Length: 7254, dtype: object

In [25]:
providers['address'] = build_address(providers)
hospitals['address'] = build_address(hospitals)
gach['address'] = build_address(gach)

In [26]:
#Combine relevant dfs to make full_refs

full_refs = (providers.merge(gach, 
                on = 'referral_id',
               suffixes = ('_provider', '_hospital'))
          .merge(referrals,
                on = ['referral_id', 'from_npi', 'to_npi'])
)

full_refs

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,Classification_provider,Specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
0,1548466808,9550017,1.0,"JONES, MATTHEW",2085R0202X,Radiology,Diagnostic Radiology,,1265 UNION AVE,,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",121,135,32.563,62.417
1,1558355941,9550019,1.0,"JOSEPH, JOJU",207RN0300X,Internal Medicine,Nephrology,,270 EAST MAIN STREET,SUITE 200,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",81,292,6.767,30.330
2,1790891315,13457790,1.0,"RUSSO, DONALD",207RC0000X,Internal Medicine,Cardiovascular Disease,,353 NEW SHACKLE ISLAND RD,#300C,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",70,71,47.254,75.620
3,1053366369,20389882,1.0,"NABORS, GLENN",2085R0202X,Radiology,Diagnostic Radiology,,620 HARTSVILLE PIKE,,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",141,154,38.636,68.659
4,1043663719,20389883,1.0,"GUSTAFSON, STEPHANIE",367500000X,"Nurse Anesthetist, Certified Registered",,,110 29TH AVE N,SUITE 201,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",71,73,5.808,21.015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6870,1174565543,167471199,1.0,"DOSS, HABIB",207RH0003X,Internal Medicine,Hematology & Oncology,,2004 HAYES ST STE 350,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",69,143,3.273,26.756
6871,1942254883,174721235,1.0,"BURCHAM, ROBERT",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",52,65,13.154,32.501
6872,1194751354,181082032,1.0,"PEACOCK, NANCY",207RX0202X,Internal Medicine,Medical Oncology,,2004 HAYES ST STE 350,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",37,64,12.641,38.231
6873,1194743914,181082033,1.0,"PENLEY, WILLIAM",207RX0202X,Internal Medicine,Medical Oncology,,2004 HAYES ST STE 350,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",43,69,23.420,48.484


In [54]:
#Combining relevant dfs to make full_refs_hosp

full_refs_hosp = (providers.merge(hospitals, 
                on = 'referral_id',
               suffixes = ('_provider', '_hospital'))
          .merge(referrals,
                on = ['referral_id', 'from_npi', 'to_npi'])
)

full_refs_hosp

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,Classification_provider,Specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
0,1548466808,9550017,1.0,"JONES, MATTHEW",2085R0202X,Radiology,Diagnostic Radiology,,1265 UNION AVE,,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",121,135,32.563,62.417
1,1558355941,9550019,1.0,"JOSEPH, JOJU",207RN0300X,Internal Medicine,Nephrology,,270 EAST MAIN STREET,SUITE 200,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",81,292,6.767,30.330
2,1790891315,13457790,1.0,"RUSSO, DONALD",207RC0000X,Internal Medicine,Cardiovascular Disease,,353 NEW SHACKLE ISLAND RD,#300C,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",70,71,47.254,75.620
3,1053366369,20389882,1.0,"NABORS, GLENN",2085R0202X,Radiology,Diagnostic Radiology,,620 HARTSVILLE PIKE,,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",141,154,38.636,68.659
4,1043663719,20389883,1.0,"GUSTAFSON, STEPHANIE",367500000X,"Nurse Anesthetist, Certified Registered",,,110 29TH AVE N,SUITE 201,...,555 HARTSVILLE PIKE,,GALLATIN,TN,37066,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",71,73,5.808,21.015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7249,1174565543,167471199,1.0,"DOSS, HABIB",207RH0003X,Internal Medicine,Hematology & Oncology,,2004 HAYES ST STE 350,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",69,143,3.273,26.756
7250,1942254883,174721235,1.0,"BURCHAM, ROBERT",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",52,65,13.154,32.501
7251,1194751354,181082032,1.0,"PEACOCK, NANCY",207RX0202X,Internal Medicine,Medical Oncology,,2004 HAYES ST STE 350,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",37,64,12.641,38.231
7252,1194743914,181082033,1.0,"PENLEY, WILLIAM",207RX0202X,Internal Medicine,Medical Oncology,,2004 HAYES ST STE 350,,...,2000 CHURCH ST,,NASHVILLE,TN,37236,"2000 CHURCH ST, NASHVILLE, TN 37236",43,69,23.420,48.484


In [28]:
#list of hospitals in the dataset
full_refs_hosp['organization_hospital'].unique()

array(['SUMNER REGIONAL HEALTH SYSTEMS, INC',
       'SUMNER REGIONAL MEDICAL CENTER LLC',
       'WILLIAMSON COUNTY HOSPITAL DISTRICT',
       'ROLLING HILLS HOSPITAL LLC',
       'ENCOMPASS HEALTH REHABILITATION HOSPITAL OF FRANKLIN, LLC',
       'HENDERSONVILLE HOSPITAL CORPORATION', 'TRUSTPOINT HOSPITAL, LLC',
       'SAINT THOMAS RUTHERFORD HOSPITAL',
       'HCA HEALTH SERVICES OF TENNESSEE, INC.',
       "BAPTIST WOMEN'S HEALTH CENTER LLC", 'SAINT THOMAS WEST HOSPITAL',
       'HTI MEMORIAL HOSPITAL CORPORATION',
       'MIDDLE TENNESSEE HOSPITALIST', 'NASHVILLE GENERAL HOSPITAL',
       'VANDERBILT STALLWORTH REHABILITATION HOSPITAL LP',
       'VANDERBILT UNIVERSITY MEDICAL CENTER',
       'STATE OF TENNESSEE STATE F&A PAYROLL',
       'CRESCENT MEDICAL GROUP PLLC', 'CURAHEALTH NASHVILLE, LLC',
       'SELECT SPECIALTY HOSPITAL - NASHVILLE LLC', 'SETON CORPORATION'],
      dtype=object)

In [51]:
#list of doctors referring patients to TriStar Centennial Medical Center

HCA=full_refs_hosp[full_refs_hosp['organization_hospital']== 'HCA HEALTH SERVICES OF TENNESSEE, INC.']
HCA.nlargest(40, 'patient_count')

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,Classification_provider,Specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,address_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
1823,1093753303,29474231,1.0,"WELCH, DEREK",207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,,5301 VIRGINIA WAY,SUITE 300,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",9196,16819,9.041,32.187
2476,1417131715,217106230,1.0,"RIDDICK, JOHN",207RI0011X,Internal Medicine,Interventional Cardiology,,2400 PATTERSON ST,SUITE 502,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",5571,9007,0.578,8.745
3876,1093753303,149736850,1.0,"WELCH, DEREK",207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,,5301 VIRGINIA WAY,SUITE 300,...,391 WALLACE RD,,NASHVILLE,TN,37211,"391 WALLACE RD, NASHVILLE, TN 37211",2610,5166,8.39,30.802
1595,1093753303,150281402,1.0,"WELCH, DEREK",207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,,5301 VIRGINIA WAY,SUITE 300,...,200 STONECREST BLVD,,SMYRNA,TN,37167,"200 STONECREST BLVD, SMYRNA, TN 37167",2060,3436,15.872,37.309
2082,1902823859,95264287,1.0,"HUNEYCUTT, DAVID",207RC0000X,Internal Medicine,Cardiovascular Disease,,2400 PATTERSON ST,SUITE 502,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",1935,2974,4.444,20.451
2213,1750410452,142389439,1.0,"YOUNG, ROBERT",2085R0204X,Radiology,Vascular & Interventional Radiology,,210 25TH AVE N STE 1204,,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",1663,2079,2.392,17.119
1830,1558461970,30764459,1.0,"WHEATLEY, ROBERT",207RI0011X,Internal Medicine,Interventional Cardiology,,2400 PATTERSON ST,SUITE 502,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",1634,2401,2.257,15.758
2275,1144283896,160175351,1.0,"TEPPER, PATRICIA",2085R0202X,Radiology,Diagnostic Radiology,,210 25TH AVE N,SUITE 602,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",1501,1692,4.113,22.752
3882,1114961513,155535969,1.0,"HUMPHREY, STEVEN",207RC0000X,Internal Medicine,Cardiovascular Disease,,3601 THE VANDERBILT CLINIC,,...,391 WALLACE RD,,NASHVILLE,TN,37211,"391 WALLACE RD, NASHVILLE, TN 37211",1487,2878,6.929,27.265
2248,1114101128,152119863,1.0,"PATEL, PARAG",207R00000X,Internal Medicine,,,2400 PATTERSON ST,STE 502,...,2300 PATTERSON ST,,NASHVILLE,TN,37203,"2300 PATTERSON ST, NASHVILLE, TN 37203",1378,2042,1.769,14.41


In [30]:
#top 20 taxonomies (classification/specialization) referring patients 

(full_refs.groupby(['taxonomy_code_provider', 'Classification_provider', 'Specialization_provider'])
     ['patient_count']
     .sum()
     .sort_values(ascending = False)
     .nlargest(20)
)



taxonomy_code_provider  Classification_provider  Specialization_provider                         
2085R0202X              Radiology                Diagnostic Radiology                                302876
207RC0000X              Internal Medicine        Cardiovascular Disease                              126991
207ZP0102X              Pathology                Anatomic Pathology & Clinical Pathology              49194
363LF0000X              Nurse Practitioner       Family                                               30923
207RP1001X              Internal Medicine        Pulmonary Disease                                    26743
207RN0300X              Internal Medicine        Nephrology                                           26353
363LA2100X              Nurse Practitioner       Acute Care                                           21791
207RG0100X              Internal Medicine        Gastroenterology                                     21705
207RI0011X              Internal Medic

In [78]:
#dataframe showing if the referral is to vanderbilt or not 

def make_not_to_vandy(df, group):

    # create a column that labels whether the recommendation went to Vanderbilt or not
    df['to_vandy'] = df['organization_hospital'].str.contains("VANDERBILT")

    # for each specialty, determine the count of patients recommended to and not recommended to Vanderbilt
    to_vandy = (df.groupby(group + ['to_vandy'])
         ['patient_count']
         .sum()
         .reset_index()
    )

    # add a column for overall count of referrals by specialty, then determine proportion
    # of referrals to Vanderbilt vs proportion not to Vanderbilt
    to_vandy['patient_count_overall'] = (to_vandy.groupby(group)
                                     ['patient_count']
                                     .transform(sum)
                                    )
    to_vandy['patient_prop'] = round((to_vandy['patient_count']/
                                     to_vandy['patient_count_overall']
                                    ), 2)

    # filter to focus just on referrals not to Vanderbilt
    not_to_vandy = to_vandy[to_vandy['to_vandy'] == False]
    
    return not_to_vandy.sort_values('patient_count', ascending = False)

In [79]:
#top 25 specialties not referring to vandy

spec_not_to_vandy = make_not_to_vandy(full_refs, ['taxonomy_code_provider', 'Classification_provider', 'Specialization_provider'])

spec_not_to_vandy.nlargest(25, 'patient_count')

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
141,2085R0202X,Radiology,Diagnostic Radiology,False,230982,302876,0.76
35,207RC0000X,Internal Medicine,Cardiovascular Disease,False,68782,126991,0.54
101,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,False,39049,49194,0.79
61,207RP1001X,Internal Medicine,Pulmonary Disease,False,21469,26743,0.8
55,207RI0011X,Internal Medicine,Interventional Cardiology,False,17511,20002,0.88
184,363LF0000X,Nurse Practitioner,Family,False,17198,30923,0.56
59,207RN0300X,Internal Medicine,Nephrology,False,16921,26353,0.64
43,207RG0100X,Internal Medicine,Gastroenterology,False,14090,21705,0.65
37,207RC0001X,Internal Medicine,Clinical Cardiac Electrophysiology,False,10870,14103,0.77
120,2084N0400X,Psychiatry & Neurology,Neurology,False,8834,18003,0.49


In [80]:
# specialties where at least 50% of patients are being referred somewhere other than Vanderbilt
spec_not_to_vandy[spec_not_to_vandy['patient_prop'] >= 0.5].nlargest(25, 'patient_count')

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
141,2085R0202X,Radiology,Diagnostic Radiology,False,230982,302876,0.76
35,207RC0000X,Internal Medicine,Cardiovascular Disease,False,68782,126991,0.54
101,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,False,39049,49194,0.79
61,207RP1001X,Internal Medicine,Pulmonary Disease,False,21469,26743,0.8
55,207RI0011X,Internal Medicine,Interventional Cardiology,False,17511,20002,0.88
184,363LF0000X,Nurse Practitioner,Family,False,17198,30923,0.56
59,207RN0300X,Internal Medicine,Nephrology,False,16921,26353,0.64
43,207RG0100X,Internal Medicine,Gastroenterology,False,14090,21705,0.65
37,207RC0001X,Internal Medicine,Clinical Cardiac Electrophysiology,False,10870,14103,0.77
68,207RX0202X,Internal Medicine,Medical Oncology,False,6670,11598,0.58


In [81]:
# specialties where all patients are referred somewhere other than Vanderbilt
spec_not_to_vandy[spec_not_to_vandy['patient_prop'] == 1].nlargest(25, 'patient_count')


Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
86,207XS0117X,Orthopaedic Surgery,Orthopaedic Surgery of the Spine,False,2290,2290,1.0
143,2085R0203X,Radiology,Therapeutic Radiology,False,218,218,1.0
149,2086S0105X,Surgery,Surgery of the Hand,False,194,194,1.0
109,2080P0207X,Pediatrics,Pediatric Hematology-Oncology,False,139,139,1.0
197,364SW0102X,Clinical Nurse Specialist,Women's Health,False,135,135,1.0
21,207QA0401X,Family Medicine,Addiction Medicine,False,81,81,1.0
9,163WG0600X,Registered Nurse,Gerontology,False,71,71,1.0
80,207WX0200X,Ophthalmology,Ophthalmic Plastic and Reconstructive Surgery,False,55,55,1.0
173,261QP2300X,Clinic/Center,Primary Care,False,55,55,1.0
7,1223S0112X,Dentist,Oral and Maxillofacial Surgery,False,38,38,1.0


In [82]:
#providers not referring patients to vanderbilt 

#In internal medicine, John Riddick and in pathology, Derek Welch should be reached out by vanderbilt to gain patients. 

prov_not_to_vandy = make_not_to_vandy(full_refs, ['from_npi', 'name', 'Classification_provider', 'Specialization_provider'])
prov_not_to_vandy.nlargest(25, 'patient_count')

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
199,1093753303,"WELCH, DEREK",Pathology,Anatomic Pathology & Clinical Pathology,False,15506,15506,1.0
1023,1417131715,"RIDDICK, JOHN",Internal Medicine,Interventional Cardiology,False,6168,6168,1.0
306,1134321235,"PARIKH, VIRAJ",Radiology,Diagnostic Radiology,False,4685,4685,1.0
97,1043232879,"GORDON, JONATHAN",Radiology,Diagnostic Radiology,False,4485,4485,1.0
1798,1710932017,"HIMMELFARB, ELLIOT",Radiology,Diagnostic Radiology,False,3601,3601,1.0
2136,1851339634,"HARNEY, IANTHA",Radiology,Diagnostic Radiology,False,3455,3455,1.0
1912,1760672026,"HAQUE, AMER",Radiology,Diagnostic Radiology,False,3401,3401,1.0
2306,1912984758,"LASSITER, GREGORY",Radiology,Diagnostic Radiology,False,3371,3371,1.0
2188,1871548818,"KLEIN, WILLIAM",Radiology,Diagnostic Radiology,False,3292,3292,1.0
824,1336189521,"WILLIAMS, JEFFREY",Radiology,Diagnostic Radiology,False,3238,3238,1.0


In [91]:
prov_not_to_vandy[prov_not_to_vandy['Specialization_provider'] != 'Diagnostic Radiology'].nlargest(25, 'patient_count')


Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
199,1093753303,"WELCH, DEREK",Pathology,Anatomic Pathology & Clinical Pathology,False,15506,15506,1.0
1023,1417131715,"RIDDICK, JOHN",Internal Medicine,Interventional Cardiology,False,6168,6168,1.0
1887,1750410452,"YOUNG, ROBERT",Radiology,Vascular & Interventional Radiology,False,2674,2674,1.0
1717,1679689285,"CALLISTER, TRACY",Internal Medicine,Cardiovascular Disease,False,2280,2526,0.9
258,1114961513,"HUMPHREY, STEVEN",Internal Medicine,Cardiovascular Disease,False,2187,2187,1.0
2265,1902823859,"HUNEYCUTT, DAVID",Internal Medicine,Cardiovascular Disease,False,2163,2163,1.0
121,1053337717,"KAZA, SUNIL",Internal Medicine,Cardiovascular Disease,False,2119,2119,1.0
1068,1427079946,"CONLEY, CHRISTOPHER",Internal Medicine,Clinical Cardiac Electrophysiology,False,2070,2070,1.0
236,1104933738,"LEE, JUNG",Internal Medicine,Cardiovascular Disease,False,2049,2049,1.0
2447,1972658060,"KETCH, TERRY",Internal Medicine,Cardiovascular Disease,False,2018,2018,1.0


In [93]:
not_vandy_doc=prov_not_to_vandy[prov_not_to_vandy['Classification_provider'] != 'Radiology'].nlargest(25, 'patient_count')
not_vandy_doc

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
199,1093753303,"WELCH, DEREK",Pathology,Anatomic Pathology & Clinical Pathology,False,15506,15506,1.0
1023,1417131715,"RIDDICK, JOHN",Internal Medicine,Interventional Cardiology,False,6168,6168,1.0
1717,1679689285,"CALLISTER, TRACY",Internal Medicine,Cardiovascular Disease,False,2280,2526,0.9
258,1114961513,"HUMPHREY, STEVEN",Internal Medicine,Cardiovascular Disease,False,2187,2187,1.0
2265,1902823859,"HUNEYCUTT, DAVID",Internal Medicine,Cardiovascular Disease,False,2163,2163,1.0
121,1053337717,"KAZA, SUNIL",Internal Medicine,Cardiovascular Disease,False,2119,2119,1.0
1068,1427079946,"CONLEY, CHRISTOPHER",Internal Medicine,Clinical Cardiac Electrophysiology,False,2070,2070,1.0
236,1104933738,"LEE, JUNG",Internal Medicine,Cardiovascular Disease,False,2049,2049,1.0
2447,1972658060,"KETCH, TERRY",Internal Medicine,Cardiovascular Disease,False,2018,2018,1.0
544,1225027014,"KUDELKO, PAUL",Internal Medicine,Cardiovascular Disease,False,1961,1961,1.0


In [94]:
not_vandy_doc[['name', 'Classification_provider', 'Specialization_provider', 'patient_count']]

Unnamed: 0,name,Classification_provider,Specialization_provider,patient_count
199,"WELCH, DEREK",Pathology,Anatomic Pathology & Clinical Pathology,15506
1023,"RIDDICK, JOHN",Internal Medicine,Interventional Cardiology,6168
1717,"CALLISTER, TRACY",Internal Medicine,Cardiovascular Disease,2280
258,"HUMPHREY, STEVEN",Internal Medicine,Cardiovascular Disease,2187
2265,"HUNEYCUTT, DAVID",Internal Medicine,Cardiovascular Disease,2163
121,"KAZA, SUNIL",Internal Medicine,Cardiovascular Disease,2119
1068,"CONLEY, CHRISTOPHER",Internal Medicine,Clinical Cardiac Electrophysiology,2070
236,"LEE, JUNG",Internal Medicine,Cardiovascular Disease,2049
2447,"KETCH, TERRY",Internal Medicine,Cardiovascular Disease,2018
544,"KUDELKO, PAUL",Internal Medicine,Cardiovascular Disease,1961


In [83]:
#for family medicine, Donald Vollmer and Raul Couret can be reached out by Vanderbilt to gain patients. 
#However, it might be better to look at specific specialties 

family_medicine=prov_not_to_vandy[prov_not_to_vandy['Classification_provider']=='Family Medicine']
family_medicine

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
429,1184612921,"VOLLMER, DONALD",Family Medicine,Geriatric Medicine,False,779,898,0.87
753,1306969696,"COURET, RAUL",Family Medicine,Geriatric Medicine,False,549,615,0.89
439,1184782740,"DOLAPTCHIEV, BOJIDAR",Family Medicine,Adult Medicine,False,396,396,1.0
2190,1871565465,"HOOPER, HALDEN",Family Medicine,Geriatric Medicine,False,232,259,0.9
511,1205978152,"STEUART, CATHERINE",Family Medicine,Hospice and Palliative Medicine,False,189,189,1.0
505,1205879467,"KELLOGG, MICHAEL",Family Medicine,Adult Medicine,False,186,219,0.85
437,1184697898,"DOZIER, KENNETH",Family Medicine,Geriatric Medicine,False,115,115,1.0
85,1033343728,"COLLINS, KIMBERLY",Family Medicine,Addiction Medicine,False,81,81,1.0
1522,1598887473,"ORUGANTI, NAGA VIJAYA",Family Medicine,Geriatric Medicine,False,68,92,0.74
82,1033316484,"KINDRED, JEFFRIE",Family Medicine,Sports Medicine,False,38,38,1.0


In [65]:
family_medicine[['name', 'Classification_provider', 'Specialization_provider', 'patient_count']]

Unnamed: 0,name,Classification_provider,Specialization_provider,patient_count
429,"VOLLMER, DONALD",Family Medicine,Geriatric Medicine,779
753,"COURET, RAUL",Family Medicine,Geriatric Medicine,549
439,"DOLAPTCHIEV, BOJIDAR",Family Medicine,Adult Medicine,396
2190,"HOOPER, HALDEN",Family Medicine,Geriatric Medicine,232
511,"STEUART, CATHERINE",Family Medicine,Hospice and Palliative Medicine,189
505,"KELLOGG, MICHAEL",Family Medicine,Adult Medicine,186
437,"DOZIER, KENNETH",Family Medicine,Geriatric Medicine,115
85,"COLLINS, KIMBERLY",Family Medicine,Addiction Medicine,81
1522,"ORUGANTI, NAGA VIJAYA",Family Medicine,Geriatric Medicine,68
82,"KINDRED, JEFFRIE",Family Medicine,Sports Medicine,38


In [37]:
prov_not_to_vandy[prov_not_to_vandy['Specialization_provider']=='Orthopedic']

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
1604,1629320130,"LAY, NATHANIEL",Physical Therapist,Orthopedic,False,47,47,1.0


In [87]:
#list of orthopedic surgeons vanderbilt should reach out to gain patients 

ortho=prov_not_to_vandy[prov_not_to_vandy['Classification_provider']=='Orthopaedic Surgery']
ortho

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,patient_prop
117,1053308502,"KLEKAMP, JOHN",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,False,492,492,1.0
170,1083601397,"MCNAMARA, MICHAEL",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,False,425,425,1.0
1588,1619978822,"BABAT, LAWRENCE",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,False,353,353,1.0
1585,1619964111,"WURTH, TODD",Orthopaedic Surgery,Hand Surgery,False,351,351,1.0
1050,1417973249,"CALENDINE, CORY",Orthopaedic Surgery,Adult Reconstructive Orthopaedic Surgery,False,331,538,0.62
1383,1548485071,"PERKINSON, BRIAN",Orthopaedic Surgery,Adult Reconstructive Orthopaedic Surgery,False,317,490,0.65
2068,1821046459,"FISH, JAMES",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,False,258,258,1.0
763,1316050487,"LOONEY, COLIN",Orthopaedic Surgery,Sports Medicine,False,232,399,0.58
281,1124175948,"BYRAM, IAN",Orthopaedic Surgery,Sports Medicine,False,231,367,0.63
990,1396988960,"CROSBY, SAMUEL",Orthopaedic Surgery,Hand Surgery,False,220,220,1.0


In [88]:
ortho[['name', 'Classification_provider', 'Specialization_provider', 'patient_count']]

Unnamed: 0,name,Classification_provider,Specialization_provider,patient_count
117,"KLEKAMP, JOHN",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,492
170,"MCNAMARA, MICHAEL",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,425
1588,"BABAT, LAWRENCE",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,353
1585,"WURTH, TODD",Orthopaedic Surgery,Hand Surgery,351
1050,"CALENDINE, CORY",Orthopaedic Surgery,Adult Reconstructive Orthopaedic Surgery,331
1383,"PERKINSON, BRIAN",Orthopaedic Surgery,Adult Reconstructive Orthopaedic Surgery,317
2068,"FISH, JAMES",Orthopaedic Surgery,Orthopaedic Surgery of the Spine,258
763,"LOONEY, COLIN",Orthopaedic Surgery,Sports Medicine,232
281,"BYRAM, IAN",Orthopaedic Surgery,Sports Medicine,231
990,"CROSBY, SAMUEL",Orthopaedic Surgery,Hand Surgery,220


In [39]:
#list of all specialties

prov_not_to_vandy['Specialization_provider'].unique()

array(['Anatomic Pathology & Clinical Pathology',
       'Interventional Cardiology', 'Diagnostic Radiology',
       'Vascular & Interventional Radiology', 'Cardiovascular Disease',
       'Clinical Cardiac Electrophysiology', 'Neuroradiology',
       'Advanced Heart Failure and Transplant Cardiology',
       'Medical Oncology', 'Pulmonary Disease', 'Nephrology',
       'Emergency Medical Services', 'Critical Care Medicine',
       'Gastroenterology', 'Geriatric Medicine', 'Surgical Oncology',
       'Clinical Pathology/Laboratory Medicine', 'Family',
       'Endocrinology, Diabetes & Metabolism', 'Hematology & Oncology',
       'Acute Care', 'Neurology', 'Gynecology',
       'Orthopaedic Surgery of the Spine', 'Vascular Surgery',
       'Vascular Neurology', 'Infectious Disease', "Women's Health",
       'Sleep Medicine', 'Foot & Ankle Surgery', 'Medical',
       'Adult Medicine', 'Adult Health', 'Radiation Oncology',
       'Hand Surgery', 'Rheumatology',
       'Adult Reconstructive

In [40]:
prov_not_to_vandy['Classification_provider'].unique()

array(['Pathology', 'Internal Medicine', 'Radiology',
       'Emergency Medicine', 'Anesthesiology', 'Family Medicine',
       'Surgery', 'Nurse Practitioner', 'Psychiatry & Neurology',
       'Obstetrics & Gynecology', 'Orthopaedic Surgery', 'Podiatrist',
       'Physician Assistant', 'Ophthalmology',
       'Physical Medicine & Rehabilitation', 'Pain Medicine',
       'Clinical Nurse Specialist', 'Otolaryngology', 'Pediatrics',
       'Urology', 'Registered Nurse', 'Clinic/Center',
       'Allergy & Immunology', 'Physical Therapist',
       'Preventive Medicine', 'Dentist', 'Psychologist', 'Social Worker'],
      dtype=object)

In [41]:
#specialty referrals to hospitals
#diagnostic radiology is the highest specialty 

spec_to_hosp = (full_refs.groupby(['taxonomy_code_provider', 'Classification_provider', 'Specialization_provider', 'to_npi', 'organization_hospital', 'address_hospital'])
     ['patient_count']
     .sum()
     .sort_values(ascending = False)
     .reset_index()
)

spec_to_hosp

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_npi,organization_hospital,address_hospital,patient_count
0,2085R0202X,Radiology,Diagnostic Radiology,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",71894
1,207RC0000X,Internal Medicine,Cardiovascular Disease,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",58209
2,2085R0202X,Radiology,Diagnostic Radiology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",42282
3,2085R0202X,Radiology,Diagnostic Radiology,1629025648,SAINT THOMAS WEST HOSPITAL,"4220 HARDING RD, NASHVILLE, TN 37205",36715
4,2085R0202X,Radiology,Diagnostic Radiology,1780778969,SAINT THOMAS WEST HOSPITAL,"2000 CHURCH ST, NASHVILLE, TN 37236",28344
...,...,...,...,...,...,...,...
560,207VX0201X,Obstetrics & Gynecology,Gynecologic Oncology,1538114434,HENDERSONVILLE HOSPITAL CORPORATION,"355 NEW SHACKLE ISLAND RD, HENDERSONVILLE, TN ...",21
561,2080P0210X,Pediatrics,Pediatric Nephrology,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",19
562,207RR0500X,Internal Medicine,Rheumatology,1568551455,NASHVILLE GENERAL HOSPITAL,"1818 ALBION ST, ATTN: MARTHA LAMPLEY, NASHVILL...",17
563,103TC2200X,Psychologist,Clinical Child & Adolescent,1558408633,VANDERBILT UNIVERSITY MEDICAL CENTER,"1601 23RD AVE S, NASHVILLE, TN 37212",12


In [42]:
#top 20 hospital referrals

spec_to_hosp.nlargest(20, 'patient_count')


Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_npi,organization_hospital,address_hospital,patient_count
0,2085R0202X,Radiology,Diagnostic Radiology,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",71894
1,207RC0000X,Internal Medicine,Cardiovascular Disease,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",58209
2,2085R0202X,Radiology,Diagnostic Radiology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",42282
3,2085R0202X,Radiology,Diagnostic Radiology,1629025648,SAINT THOMAS WEST HOSPITAL,"4220 HARDING RD, NASHVILLE, TN 37205",36715
4,2085R0202X,Radiology,Diagnostic Radiology,1780778969,SAINT THOMAS WEST HOSPITAL,"2000 CHURCH ST, NASHVILLE, TN 37236",28344
5,2085R0202X,Radiology,Diagnostic Radiology,1295780476,HTI MEMORIAL HOSPITAL CORPORATION,"3441 DICKERSON PIKE, NASHVILLE, TN 37207",27111
6,2085R0202X,Radiology,Diagnostic Radiology,1164590386,SAINT THOMAS RUTHERFORD HOSPITAL,"1700 MEDICAL CENTER PKWY, MURFREESBORO, TN 37129",21610
7,2085R0202X,Radiology,Diagnostic Radiology,1265445506,WILLIAMSON COUNTY HOSPITAL DISTRICT,"4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067",20038
8,207RC0000X,Internal Medicine,Cardiovascular Disease,1629025648,SAINT THOMAS WEST HOSPITAL,"4220 HARDING RD, NASHVILLE, TN 37205",16639
9,2085R0202X,Radiology,Diagnostic Radiology,1538114434,HENDERSONVILLE HOSPITAL CORPORATION,"355 NEW SHACKLE ISLAND RD, HENDERSONVILLE, TN ...",15831


In [43]:
spec_to_hosp['organization_hospital'].unique()

array(['VANDERBILT UNIVERSITY MEDICAL CENTER',
       'HCA HEALTH SERVICES OF TENNESSEE, INC.',
       'SAINT THOMAS WEST HOSPITAL', 'HTI MEMORIAL HOSPITAL CORPORATION',
       'SAINT THOMAS RUTHERFORD HOSPITAL',
       'WILLIAMSON COUNTY HOSPITAL DISTRICT',
       'HENDERSONVILLE HOSPITAL CORPORATION',
       'SUMNER REGIONAL MEDICAL CENTER LLC',
       'SUMNER REGIONAL HEALTH SYSTEMS, INC',
       'NASHVILLE GENERAL HOSPITAL', 'SETON CORPORATION',
       'TRUSTPOINT HOSPITAL, LLC'], dtype=object)

In [44]:
#radiology and pathology are the top two fields that vanderbilt needs to look closely 


spec_to_hosp[spec_to_hosp['organization_hospital']=='HCA HEALTH SERVICES OF TENNESSEE, INC.']

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_npi,organization_hospital,address_hospital,patient_count
2,2085R0202X,Radiology,Diagnostic Radiology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",42282
11,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",13682
15,2085R0202X,Radiology,Diagnostic Radiology,1720032345,"HCA HEALTH SERVICES OF TENNESSEE, INC.","391 WALLACE RD, NASHVILLE, TN 37211",10179
17,2085R0202X,Radiology,Diagnostic Radiology,1992776405,"HCA HEALTH SERVICES OF TENNESSEE, INC.","200 STONECREST BLVD, SMYRNA, TN 37167",9895
18,207RI0011X,Internal Medicine,Interventional Cardiology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",9676
...,...,...,...,...,...,...,...
543,103TC0700X,Psychologist,Clinical,1992776405,"HCA HEALTH SERVICES OF TENNESSEE, INC.","200 STONECREST BLVD, SMYRNA, TN 37167",35
544,207XS0106X,Orthopaedic Surgery,Hand Surgery,1992776405,"HCA HEALTH SERVICES OF TENNESSEE, INC.","200 STONECREST BLVD, SMYRNA, TN 37167",34
546,363LW0102X,Nurse Practitioner,Women's Health,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",34
550,363LP0808X,Nurse Practitioner,Psychiatric/Mental Health,1992776405,"HCA HEALTH SERVICES OF TENNESSEE, INC.","200 STONECREST BLVD, SMYRNA, TN 37167",32


In [45]:
spec_to_hosp[spec_to_hosp['Specialization_provider'] != 'Diagnostic Radiology'].nlargest(20, 'patient_count')


Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_npi,organization_hospital,address_hospital,patient_count
1,207RC0000X,Internal Medicine,Cardiovascular Disease,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",58209
8,207RC0000X,Internal Medicine,Cardiovascular Disease,1629025648,SAINT THOMAS WEST HOSPITAL,"4220 HARDING RD, NASHVILLE, TN 37205",16639
10,363LF0000X,Nurse Practitioner,Family,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",13725
11,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",13682
12,363LA2100X,Nurse Practitioner,Acute Care,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",13294
14,207RC0000X,Internal Medicine,Cardiovascular Disease,1265445506,WILLIAMSON COUNTY HOSPITAL DISTRICT,"4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067",10258
16,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",10145
18,207RI0011X,Internal Medicine,Interventional Cardiology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",9676
19,207RH0003X,Internal Medicine,Hematology & Oncology,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",9659
20,207RN0300X,Internal Medicine,Nephrology,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER,"1211 MEDICAL CENTER DRIVE, NASHVILLE, TN 37232",9432


In [46]:
(spec_to_hosp[(spec_to_hosp['Specialization_provider'] != 'Diagnostic Radiology')
             &
             (spec_to_hosp['organization_hospital'] != 'VANDERBILT UNIVERSITY MEDICAL CENTER')]
             .nlargest(20, 'patient_count')
)

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_npi,organization_hospital,address_hospital,patient_count
8,207RC0000X,Internal Medicine,Cardiovascular Disease,1629025648,SAINT THOMAS WEST HOSPITAL,"4220 HARDING RD, NASHVILLE, TN 37205",16639
11,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",13682
14,207RC0000X,Internal Medicine,Cardiovascular Disease,1265445506,WILLIAMSON COUNTY HOSPITAL DISTRICT,"4321 CAROTHERS PARKWAY, FRANKLIN, TN 37067",10258
18,207RI0011X,Internal Medicine,Interventional Cardiology,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",9676
22,207RC0000X,Internal Medicine,Cardiovascular Disease,1023055126,"HCA HEALTH SERVICES OF TENNESSEE, INC.","2300 PATTERSON ST, NASHVILLE, TN 37203",7902
26,207RC0000X,Internal Medicine,Cardiovascular Disease,1538114434,HENDERSONVILLE HOSPITAL CORPORATION,"355 NEW SHACKLE ISLAND RD, HENDERSONVILLE, TN ...",6270
27,207RC0000X,Internal Medicine,Cardiovascular Disease,1447571658,SUMNER REGIONAL MEDICAL CENTER LLC,"555 HARTSVILLE PIKE, GALLATIN, TN 37066",6170
28,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,1780778969,SAINT THOMAS WEST HOSPITAL,"2000 CHURCH ST, NASHVILLE, TN 37236",5882
29,207RC0000X,Internal Medicine,Cardiovascular Disease,1780778969,SAINT THOMAS WEST HOSPITAL,"2000 CHURCH ST, NASHVILLE, TN 37236",5571
33,207RC0000X,Internal Medicine,Cardiovascular Disease,1164590386,SAINT THOMAS RUTHERFORD HOSPITAL,"1700 MEDICAL CENTER PKWY, MURFREESBORO, TN 37129",5140


In [47]:
#number of providers
full_refs_hosp['address_provider'].nunique()

2157

In [48]:
#number of hospitals
full_refs_hosp['address_hospital'].nunique()

22

In [49]:

def make_not_to_vandy(df, group):

    # create a column that labels whether the recommendation went to Vanderbilt or not
    df['to_vandy'] = df['organization_hospital'].str.contains("VANDERBILT")

    # for each specialty, determine the transaction count 
    to_vandy = (df.groupby(group + ['to_vandy'])
         ['transaction_count']
         .sum()
         .reset_index()
    )

    # add a column for overall count of referrals by specialty, then determine proportion
    # of transaction in Vanderbilt vs proportion not in Vanderbilt
    to_vandy['transaction_count_overall'] = (to_vandy.groupby(group)
                                     ['transaction_count']
                                     .transform(sum)
                                    )
    to_vandy['transaction_prop'] = (to_vandy['transaction_count']/
                                     to_vandy['transaction_count_overall']
                                    )

    # filter to focus just on referrals not to Vanderbilt
    not_to_vandy = to_vandy[to_vandy['to_vandy'] == False]
    
    return not_to_vandy.sort_values('transaction_count', ascending = False)

In [50]:
#top 25 specialties not referring to vandy

spec_not_to_vandy = make_not_to_vandy(full_refs, ['taxonomy_code_provider', 'Classification_provider', 'Specialization_provider'])

spec_not_to_vandy.nlargest(25, 'transaction_count')

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_vandy,transaction_count,transaction_count_overall,transaction_prop
141,2085R0202X,Radiology,Diagnostic Radiology,False,257888,335792,0.767999
35,207RC0000X,Internal Medicine,Cardiovascular Disease,False,101095,178469,0.566457
101,207ZP0102X,Pathology,Anatomic Pathology & Clinical Pathology,False,52401,63471,0.82559
61,207RP1001X,Internal Medicine,Pulmonary Disease,False,42755,51295,0.833512
59,207RN0300X,Internal Medicine,Nephrology,False,38464,56694,0.678449
184,363LF0000X,Nurse Practitioner,Family,False,29593,49040,0.603446
55,207RI0011X,Internal Medicine,Interventional Cardiology,False,27244,30148,0.903675
43,207RG0100X,Internal Medicine,Gastroenterology,False,22780,33726,0.675443
37,207RC0001X,Internal Medicine,Clinical Cardiac Electrophysiology,False,18820,23626,0.79658
51,207RH0003X,Internal Medicine,Hematology & Oncology,False,15199,42759,0.355457
