In [71]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import sqlite3



In [2]:
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time

query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT *
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT npi
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT 
            to_npi,
            referral_id,
            qf.entity_type_code,
            qf.taxonomy_code,
            classification,
            specialization,
            qf."provider_organization_name_(legal_business_name)" AS organization,
            qf.provider_first_line_business_practice_location_address AS address_line1,
            qf.provider_second_line_business_practice_location_address AS address_line2,
            qf.provider_business_practice_location_address_city_name AS city,
            qf.provider_business_practice_location_address_state_name AS state,
            qf.provider_business_practice_location_address_postal_code AS zip
        FROM qualifying_refs AS qf
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""
with sqlite3.connect('data/hopteam.sqlite') as db:
    groups = pd.read_sql(query, db)

In [3]:
groups

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
0,1013012616,18854880,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
1,1013012616,131966153,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
2,1013012616,146169993,2.0,207Q00000X,Family Medicine,,"CRIPPS, HOOPER & RHODY, PLLC",400 E PUBLIC SQUARE,,ALEXANDRIA,TN,37012
3,1902804271,320488,2.0,208VP0014X,Pain Medicine,Interventional Pain Medicine,"PAIN MANAGEMENT GROUP, P.C.",5801 CROSSINGS BLVD,,ANTIOCH,TN,37013
4,1902804271,6754370,2.0,208VP0014X,Pain Medicine,Interventional Pain Medicine,"PAIN MANAGEMENT GROUP, P.C.",5801 CROSSINGS BLVD,,ANTIOCH,TN,37013
...,...,...,...,...,...,...,...,...,...,...,...,...
69422,1669872735,30083148,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69423,1669872735,114150220,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69424,1669872735,178563129,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563
69425,1669872735,184096138,2.0,363LF0000X,Nurse Practitioner,Family,"RIVERVIEW PHYSICIAN PRACTICES, LLC",14 MAIN STREET,SUITE B,GORDONSVILLE,TN,38563


In [4]:
# profiles of all providers
# with referrals to metro nashville groups (entity 2) 
#having referrals of over 50 transactions and under 50 average wait time

query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT 
                referral_id
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            referral_id,
            entity_type_code,
            "provider_last_name_(legal_name)" || ', ' || provider_first_name AS name,
            taxonomy_code,
            classification,
            specialization,
            "provider_organization_name_(legal_business_name)" AS organization,
            provider_first_line_business_practice_location_address AS address_line1,
            provider_second_line_business_practice_location_address AS address_line2,
            provider_business_practice_location_address_city_name AS city,
            provider_business_practice_location_address_state_name AS state,
            provider_business_practice_location_address_postal_code AS zip     
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('data/hopteam.sqlite') as db:
    providers = pd.read_sql(query, db)

In [5]:
providers

Unnamed: 0,from_npi,referral_id,entity_type_code,name,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
0,1821080961,18854880,1.0,"SHERWOOD, WILLIAM",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
1,1841282779,131966153,1.0,"RHODY, KEVIN",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
2,1770575607,146169993,1.0,"COOPER, STEVEN",207Q00000X,Family Medicine,,,302 N CONGRESS BLVD,,SMITHVILLE,TN,37166
3,1508804337,320488,1.0,"SHARMA, VINEET",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
4,1538109103,6754370,1.0,"NAU, PAUL",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69422,1558355941,30083148,1.0,"JOSEPH, JOJU",207RN0300X,Internal Medicine,Nephrology,,270 EAST MAIN STREET,SUITE 200,GALLATIN,TN,37066
69423,1023099074,114150220,1.0,"LAMBERT, HEATHER",363LW0102X,Nurse Practitioner,Women's Health,,507 GORDONSVILLE HWY,SUITE 203,GORDONSVILLE,TN,38563
69424,1184619124,178563129,1.0,"RUTHERFORD, RICHARD",207Q00000X,Family Medicine,,,133 HOSPITAL DR,SUITE 500,CARTHAGE,TN,37030
69425,1205931565,184096138,1.0,"NOBLE, MICHAEL",207Q00000X,Family Medicine,,,37 PALMER ST,,CALAIS,ME,04619


In [6]:
# referrals from providers (entity 1) to metro nashville groups (entity 2) 
# having over 50 transactions and under 50 average wait time

query = """
        
        WITH metro_entity_2 AS (
            SELECT *
            FROM profile
            WHERE provider_business_practice_location_address_postal_code IN (
                SELECT zip
                FROM zip_cbsa
                WHERE cbsa = '34980'
                )
                AND entity_type_code = 2.0
            ),
        referrals50 AS (
            SELECT *
            FROM referrals
            WHERE average_day_wait < 50
            AND transaction_count >= 50
        ),
        qualifying_refs AS (
            SELECT
                to_npi,
                referral_id,
                patient_count,
                transaction_count,
                average_day_wait,
                std_day_wait  
            FROM metro_entity_2
            INNER JOIN referral_to
            ON metro_entity_2.npi = referral_to.to_npi
            INNER JOIN referrals50
            USING (referral_id)
        ),
        entity1 AS (
            SELECT *
            FROM profile
            INNER JOIN taxonomy
            ON profile.taxonomy_code = taxonomy.code
            WHERE entity_type_code = 1.0
        )
        SELECT
            from_npi,
            to_npi,
            referral_id,
            patient_count,
            transaction_count,
            average_day_wait,
            std_day_wait
        FROM qualifying_refs
        INNER JOIN referral_from
        USING (referral_id)
        INNER JOIN entity1
        ON referral_from.from_npi = entity1.npi;

"""

with sqlite3.connect('data/hopteam.sqlite') as db:
    referrals = pd.read_sql(query, db)

In [7]:
referrals

Unnamed: 0,from_npi,to_npi,referral_id,patient_count,transaction_count,average_day_wait,std_day_wait
0,1821080961,1013012616,18854880,91,238,9.924,40.266
1,1841282779,1013012616,131966153,94,236,7.695,36.447
2,1770575607,1013012616,146169993,115,320,2.725,18.756
3,1508804337,1902804271,320488,157,163,27.583,51.939
4,1538109103,1902804271,6754370,312,320,12.709,40.583
...,...,...,...,...,...,...,...
69422,1558355941,1669872735,30083148,31,89,23.101,31.695
69423,1023099074,1669872735,114150220,256,1309,0.214,5.207
69424,1184619124,1669872735,178563129,40,90,23.089,36.995
69425,1205931565,1669872735,184096138,35,54,1.315,6.815


In [8]:
# General Acute Care Hospitals in Nashville and nearby 


cities = ['NASHVILLE',
          'MURFREESBORO',
          'BRENTWOOD',
          'FRANKLIN',
          'HENDERSONVILLE',
          'GOODLETTSVILLE',
          'LA VERGNE',
          'SMYRNA',
          'MT. JULIET',
          'GALLATIN']

gach = (groups
     .loc[(groups['Classification'].str.contains('General Acute Care Hospital', na = False)) 
            & 
           (groups['city'].isin(cities))
           ,
          ]
     .drop_duplicates()
     .sort_values('organization')
)

gach

Unnamed: 0,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
33687,1023055126,48731386,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33707,1023055126,53473559,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33708,1023055126,55525652,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33709,1023055126,55525663,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
33710,1023055126,55525676,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
...,...,...,...,...,...,...,...,...,...,...,...,...
10962,1265445506,53960238,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10961,1265445506,53960229,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10960,1265445506,52260376,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067
10967,1265445506,55966757,2.0,282N00000X,General Acute Care Hospital,,WILLIAMSON COUNTY HOSPITAL DISTRICT,4321 CAROTHERS PARKWAY,,FRANKLIN,TN,37067


In [9]:
gach.drop_duplicates(subset = ['organization', 'address_line1', 'city', 'state', 'zip']).reset_index().sort_values('organization')


Unnamed: 0,index,to_npi,referral_id,entity_type_code,taxonomy_code,Classification,Specialization,organization,address_line1,address_line2,city,state,zip
0,33687,1023055126,48731386,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",2300 PATTERSON ST,,NASHVILLE,TN,37203
1,50076,1720032345,155535970,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",391 WALLACE RD,,NASHVILLE,TN,37211
2,24794,1992776405,100689576,2.0,282N00000X,General Acute Care Hospital,,"HCA HEALTH SERVICES OF TENNESSEE, INC.",200 STONECREST BLVD,,SMYRNA,TN,37167
3,14903,1538114434,169422729,2.0,282N00000X,General Acute Care Hospital,,HENDERSONVILLE HOSPITAL CORPORATION,355 NEW SHACKLE ISLAND RD,,HENDERSONVILLE,TN,37075
4,45127,1093769606,20019626,2.0,282N00000X,General Acute Care Hospital,,HTI MEMORIAL HOSPITAL CORPORATION,3441 DICKERSON PIKE,,NASHVILLE,TN,37207
5,46796,1568551455,132969432,2.0,282N00000X,General Acute Care Hospital,,NASHVILLE GENERAL HOSPITAL,1818 ALBION ST,ATTN: MARTHA LAMPLEY,NASHVILLE,TN,37208
6,22376,1164590386,117188077,2.0,282N00000X,General Acute Care Hospital,,SAINT THOMAS RUTHERFORD HOSPITAL,1700 MEDICAL CENTER PKWY,,MURFREESBORO,TN,37129
7,44234,1629025648,176011269,2.0,282N00000X,General Acute Care Hospital,,SAINT THOMAS WEST HOSPITAL,4220 HARDING RD,,NASHVILLE,TN,37205
8,66179,1780778969,194249596,2.0,282N00000X,General Acute Care Hospital,,SAINT THOMAS WEST HOSPITAL,2000 CHURCH ST,,NASHVILLE,TN,37236
9,66249,1992861314,106709285,2.0,282N00000X,General Acute Care Hospital,,SETON CORPORATION,2000 CHURCH ST,,NASHVILLE,TN,37236


In [10]:
#Combine relevant dfs to make full_refs

full_refs = (providers.merge(gach, 
                on = 'referral_id',
               suffixes = ('_provider', '_hospital'))
          .merge(referrals,
                on = ['referral_id', 'from_npi', 'to_npi'])
)

full_refs

Unnamed: 0,from_npi,referral_id,entity_type_code_provider,name,taxonomy_code_provider,Classification_provider,Specialization_provider,organization_provider,address_line1_provider,address_line2_provider,...,organization_hospital,address_line1_hospital,address_line2_hospital,city_hospital,state_hospital,zip_hospital,patient_count,transaction_count,average_day_wait,std_day_wait
0,1548466808,9550017,1.0,"JONES, MATTHEW",2085R0202X,Radiology,Diagnostic Radiology,,1265 UNION AVE,,...,"SUMNER REGIONAL HEALTH SYSTEMS, INC",555 HARTSVILLE PIKE,,GALLATIN,TN,37066,121,135,32.563,62.417
1,1558355941,9550019,1.0,"JOSEPH, JOJU",207RN0300X,Internal Medicine,Nephrology,,270 EAST MAIN STREET,SUITE 200,...,"SUMNER REGIONAL HEALTH SYSTEMS, INC",555 HARTSVILLE PIKE,,GALLATIN,TN,37066,81,292,6.767,30.330
2,1790891315,13457790,1.0,"RUSSO, DONALD",207RC0000X,Internal Medicine,Cardiovascular Disease,,353 NEW SHACKLE ISLAND RD,#300C,...,"SUMNER REGIONAL HEALTH SYSTEMS, INC",555 HARTSVILLE PIKE,,GALLATIN,TN,37066,70,71,47.254,75.620
3,1053366369,20389882,1.0,"NABORS, GLENN",2085R0202X,Radiology,Diagnostic Radiology,,620 HARTSVILLE PIKE,,...,"SUMNER REGIONAL HEALTH SYSTEMS, INC",555 HARTSVILLE PIKE,,GALLATIN,TN,37066,141,154,38.636,68.659
4,1043663719,20389883,1.0,"GUSTAFSON, STEPHANIE",367500000X,"Nurse Anesthetist, Certified Registered",,,110 29TH AVE N,SUITE 201,...,"SUMNER REGIONAL HEALTH SYSTEMS, INC",555 HARTSVILLE PIKE,,GALLATIN,TN,37066,71,73,5.808,21.015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6870,1174565543,167471199,1.0,"DOSS, HABIB",207RH0003X,Internal Medicine,Hematology & Oncology,,2004 HAYES ST STE 350,,...,SETON CORPORATION,2000 CHURCH ST,,NASHVILLE,TN,37236,69,143,3.273,26.756
6871,1942254883,174721235,1.0,"BURCHAM, ROBERT",2085R0202X,Radiology,Diagnostic Radiology,,3024 BUSINESS PARK CIR,,...,SETON CORPORATION,2000 CHURCH ST,,NASHVILLE,TN,37236,52,65,13.154,32.501
6872,1194751354,181082032,1.0,"PEACOCK, NANCY",207RX0202X,Internal Medicine,Medical Oncology,,2004 HAYES ST STE 350,,...,SETON CORPORATION,2000 CHURCH ST,,NASHVILLE,TN,37236,37,64,12.641,38.231
6873,1194743914,181082033,1.0,"PENLEY, WILLIAM",207RX0202X,Internal Medicine,Medical Oncology,,2004 HAYES ST STE 350,,...,SETON CORPORATION,2000 CHURCH ST,,NASHVILLE,TN,37236,43,69,23.420,48.484


In [38]:
#we don't want radiology and pathology because they don't refer patients to doctors

full_refs=full_refs[(full_refs['Classification_provider'] != 'Radiology') 
         & (full_refs['Classification_provider'] != 'Pathology')
         & (full_refs['Classification_provider'] != 'Emergency Medicine')]


In [123]:
#dataframe showing if the referral is to vanderbilt or not 

def make_not_to_vandy(df, group):

    # create a column that labels whether the recommendation went to Vanderbilt or not
    df['to_vandy'] = df['organization_hospital'].str.contains('VANDERBILT')

    # for each specialty, determine the count of patients recommended to and not recommended to Vanderbilt
    to_vandy = (df.groupby(group + ['to_vandy'])
         ['patient_count']
         .sum()
         .reset_index()
    )

    # add a column for overall count of referrals by specialty, then determine proportion
    to_vandy['patient_count_overall'] = (to_vandy.groupby(group)
                                     ['patient_count']
                                     .transform(sum)
                                    )
    
    #add a column for referral percentage not to vanderbilt
    to_vandy['ref_perc_not_vandy'] = round((to_vandy['patient_count']/
                                     to_vandy['patient_count_overall']
                                    ), 2)
    
 
    #add a column for referral percentage to vanderbilt
    to_vandy['ref_perc_to_vandy']= (1- to_vandy['ref_perc_not_vandy'])
        
    
    # filter to focus just on referrals not to Vanderbilt
    not_to_vandy = to_vandy[to_vandy['to_vandy'] == False]
    
    return not_to_vandy.sort_values('patient_count', ascending = False)

In [124]:
#top 25 specialties not referring to vandy
#In internal medicine, cardiovascular disease, 54% of patients are referred somewhere else and only 46% are referred to vandy

spec_not_to_vandy = make_not_to_vandy(full_refs, ['taxonomy_code_provider', 'Classification_provider', 'Specialization_provider'])

spec_not_to_vandy.nlargest(25, 'patient_count_overall')


Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,ref_perc_not_vandy,ref_perc_to_vandy
33,207RC0000X,Internal Medicine,Cardiovascular Disease,False,68782,126991,0.54,0.46
156,363LF0000X,Nurse Practitioner,Family,False,17198,30923,0.56,0.44
59,207RP1001X,Internal Medicine,Pulmonary Disease,False,21469,26743,0.8,0.2
57,207RN0300X,Internal Medicine,Nephrology,False,16921,26353,0.64,0.36
150,363LA2100X,Nurse Practitioner,Acute Care,False,8497,21791,0.39,0.61
41,207RG0100X,Internal Medicine,Gastroenterology,False,14090,21705,0.65,0.35
53,207RI0011X,Internal Medicine,Interventional Cardiology,False,17511,20002,0.88,0.12
107,2084N0400X,Psychiatry & Neurology,Neurology,False,8834,18003,0.49,0.51
49,207RH0003X,Internal Medicine,Hematology & Oncology,False,7906,17565,0.45,0.55
35,207RC0001X,Internal Medicine,Clinical Cardiac Electrophysiology,False,10870,14103,0.77,0.23


In [125]:
#top 10 specializations (with highest number of patients) referring 25-75% patients to vandy 

top_spec=spec_not_to_vandy[(spec_not_to_vandy['ref_perc_to_vandy'] >= 0.25) 
                  & (spec_not_to_vandy['ref_perc_to_vandy']<=0.75)].nlargest(10, 'patient_count_overall')
top_spec

Unnamed: 0,taxonomy_code_provider,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,ref_perc_not_vandy,ref_perc_to_vandy
33,207RC0000X,Internal Medicine,Cardiovascular Disease,False,68782,126991,0.54,0.46
156,363LF0000X,Nurse Practitioner,Family,False,17198,30923,0.56,0.44
57,207RN0300X,Internal Medicine,Nephrology,False,16921,26353,0.64,0.36
150,363LA2100X,Nurse Practitioner,Acute Care,False,8497,21791,0.39,0.61
41,207RG0100X,Internal Medicine,Gastroenterology,False,14090,21705,0.65,0.35
107,2084N0400X,Psychiatry & Neurology,Neurology,False,8834,18003,0.49,0.51
49,207RH0003X,Internal Medicine,Hematology & Oncology,False,7906,17565,0.45,0.55
66,207RX0202X,Internal Medicine,Medical Oncology,False,6670,11598,0.58,0.42
39,207RE0101X,Internal Medicine,"Endocrinology, Diabetes & Metabolism",False,3952,10308,0.38,0.62
152,363LA2200X,Nurse Practitioner,Adult Health,False,3401,9706,0.35,0.65


In [126]:
#renaming the columns we are interested in

top_spec=top_spec.rename(columns ={'Classification_provider': 'Classification',
                                           'Specialization_provider': 'Specialization',
                                            'patient_count_overall': 'Total Patient Count',
                                            'ref_perc_to_vandy': 'Referral Percentage to Vanderlit'
    
})
top_spec

Unnamed: 0,taxonomy_code_provider,Classification,Specialization,to_vandy,patient_count,Total Patient Count,ref_perc_not_vandy,Referral Percentage to Vanderlit
33,207RC0000X,Internal Medicine,Cardiovascular Disease,False,68782,126991,0.54,0.46
156,363LF0000X,Nurse Practitioner,Family,False,17198,30923,0.56,0.44
57,207RN0300X,Internal Medicine,Nephrology,False,16921,26353,0.64,0.36
150,363LA2100X,Nurse Practitioner,Acute Care,False,8497,21791,0.39,0.61
41,207RG0100X,Internal Medicine,Gastroenterology,False,14090,21705,0.65,0.35
107,2084N0400X,Psychiatry & Neurology,Neurology,False,8834,18003,0.49,0.51
49,207RH0003X,Internal Medicine,Hematology & Oncology,False,7906,17565,0.45,0.55
66,207RX0202X,Internal Medicine,Medical Oncology,False,6670,11598,0.58,0.42
39,207RE0101X,Internal Medicine,"Endocrinology, Diabetes & Metabolism",False,3952,10308,0.38,0.62
152,363LA2200X,Nurse Practitioner,Adult Health,False,3401,9706,0.35,0.65


In [127]:
#final table that has top 10 specialization that has 25-75% referral to vanderbilt

top_spec[['Classification','Specialization','Total Patient Count', 'Referral Percentage to Vanderlit']]

Unnamed: 0,Classification,Specialization,Total Patient Count,Referral Percentage to Vanderlit
33,Internal Medicine,Cardiovascular Disease,126991,0.46
156,Nurse Practitioner,Family,30923,0.44
57,Internal Medicine,Nephrology,26353,0.36
150,Nurse Practitioner,Acute Care,21791,0.61
41,Internal Medicine,Gastroenterology,21705,0.35
107,Psychiatry & Neurology,Neurology,18003,0.51
49,Internal Medicine,Hematology & Oncology,17565,0.55
66,Internal Medicine,Medical Oncology,11598,0.42
39,Internal Medicine,"Endocrinology, Diabetes & Metabolism",10308,0.62
152,Nurse Practitioner,Adult Health,9706,0.65


#### Now, we should find which providers we can recommend to Vanderbilt 

In [128]:
#providers not referring patients to vanderbilt 

#In internal medicine, John Riddick and in pathology, Derek Welch should be reached out by vanderbilt to gain patients. 

prov_not_to_vandy = make_not_to_vandy(full_refs, ['from_npi', 'name', 'Classification_provider', 'Specialization_provider'])
prov_not_to_vandy.nlargest(25, 'patient_count_overall')

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,ref_perc_not_vandy,ref_perc_to_vandy
846,1417131715,"RIDDICK, JOHN",Internal Medicine,Interventional Cardiology,False,6168,6168,1.0,0.0
973,1467491670,"HUGHES, SEAN",Internal Medicine,Cardiovascular Disease,False,897,3249,0.28,0.72
1441,1679665095,"PRUDOFF, ADAM",Internal Medicine,Cardiovascular Disease,False,1154,2652,0.44,0.56
1443,1679689285,"CALLISTER, TRACY",Internal Medicine,Cardiovascular Disease,False,2280,2526,0.9,0.1
210,1114961513,"HUMPHREY, STEVEN",Internal Medicine,Cardiovascular Disease,False,2187,2187,1.0,0.0
1907,1902823859,"HUNEYCUTT, DAVID",Internal Medicine,Cardiovascular Disease,False,2163,2163,1.0,0.0
98,1053337717,"KAZA, SUNIL",Internal Medicine,Cardiovascular Disease,False,2119,2119,1.0,0.0
891,1427079946,"CONLEY, CHRISTOPHER",Internal Medicine,Clinical Cardiac Electrophysiology,False,2070,2070,1.0,0.0
194,1104933738,"LEE, JUNG",Internal Medicine,Cardiovascular Disease,False,2049,2049,1.0,0.0
2052,1972658060,"KETCH, TERRY",Internal Medicine,Cardiovascular Disease,False,2018,2018,1.0,0.0


In [129]:
#providers referring 25-75% of their patients to vandy 

top_prov=prov_not_to_vandy[(prov_not_to_vandy['ref_perc_to_vandy'] >= 0.25) 
                  & (prov_not_to_vandy['ref_perc_to_vandy']<=0.75)].nlargest(25, 'patient_count_overall')

top_prov

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,ref_perc_not_vandy,ref_perc_to_vandy
973,1467491670,"HUGHES, SEAN",Internal Medicine,Cardiovascular Disease,False,897,3249,0.28,0.72
1441,1679665095,"PRUDOFF, ADAM",Internal Medicine,Cardiovascular Disease,False,1154,2652,0.44,0.56
510,1255488052,"ANDERSON, BRENT",Internal Medicine,Cardiovascular Disease,False,1205,1968,0.61,0.39
1989,1942203849,"FRANKLIN, JERRY",Internal Medicine,Cardiovascular Disease,False,1089,1739,0.63,0.37
952,1457437782,"RICHARDSON, THOMAS",Internal Medicine,Cardiovascular Disease,False,902,1708,0.53,0.47
1766,1831281245,"FRIESINGER, G.",Internal Medicine,Cardiovascular Disease,False,992,1699,0.58,0.42
776,1386663144,"THOMPSON, THOMAS",Internal Medicine,Cardiovascular Disease,False,717,1213,0.59,0.41
187,1104859115,"KOLLI, MURALI",Internal Medicine,Cardiovascular Disease,False,523,1164,0.45,0.55
1752,1831122860,"MANDA, RAVINDER",Internal Medicine,Cardiovascular Disease,False,554,1090,0.51,0.49
36,1013958776,"ABU-HALIMAH, AHMAD",Internal Medicine,Cardiovascular Disease,False,456,1051,0.43,0.57


In [130]:
#we need to know doctors' affiliation

query = """
SELECT *
FROM affiliations_from

"""
with sqlite3.connect('data/hopteam_broad.sqlite') as db: 
    aff_from_sqlite = pd.read_sql(query, db)
    
aff_from_sqlite

Unnamed: 0,from_npi,affiliation_from
0,1821080961,NOT IN NASHVILLE
1,1093741464,HCA
2,1295726032,NOT IN NASHVILLE
3,1679677199,NOT IN NASHVILLE
4,1841282779,NOT IN NASHVILLE
...,...,...
38945,1912301771,NOT IN NASHVILLE
38946,1508880493,NO MAJOR AFFILIATION/OTHER
38947,1134143027,NOT IN NASHVILLE
38948,1326550674,NO MAJOR AFFILIATION/OTHER


In [131]:
#merging affiliation with the providers' dataset

prov_aff=pd.merge(top_prov, aff_from_sqlite)
prov_aff

Unnamed: 0,from_npi,name,Classification_provider,Specialization_provider,to_vandy,patient_count,patient_count_overall,ref_perc_not_vandy,ref_perc_to_vandy,affiliation_from
0,1467491670,"HUGHES, SEAN",Internal Medicine,Cardiovascular Disease,False,897,3249,0.28,0.72,VANDERBILT
1,1679665095,"PRUDOFF, ADAM",Internal Medicine,Cardiovascular Disease,False,1154,2652,0.44,0.56,VANDERBILT
2,1255488052,"ANDERSON, BRENT",Internal Medicine,Cardiovascular Disease,False,1205,1968,0.61,0.39,VANDERBILT
3,1942203849,"FRANKLIN, JERRY",Internal Medicine,Cardiovascular Disease,False,1089,1739,0.63,0.37,VANDERBILT
4,1457437782,"RICHARDSON, THOMAS",Internal Medicine,Cardiovascular Disease,False,902,1708,0.53,0.47,VANDERBILT
5,1831281245,"FRIESINGER, G.",Internal Medicine,Cardiovascular Disease,False,992,1699,0.58,0.42,LIFEPOINT
6,1386663144,"THOMPSON, THOMAS",Internal Medicine,Cardiovascular Disease,False,717,1213,0.59,0.41,SAINT THOMAS ASCENSION
7,1104859115,"KOLLI, MURALI",Internal Medicine,Cardiovascular Disease,False,523,1164,0.45,0.55,VANDERBILT
8,1831122860,"MANDA, RAVINDER",Internal Medicine,Cardiovascular Disease,False,554,1090,0.51,0.49,VANDERBILT
9,1013958776,"ABU-HALIMAH, AHMAD",Internal Medicine,Cardiovascular Disease,False,456,1051,0.43,0.57,VANDERBILT


In [132]:
#renaming columns we are interested in

prov_aff=prov_aff.rename(columns ={'Classification_provider': 'Classification',
                                           'Specialization_provider': 'Specialization',
                                            'patient_count_overall': 'Total Patient Count',
                                            'ref_perc_to_vandy': 'Referral Percentage to Vanderlit',
                                           'affiliation_from': 'Affiliation',
                                           'name': 'Name'
    
})
prov_aff

Unnamed: 0,from_npi,Name,Classification,Specialization,to_vandy,patient_count,Total Patient Count,ref_perc_not_vandy,Referral Percentage to Vanderlit,Affiliation
0,1467491670,"HUGHES, SEAN",Internal Medicine,Cardiovascular Disease,False,897,3249,0.28,0.72,VANDERBILT
1,1679665095,"PRUDOFF, ADAM",Internal Medicine,Cardiovascular Disease,False,1154,2652,0.44,0.56,VANDERBILT
2,1255488052,"ANDERSON, BRENT",Internal Medicine,Cardiovascular Disease,False,1205,1968,0.61,0.39,VANDERBILT
3,1942203849,"FRANKLIN, JERRY",Internal Medicine,Cardiovascular Disease,False,1089,1739,0.63,0.37,VANDERBILT
4,1457437782,"RICHARDSON, THOMAS",Internal Medicine,Cardiovascular Disease,False,902,1708,0.53,0.47,VANDERBILT
5,1831281245,"FRIESINGER, G.",Internal Medicine,Cardiovascular Disease,False,992,1699,0.58,0.42,LIFEPOINT
6,1386663144,"THOMPSON, THOMAS",Internal Medicine,Cardiovascular Disease,False,717,1213,0.59,0.41,SAINT THOMAS ASCENSION
7,1104859115,"KOLLI, MURALI",Internal Medicine,Cardiovascular Disease,False,523,1164,0.45,0.55,VANDERBILT
8,1831122860,"MANDA, RAVINDER",Internal Medicine,Cardiovascular Disease,False,554,1090,0.51,0.49,VANDERBILT
9,1013958776,"ABU-HALIMAH, AHMAD",Internal Medicine,Cardiovascular Disease,False,456,1051,0.43,0.57,VANDERBILT


In [133]:
#25 providers with 25-75% referral to vanderbilt

prov_aff[['Name', 'Affiliation','Classification','Specialization','Total Patient Count', 'Referral Percentage to Vanderlit']].sort_values('Total Patient Count', ascending=False)

Unnamed: 0,Name,Affiliation,Classification,Specialization,Total Patient Count,Referral Percentage to Vanderlit
0,"HUGHES, SEAN",VANDERBILT,Internal Medicine,Cardiovascular Disease,3249,0.72
1,"PRUDOFF, ADAM",VANDERBILT,Internal Medicine,Cardiovascular Disease,2652,0.56
2,"ANDERSON, BRENT",VANDERBILT,Internal Medicine,Cardiovascular Disease,1968,0.39
3,"FRANKLIN, JERRY",VANDERBILT,Internal Medicine,Cardiovascular Disease,1739,0.37
4,"RICHARDSON, THOMAS",VANDERBILT,Internal Medicine,Cardiovascular Disease,1708,0.47
5,"FRIESINGER, G.",LIFEPOINT,Internal Medicine,Cardiovascular Disease,1699,0.42
6,"THOMPSON, THOMAS",SAINT THOMAS ASCENSION,Internal Medicine,Cardiovascular Disease,1213,0.41
7,"KOLLI, MURALI",VANDERBILT,Internal Medicine,Cardiovascular Disease,1164,0.55
8,"MANDA, RAVINDER",VANDERBILT,Internal Medicine,Cardiovascular Disease,1090,0.49
9,"ABU-HALIMAH, AHMAD",VANDERBILT,Internal Medicine,Cardiovascular Disease,1051,0.57


In [134]:
#formatting percentage 
prov_aff['Referral Percentage to Vanderlit']= (prov_aff['Referral Percentage to Vanderlit']*100).astype(int).astype(str).add('%') 

In [135]:
prov_aff[['Name', 'Affiliation','Classification','Specialization','Total Patient Count', 'Referral Percentage to Vanderlit']].sort_values('Total Patient Count', ascending=False)

Unnamed: 0,Name,Affiliation,Classification,Specialization,Total Patient Count,Referral Percentage to Vanderlit
0,"HUGHES, SEAN",VANDERBILT,Internal Medicine,Cardiovascular Disease,3249,72%
1,"PRUDOFF, ADAM",VANDERBILT,Internal Medicine,Cardiovascular Disease,2652,56%
2,"ANDERSON, BRENT",VANDERBILT,Internal Medicine,Cardiovascular Disease,1968,39%
3,"FRANKLIN, JERRY",VANDERBILT,Internal Medicine,Cardiovascular Disease,1739,37%
4,"RICHARDSON, THOMAS",VANDERBILT,Internal Medicine,Cardiovascular Disease,1708,47%
5,"FRIESINGER, G.",LIFEPOINT,Internal Medicine,Cardiovascular Disease,1699,42%
6,"THOMPSON, THOMAS",SAINT THOMAS ASCENSION,Internal Medicine,Cardiovascular Disease,1213,41%
7,"KOLLI, MURALI",VANDERBILT,Internal Medicine,Cardiovascular Disease,1164,55%
8,"MANDA, RAVINDER",VANDERBILT,Internal Medicine,Cardiovascular Disease,1090,49%
9,"ABU-HALIMAH, AHMAD",VANDERBILT,Internal Medicine,Cardiovascular Disease,1051,57%
