In [90]:
import pandas as pd
import sqlite3
import numpy as np

### First, build a profile of providers referring patients to the major hospitals in Nashville. Are certain specialties more likely to refer to a particular hospital over the others?

In [None]:
#Check the classification and the corresponding counts

In [91]:
# Referrals from individual providers to Nashville-area hospitals.
# A hospital is an organization NPI (entity_type_code = 2) located in Nashville, TN
# whose taxonomy classification is 'General Acute Care Hospital'.

# cte_nash_hosp holds the referred-to hospitals (it is joined on h.to_npi); the
# referring provider's name, address and taxonomy come from npi n / taxonomy t.
# Only hop rows with transaction_count >= 50 and average_day_wait < 50 whose
# referring NPI has entity_type_code < 2 are kept.
sql = """
WITH cte_nash_hosp AS (
        SELECT DISTINCT npi, organization_name
        FROM npi
        LEFT JOIN taxonomy USING(taxonomy_code)
        WHERE location_address_state_name = 'TN'
                AND entity_type_code = 2
                AND location_address_city_name LIKE 'Nashville%'
                AND classification = 'General Acute Care Hospital'
)
SELECT
        h.from_npi
        ,h.to_npi
        ,h.patient_count
        ,cte.organization_name
        ,n.last_name
        ,n.first_name
        ,n.location_address_city_name
        ,n.location_address_state_name
        ,n.location_address_postal_code
        ,n.entity_type_code
        ,t.taxonomy_code
        ,t.grouping
        ,t.classification
        ,t.specialization
        ,t.display_name
FROM hop h
INNER JOIN npi n ON h.from_npi = n.npi
INNER JOIN cte_nash_hosp cte ON cte.npi = h.to_npi
LEFT JOIN taxonomy t USING(taxonomy_code)
WHERE h.transaction_count >= 50
        AND h.average_day_wait < 50
        AND n.entity_type_code < 2
"""


In [92]:
# Run the referral query against the local SQLite database.
# sqlite3's connection context manager only commits or rolls back a transaction;
# it does not close the connection, so close it explicitly after the read.
db = sqlite3.connect('../data/hop_db.sqlite')
try:
    df_referrers = pd.read_sql(sql, db)
finally:
    db.close()

In [93]:
# Schema check: column dtypes and non-null counts of the referral rows.
df_referrers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5248 entries, 0 to 5247
Data columns (total 15 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   from_npi                      5248 non-null   int64  
 1   to_npi                        5248 non-null   int64  
 2   patient_count                 5248 non-null   int64  
 3   organization_name             5248 non-null   object 
 4   last_name                     5248 non-null   object 
 5   first_name                    5248 non-null   object 
 6   location_address_city_name    5248 non-null   object 
 7   location_address_state_name   5248 non-null   object 
 8   location_address_postal_code  5248 non-null   object 
 9   entity_type_code              5248 non-null   float64
 10  taxonomy_code                 5248 non-null   object 
 11  grouping                      5248 non-null   object 
 12  classification                5248 non-null   object 
 13  spe

In [94]:
# Distribution of entity_type_code among the referring providers in df_referrers.
df_referrers['entity_type_code'].value_counts()

1.0    5248
Name: entity_type_code, dtype: int64

In [95]:
# Rank the Nashville hospitals by the total number of patients referred to them.
# cte_nash_hosp (organization NPIs in Nashville, TN classified as
# 'General Acute Care Hospital') already carries each hospital's name, so hop is
# joined to it directly instead of joining npi a second time.
sql = """
WITH cte_nash_hosp AS (
        SELECT DISTINCT npi, organization_name
        FROM npi
        LEFT JOIN taxonomy USING(taxonomy_code)
        WHERE location_address_state_name = 'TN'
                AND entity_type_code = 2
                AND location_address_city_name LIKE 'Nashville%'
                AND classification = 'General Acute Care Hospital'
)
SELECT cte.organization_name AS hospital, SUM(h.patient_count) AS tot_patients
FROM hop h
        INNER JOIN cte_nash_hosp cte ON h.to_npi = cte.npi
GROUP BY cte.organization_name
ORDER BY tot_patients DESC
"""

# Load the per-hospital patient totals.
# The sqlite3 connection context manager does not close the connection on exit,
# so it is closed explicitly once the query result has been read.
db = sqlite3.connect('../data/hop_db.sqlite')
try:
    df_nash_top_hosp = pd.read_sql(sql, db)
finally:
    db.close()

In [96]:
# Hospitals with their summed patient_count (tot_patients), largest total first.
df_nash_top_hosp

Unnamed: 0,hospital,tot_patients
0,VANDERBILT UNIVERSITY MEDICAL CENTER,1352091
1,SAINT THOMAS WEST HOSPITAL,827346
2,"HCA HEALTH SERVICES OF TENNESSEE, INC.",678400
3,HTI MEMORIAL HOSPITAL CORPORATION,225244
4,NASHVILLE GENERAL HOSPITAL,16642
5,SETON CORPORATION,3294


In [97]:
# Count referral rows per (referring specialty, Nashville hospital) pair.
# Only hop rows with transaction_count >= 50 and average_day_wait < 50 whose
# referring NPI has entity_type_code < 2 are counted; the most frequent pairs
# come first (ORDER BY 3 DESC sorts on count_referred).
# NOTE: despite its name, cte_referrers holds the referred-to hospitals (it is
# joined on h.to_npi); the referring side comes from npi n / taxonomy t.
sql = """
WITH cte_referrers AS (
        SELECT distinct npi, organization_name  FROM npi 
        LEFT JOIN taxonomy  USING(taxonomy_code)
        WHERE location_address_state_name = 'TN' 
                AND entity_type_code = 2 
                AND location_address_city_name LIKE 'Nashville%' 
                AND classification = 'General Acute Care Hospital'
)
SELECT 
        t.display_name AS specialty_referrer,
        cte.organization_name AS organization_name_referred,
        COUNT(cte.organization_name) as count_referred
FROM hop h 
INNER JOIN npi n  ON h.from_npi = n.npi
LEFT JOIN taxonomy t USING(taxonomy_code)
INNER JOIN cte_referrers cte ON h.to_npi = cte.npi
WHERE transaction_count >= 50
AND average_day_wait < 50
    AND n.entity_type_code < 2   
GROUP BY organization_name_referred, specialty_referrer
ORDER BY 3 DESC
"""

# Load the specialty-by-hospital referral counts.
# The sqlite3 connection context manager does not close the connection on exit,
# so it is closed explicitly once the query result has been read.
db = sqlite3.connect('../data/hop_db.sqlite')
try:
    df_referrer_speciality = pd.read_sql(sql, db)
finally:
    db.close()

In [98]:
# One row per (referring specialty, hospital) pair, highest count_referred first.
df_referrer_speciality

Unnamed: 0,specialty_referrer,organization_name_referred,count_referred
0,Internal Medicine Physician,VANDERBILT UNIVERSITY MEDICAL CENTER,319
1,Family Medicine Physician,VANDERBILT UNIVERSITY MEDICAL CENTER,203
2,Internal Medicine Physician,SAINT THOMAS WEST HOSPITAL,191
3,Diagnostic Radiology Physician,"HCA HEALTH SERVICES OF TENNESSEE, INC.",163
4,Family Nurse Practitioner,VANDERBILT UNIVERSITY MEDICAL CENTER,146
...,...,...,...
395,Pediatric Orthopaedic Surgery Physician,VANDERBILT UNIVERSITY MEDICAL CENTER,1
396,Pediatric Pathology Physician,VANDERBILT UNIVERSITY MEDICAL CENTER,1
397,Pediatric Radiology Physician,VANDERBILT UNIVERSITY MEDICAL CENTER,1
398,Sports Medicine (Family Medicine) Physician,VANDERBILT UNIVERSITY MEDICAL CENTER,1


In [61]:
# Repeat of the earlier df_referrers.info() schema check (same call, same frame).
df_referrers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5248 entries, 0 to 5247
Data columns (total 15 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   from_npi                      5248 non-null   int64  
 1   to_npi                        5248 non-null   int64  
 2   patient_count                 5248 non-null   int64  
 3   organization_name             5248 non-null   object 
 4   last_name                     5248 non-null   object 
 5   first_name                    5248 non-null   object 
 6   location_address_city_name    5248 non-null   object 
 7   location_address_state_name   5248 non-null   object 
 8   location_address_postal_code  5248 non-null   object 
 9   entity_type_code              5248 non-null   float64
 10  taxonomy_code                 5248 non-null   object 
 11  grouping                      5248 non-null   object 
 12  classification                5248 non-null   object 
 13  spe

In [None]:
# Preview the referral rows. The original cell referenced `df`, which is never
# defined in this notebook and raises NameError on a fresh kernel; df_referrers
# is the frame inspected by the preceding cell.
df_referrers.head(10)

In [13]:
#AK
# One row per qualifying hop referral from an individual provider to a Nashville hospital.
# cte_nash_hosp: organization NPIs (entity_type_code = 2) in Nashville, TN whose
# taxonomy classification is 'General Acute Care Hospital', with their display_name.
# The main query keeps hop rows with transaction_count >= 50 and
# average_day_wait < 50 whose referring NPI (fn) has entity_type_code < 2, and
# labels each side: from_speciality comes from the referrer's taxonomy (t),
# to_organization_name / to_speciality come from the hospital CTE.
sql = """
WITH cte_nash_hosp AS (
        SELECT distinct npi, organization_name, display_name  FROM npi 
        LEFT JOIN taxonomy  USING(taxonomy_code)
        WHERE location_address_state_name = 'TN' 
                AND entity_type_code = 2 
                AND location_address_city_name LIKE 'Nashville%' 
                AND classification = 'General Acute Care Hospital'
)
SELECT 
        h.from_npi 
        ,h.patient_count
        ,h.transaction_count
        ,h.average_day_wait
        ,h.std_day_wait
        ,t.display_name as from_speciality
        ,cte.npi as to_npi
        ,cte.organization_name as to_organization_name
        ,cte.display_name as to_speciality
FROM hop h 
        INNER JOIN npi fn  ON h.from_npi = fn.npi
LEFT JOIN taxonomy t USING(taxonomy_code)
        INNER JOIN cte_nash_hosp cte ON h.to_npi = cte.npi
WHERE transaction_count >= 50
        AND average_day_wait < 50
        AND fn.entity_type_code < 2  
"""

# Load the referrer/referred detail rows.
# The sqlite3 connection context manager does not close the connection on exit,
# so it is closed explicitly once the query result has been read.
db = sqlite3.connect('../data/hop_db.sqlite')
try:
    df_ref = pd.read_sql(sql, db)
finally:
    db.close()

In [14]:
# Referral rows: referring NPI and specialty alongside the receiving hospital.
df_ref

Unnamed: 0,from_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_speciality,to_npi,to_organization_name,to_speciality
0,1679521090,36,62,10.306,39.479,Hematology & Oncology Physician,1992861314,SETON CORPORATION,General Acute Care Hospital
1,1043232879,47,55,18.727,44.898,Diagnostic Radiology Physician,1992861314,SETON CORPORATION,General Acute Care Hospital
2,1174565543,69,143,3.273,26.756,Hematology & Oncology Physician,1992861314,SETON CORPORATION,General Acute Care Hospital
3,1942254883,52,65,13.154,32.501,Diagnostic Radiology Physician,1992861314,SETON CORPORATION,General Acute Care Hospital
4,1194751354,37,64,12.641,38.231,Medical Oncology Physician,1992861314,SETON CORPORATION,General Acute Care Hospital
...,...,...,...,...,...,...,...,...,...
5243,1952542698,33,58,26.172,24.077,Podiatrist,1558408633,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital
5244,1194812297,70,163,1.816,16.108,Psychiatric/Mental Health Nurse Practitioner,1558408633,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital
5245,1225372733,41,57,6.684,19.407,Certified Registered Nurse Anesthetist,1558408633,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital
5246,1962596734,53,94,0.638,6.189,Psychiatry Physician,1558408633,VANDERBILT UNIVERSITY MEDICAL CENTER,General Acute Care Hospital


In [49]:
# Number of referral rows (non-null from_npi) for each
# (receiving hospital, referring specialty) combination.
(
    df_ref
    .groupby(['to_organization_name', 'from_speciality'])['from_npi']
    .count()
    .reset_index()
)

Unnamed: 0,to_organization_name,from_speciality,from_npi
0,"HCA HEALTH SERVICES OF TENNESSEE, INC.",Acute Care Nurse Practitioner,18
1,"HCA HEALTH SERVICES OF TENNESSEE, INC.",Adolescent Medicine (Internal Medicine) Physician,1
2,"HCA HEALTH SERVICES OF TENNESSEE, INC.",Adult Health Nurse Practitioner,11
3,"HCA HEALTH SERVICES OF TENNESSEE, INC.",Adult Medicine Physician,1
4,"HCA HEALTH SERVICES OF TENNESSEE, INC.",Adult Reconstructive Orthopaedic Surgery Physi...,2
...,...,...,...
395,VANDERBILT UNIVERSITY MEDICAL CENTER,Urology Physician,26
396,VANDERBILT UNIVERSITY MEDICAL CENTER,Vascular & Interventional Radiology Physician,5
397,VANDERBILT UNIVERSITY MEDICAL CENTER,Vascular Neurology Physician,2
398,VANDERBILT UNIVERSITY MEDICAL CENTER,Vascular Surgery Physician,11


In [89]:
#AK total patients referred to each Nashville hospital, per referring specialty
# Sum patient_count for every (hospital, specialty) pair. A single sort on
# (from_speciality, patient_count) replaces the original two consecutive sorts,
# the first of which was immediately overridden.
df1 = (
    df_ref
    .groupby(['to_organization_name', 'from_speciality'])['patient_count']
    .sum()
    .reset_index()
    .sort_values(by=['from_speciality', 'patient_count'], ascending=[False, False])
)

# Reshape from long to wide: one row per hospital, one column per specialty.
# The original cell pivoted `df2`, which is never defined in this notebook; the
# frame built above is the one intended here. Pairs with no referrals show as NaN.
df1.pivot(index='to_organization_name', columns='from_speciality', values='patient_count')
  

from_speciality,Acute Care Clinical Nurse Specialist,Acute Care Nurse Practitioner,Addiction Medicine (Internal Medicine) Physician,Addiction Medicine (Psychiatry & Neurology) Physician,Addiction Psychiatry Physician,Adolescent Medicine (Internal Medicine) Physician,Adult Congenital Heart Disease Physician,Adult Health Nurse Practitioner,Adult Medicine Physician,Adult Psychiatric/Mental Health Clinical Nurse Specialist,...,Therapeutic Radiology Physician,Thoracic Surgery (Cardiothoracic Vascular Surgery) Physician,Transplant Surgery Physician,Trauma Surgery Physician,Urology Physician,Vascular & Interventional Radiology Physician,Vascular Neurology Physician,Vascular Surgery Physician,Women's Health Clinical Nurse Specialist,Women's Health Nurse Practitioner
to_organization_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"HCA HEALTH SERVICES OF TENNESSEE, INC.",,2313.0,,,,30.0,,867.0,396.0,,...,,1157.0,61.0,,1610.0,2271.0,,1181.0,,34.0
HTI MEMORIAL HOSPITAL CORPORATION,166.0,618.0,,,,,,202.0,,,...,,241.0,,760.0,614.0,1739.0,,200.0,,328.0
NASHVILLE GENERAL HOSPITAL,,53.0,,,,,,,,,...,,,,,31.0,,,70.0,,
SAINT THOMAS WEST HOSPITAL,,4021.0,84.0,,,48.0,,677.0,,,...,218.0,865.0,142.0,,2098.0,172.0,389.0,1728.0,135.0,490.0
SETON CORPORATION,,,,,,,,,,,...,,,,,,,,,,
VANDERBILT UNIVERSITY MEDICAL CENTER,128.0,13136.0,,128.0,89.0,68.0,70.0,6305.0,81.0,47.0,...,,1530.0,880.0,2254.0,6519.0,1294.0,411.0,2754.0,,1373.0


In [57]:
#AK rank (hospital, referring specialty) pairs by number of referral rows (count of from_npi), highest first
df_ref.groupby(['to_organization_name', 'from_speciality'])['from_npi'].count().reset_index().sort_values('from_npi', ascending = False)




Unnamed: 0,to_organization_name,from_speciality,from_npi
323,VANDERBILT UNIVERSITY MEDICAL CENTER,Internal Medicine Physician,319
298,VANDERBILT UNIVERSITY MEDICAL CENTER,Family Medicine Physician,203
199,SAINT THOMAS WEST HOSPITAL,Internal Medicine Physician,191
15,"HCA HEALTH SERVICES OF TENNESSEE, INC.",Diagnostic Radiology Physician,163
299,VANDERBILT UNIVERSITY MEDICAL CENTER,Family Nurse Practitioner,146
...,...,...,...
313,VANDERBILT UNIVERSITY MEDICAL CENTER,Health Psychologist,1
206,SAINT THOMAS WEST HOSPITAL,Neuroradiology Physician,1
79,HTI MEMORIAL HOSPITAL CORPORATION,Acute Care Clinical Nurse Specialist,1
310,VANDERBILT UNIVERSITY MEDICAL CENTER,Glaucoma Specialist (Ophthalmology) Physician,1


In [None]:
#find top referred hospitals in nashville by patient count
sql = """
WITH cte_nash_hosp AS (
        SELECT distinct npi, organization_name  FROM npi 
        LEFT JOIN taxonomy  USING(taxonomy_code)
        WHERE location_address_state_name = 'TN' 
                AND entity_type_code = 2 
                AND location_address_city_name LIKE 'Nashville%' 
                AND classification = 'General Acute Care Hospital'
)
SELECT n.organization_name as hospital, SUM(patient_count) tot_patients
FROM hop h
        INNER JOIN npi n ON  h.to_npi = n.npi
        INNER JOIN cte_nash_hosp cte ON h.to_npi = cte.npi
GROUP BY n.organization_name
ORDER BY 2 desc