In [None]:
import sqlite3
import pandas as pd
import numpy as np

Build a profile of providers referring patients to the major hospitals in Nashville. Are certain specialties more likely to refer to a particular hospital over the others?

In [None]:
# Find the Nashville hospitals: npidata rows with entity_type_code = 2, located in
# NASHVILLE, whose taxonomy classification is 'General Acute Care Hospital'.
# NOTE(review): the WHERE clause filters on `classification`, which presumably comes
# from `taxonomy`; if so, unmatched npidata rows are dropped and the LEFT JOIN
# behaves like an INNER JOIN -- confirm that this is intended.
query = """
SELECT npi, organization_name 
FROM npidata
LEFT JOIN taxonomy
USING(taxonomy_code)
WHERE entity_type_code = 2
AND location_address_city_name = 'NASHVILLE'
AND classification = 'General Acute Care Hospital'
"""

# Using a sqlite3 connection as a context manager only commits/rolls back the
# transaction; it does not close the connection. Close it in `finally` so the
# database file handle is released even when the query raises.
db = sqlite3.connect('../data/hop_db.sqlite')
try:
    nash_hosp = pd.read_sql(query, db)
finally:
    db.close()

In [None]:
# How many NPI records does each hospital organization name account for?
nash_hosp.loc[:, 'organization_name'].value_counts()

In [None]:
# Pull every referral (hop) whose destination is one of the Nashville general acute
# care hospitals, together with the referring provider's specialty (the taxonomy
# display_name joined through the referrer's npidata row).
# NOTE(review): the filter keeps rows with average_day_wait > 50. If the intent is to
# keep only referrals that happen quickly, the comparison is inverted and should be
# `< 50` -- confirm against the data guide before relying on these results.
query = """
WITH nash_hosp AS (
    SELECT npi AS hosp_npi, organization_name AS hospital
    FROM npidata
    LEFT JOIN taxonomy
    USING(taxonomy_code)
    WHERE entity_type_code = 2
    AND location_address_city_name = 'NASHVILLE'
    AND classification = 'General Acute Care Hospital'
)
SELECT from_npi, display_name AS specialty, transaction_count, hospital
FROM hop
INNER JOIN npidata
ON from_npi = npi
INNER JOIN nash_hosp
ON to_npi = hosp_npi
LEFT JOIN taxonomy
USING(taxonomy_code)
WHERE transaction_count >= 50
AND average_day_wait > 50
"""

# Using a sqlite3 connection as a context manager only commits/rolls back the
# transaction; it does not close the connection. Close it in `finally` so the
# database file handle is released even when the query raises.
db = sqlite3.connect('../data/hop_db.sqlite')
try:
    nash_hosp_referers = pd.read_sql(query, db)
finally:
    db.close()

In [None]:
# Total number of referrals (transactions) made by practitioners of each specialty to
# each hospital. Rows are ordered by specialty and, within a specialty, from the
# hospital receiving the most referrals to the one receiving the fewest; the
# `percent_for_next` column below depends on this ordering.
# Note: groupby drops rows whose specialty is null.
referring_specialties = (
    nash_hosp_referers
    .groupby(['specialty', 'hospital'])['transaction_count']
    .sum()
    .reset_index()
    .sort_values(by=['specialty', 'transaction_count'], ascending=[True, False])
)

# Rank the hospitals within each specialty by number of referrals; rank 1 means most
# referrals. method='min' gives tied hospitals the same whole-number rank, so hospitals
# tied for the most referrals all receive rank 1. The default method ('average') would
# assign them a fractional rank such as 1.5, and an exact `rank == 1` comparison would
# then miss them.
referring_specialties['rank'] = (
    referring_specialties
    .groupby('specialty')['transaction_count']
    .rank(method='min', ascending=False)
)

# For each row, the referrals received by the next hospital down the list (same
# specialty) expressed as a percentage of this hospital's referrals.
#   - null: there is no lower-placed hospital for this specialty
#   - small value: referrals fall off sharply between this hospital and the next one
referring_specialties['percent_for_next'] = (
    100
    * referring_specialties.groupby('specialty')['transaction_count'].shift(-1)
    / referring_specialties['transaction_count']
).round(2)

referring_specialties

In [None]:
# For how many specialties is each hospital the #1 referral destination?
referring_specialties.loc[referring_specialties['rank'] == 1.0, 'hospital'].value_counts()

In [None]:
# Now look at the hospitals ranked #1 for a specialty. Sorting by percent_for_next with
# nulls first puts the specialties with NO competition at the top (null means there is
# no lower-placed hospital for that specialty), followed by the specialties with the
# steepest drop in referrals from the #1 hospital to the #2 hospital.
is_top_ranked = referring_specialties['rank'].eq(1.0)
(
    referring_specialties[is_top_ranked]
    .sort_values(by='percent_for_next', na_position='first')
    .head(60)
)