In [None]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as px

#import plotly.graph_objects as go

In [None]:
# Load the referral table stored in the pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this should only ever be pointed at a trusted, locally produced file.
with open('data/nash_hosp_referrers_b.pickle', 'rb') as pickle_handle:
    nash_hosp_referrers_b = pickle.load(pickle_handle)

In [None]:
# Collapse the individual facility / legal-entity names into one short label
# per hospital so that later group-bys aggregate on the short label.
hospital_name_map = {
    'SETON CORPORATION': 'SAINT THOMAS',
    'SAINT THOMAS WEST HOSPITAL': 'SAINT THOMAS',
    'HCA HEALTH SERVICES OF TENNESSEE, INC.': 'HCA/TRISTAR',
    'HTI MEMORIAL HOSPITAL CORPORATION': 'HCA/TRISTAR',
    'NASHVILLE GENERAL HOSPITAL': 'NASHVILLE GENERAL',
    'VANDERBILT UNIVERSITY MEDICAL CENTER': 'VUMC',
}

hospital_names = nash_hosp_referrers_b['hospital']
for legal_name, short_name in hospital_name_map.items():
    # regex=False makes this a literal substring replacement on every pandas
    # version. Without it, older pandas treats the pattern as a regular
    # expression, where the '.' in 'INC.' matches any character.
    hospital_names = hospital_names.str.replace(legal_name, short_name, regex=False)

nash_hosp_referrers_b['hospital'] = hospital_names

In [None]:
# Total wait in days for each row: the number of referral transactions
# multiplied by the average wait of those transactions.
transaction_counts = nash_hosp_referrers_b['transaction_count']
average_waits = nash_hosp_referrers_b['average_day_wait']
nash_hosp_referrers_b['total_wait'] = transaction_counts * average_waits

In [None]:
# Aggregate referral information by hospital.
hosp_summary_values_b = (
    nash_hosp_referrers_b
    .groupby('hospital')
    .agg(
        # nunique, not count: 'count' tallies rows, and because several
        # facility names are merged into one hospital label the same NPI can
        # appear on more than one row per hospital. Counting distinct NPIs
        # gives the actual number of referring providers.
        total_providers=('from_npi', 'nunique'),
        total_transactions=('transaction_count', 'sum'),
        total_patients=('patient_count', 'sum'),
        total_states=('location_address_state_name', 'nunique'),
        total_specialties=('specialty', 'nunique'),
        total_wait=('total_wait', 'sum'),
    )
    .reset_index()
)

# Transaction-weighted average wait: summed wait days divided by the summed
# number of transactions, rounded to two decimals.
hosp_summary_values_b['avg_day_wait'] = (
    hosp_summary_values_b['total_wait']
    / hosp_summary_values_b['total_transactions']
).round(2)

hosp_summary_values_b

In [None]:
# Bar chart of total referral transactions per hospital; rows are sorted
# ascending by total_transactions before plotting.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

df = hosp_summary_values_b.sort_values('total_transactions')

fig = px.bar(
    df,
    x='hospital',
    y='total_transactions',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'total_transactions': 'Total Referral Transactions',
        'hospital': 'Hospital',
    },
    template="simple_white",
)

fig.update_xaxes(title=None, tickangle=45)

# NOTE(review): categoryorder applies to categorical axes; the y values here
# look numeric, so this setting may have no visible effect -- confirm.
fig.update_layout(
    showlegend=False,
    title_text="Referral Transactions by Hospital",
    title_xanchor="auto",
    title_font_size=18,
    yaxis={'categoryorder': 'category descending'},
)

fig.show()

In [None]:
#fig.write_image("data/visuals/hosp_transactions_2.png")

In [None]:
# Bar chart of total referring providers per hospital; rows are sorted
# ascending by total_providers before plotting.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

df = hosp_summary_values_b.sort_values('total_providers')

fig = px.bar(
    df,
    x='hospital',
    y='total_providers',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'total_providers': 'Total Referring Providers',
        'hospital': 'Hospital',
    },
    template="simple_white",
)

fig.update_xaxes(title=None, tickangle=45)

# NOTE(review): categoryorder applies to categorical axes; the y values here
# look numeric, so this setting may have no visible effect -- confirm.
fig.update_layout(
    showlegend=False,
    title_text="Referring Providers by Hospital",
    title_xanchor="auto",
    title_font_size=18,
    yaxis={'categoryorder': 'category descending'},
)

fig.show()

In [None]:
#fig.write_image("data/visuals/hosp_providers.png")

In [None]:
# Bar chart of total referred patients per hospital; rows are sorted
# ascending by total_patients before plotting.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

df = hosp_summary_values_b.sort_values('total_patients')

fig = px.bar(
    df,
    x='hospital',
    y='total_patients',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'total_patients': 'Total Patients Referred',
        'hospital': 'Hospital',
    },
    template="simple_white",
)

fig.update_xaxes(title=None, tickangle=45)

# NOTE(review): categoryorder applies to categorical axes; the y values here
# look numeric, so this setting may have no visible effect -- confirm.
fig.update_layout(
    showlegend=False,
    title_text="Referred Patients by Hospital",
    title_xanchor="auto",
    title_font_size=18,
    yaxis={'categoryorder': 'category descending'},
)

fig.show()

In [None]:
#fig.write_image("data/visuals/hosp_patients.png")

In [None]:
# Bar chart of the overall average referral wait (days) per hospital; rows
# are sorted ascending by avg_day_wait before plotting.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

df = hosp_summary_values_b.sort_values('avg_day_wait')

fig = px.bar(
    df,
    x='hospital',
    y='avg_day_wait',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'avg_day_wait': 'Average Wait (days)',
        'hospital': 'Hospital',
    },
    template="simple_white",
)

fig.update_xaxes(title=None, tickangle=45)

# NOTE(review): categoryorder applies to categorical axes; the y values here
# look numeric, so this setting may have no visible effect -- confirm.
fig.update_layout(
    showlegend=False,
    title_text="Average Wait by Hospital",
    title_xanchor="auto",
    title_font_size=18,
    yaxis={'categoryorder': 'category descending'},
)

fig.show()

In [None]:
#fig.write_image("data/visuals/hosp_avg_wait.png")

In [None]:
# Sum transactions, patients and wait days for every provider / hospital
# combination, carrying the provider's descriptive columns along as keys.
# NOTE(review): groupby drops rows whose key columns contain NaN by default,
# so providers missing e.g. a credential or sub_specialty may be excluded
# here -- confirm whether that is intended.
provider_hospital_keys = [
    'from_npi',
    'first_name',
    'last_name',
    'credential',
    'location_address_state_name',
    'location_address_city_name',
    'specialty',
    'sub_specialty',
    'hospital',
]

top_hosp_ref = (
    nash_hosp_referrers_b
    .groupby(provider_hospital_keys)
    .agg(
        total_transactions=('transaction_count', 'sum'),
        total_patients=('patient_count', 'sum'),
        total_wait=('total_wait', 'sum'),
    )
    .reset_index()
)

top_hosp_ref

In [None]:

# Rank providers within each hospital by patients referred (1 = most).
# method='min' gives tied providers the same integer rank. The default
# 'average' method produces fractional ranks (e.g. two providers tied for
# 5th both get 5.5), which silently drops them from a "rank <= 5" selection.
top_hosp_ref['hosp_top_referrals'] = (
    top_hosp_ref
    .groupby('hospital')['total_patients']
    .rank(method='min', ascending=False)
)

# Store the NPI as a string so plotting treats it as a category label rather
# than a number.
top_hosp_ref['from_npi'] = top_hosp_ref['from_npi'].astype(str)

In [None]:
# Bar chart of the providers ranked 5 or better by total patient volume
# within each hospital, one bar per provider, coloured by hospital.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

in_top_five = top_hosp_ref['hosp_top_referrals'] <= 5
df = (
    top_hosp_ref
    .loc[in_top_five]
    .sort_values(by=['hospital', 'hosp_top_referrals'])
)

fig = px.bar(
    df,
    x='from_npi',
    y='total_patients',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'total_patients': 'Total Patients Referred',
        'hospital': 'Hospital',
        'from_npi': 'Provider NPI',
        'first_name': 'Provider First Name',
        'last_name': 'Provider Last Name',
        'credential': 'Credential',
        'sub_specialty': 'Sub-Specialty',
        'total_transactions': 'Total Referral Transactions',
    },
    hover_name='hospital',
    hover_data={
        'hospital': False,
        'first_name': True,
        'last_name': True,
        'credential': True,
        'sub_specialty': True,
        'total_transactions': True,
    },
    template="simple_white",
)

# The NPI tick labels are hidden; provider details are exposed via hover.
fig.update_xaxes(title=None, showticklabels=False)

fig.show()

In [None]:
# Service-based specialties, listed separately because these aren't
# "typical" provider types; later cells exclude rows whose sub_specialty is
# in this list.
serv_specialties = [
    'Anatomic Pathology & Clinical Pathology Physician',
    'Diagnostic Radiology Physician',
    'Emergency Medicine Physician',
]

In [None]:
# Rank providers within each hospital by patients referred, ignoring the
# service-based sub-specialties. Rows for excluded sub-specialties are not
# ranked and therefore receive NaN in the new column.
# method='min' gives tied providers the same integer rank; the default
# 'average' method yields fractional ranks (e.g. 1.5) that never match an
# integer top-5 selection, so tied providers would vanish from the chart.
is_non_service = ~top_hosp_ref['sub_specialty'].isin(serv_specialties)

top_hosp_ref['hosp_top_referrals_non_serv'] = (
    top_hosp_ref
    .loc[is_non_service]
    .groupby('hospital')['total_patients']
    .rank(method='min', ascending=False)
)

In [None]:
# Bar chart of the providers ranked 5 or better by total patient volume
# within each hospital, excluding service-based sub-specialties.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

# Use "<= 5" rather than isin([1, 2, 3, 4, 5]): tied providers can carry
# fractional ranks (e.g. 1.5), which an exact-integer membership test drops.
# Unranked rows hold NaN and compare False, so they stay excluded.
in_top_five = top_hosp_ref['hosp_top_referrals_non_serv'] <= 5
df = (
    top_hosp_ref
    .loc[in_top_five]
    .sort_values(by=['hospital', 'hosp_top_referrals_non_serv'])
)

fig = px.bar(
    df,
    x='from_npi',
    y='total_patients',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'total_patients': 'Total Patients Referred',
        'hospital': 'Hospital',
        'from_npi': 'Provider NPI',
        'first_name': 'Provider First Name',
        'last_name': 'Provider Last Name',
        'credential': 'Credential',
        'sub_specialty': 'Sub-Specialty',
        'total_transactions': 'Total Referral Transactions',
    },
    hover_name='hospital',
    hover_data={
        'hospital': False,
        'first_name': True,
        'last_name': True,
        'credential': True,
        'sub_specialty': True,
        'total_transactions': True,
    },
    template="simple_white",
)

# The NPI tick labels are hidden; provider details are exposed via hover.
fig.update_xaxes(title=None, showticklabels=False)

fig.show()

In [None]:
#fig.write_html("data/visuals/hosp_top_ref_non_serv.html")

In [None]:
# Unique NPIs of every provider with at least one row whose hospital label
# contains "VUMC".
refers_to_vumc = nash_hosp_referrers_b['hospital'].str.contains("VUMC")
vandy_ref = (
    nash_hosp_referrers_b
    .loc[refers_to_vumc, 'from_npi']
    .unique()
    .tolist()
)
vandy_ref

In [None]:
# Top 25 providers by patients referred among rows where the NPI is not in
# vandy_ref, the state is 'TN', and the sub-specialty is not service-based.
not_vumc_referrer = ~nash_hosp_referrers_b['from_npi'].isin(vandy_ref)
located_in_tn = nash_hosp_referrers_b['location_address_state_name'] == 'TN'
is_non_service = ~nash_hosp_referrers_b['sub_specialty'].isin(serv_specialties)

provider_keys = [
    'from_npi',
    'first_name',
    'last_name',
    'credential',
    'location_address_state_name',
    'location_address_city_name',
    'specialty',
    'sub_specialty',
]

df = (
    nash_hosp_referrers_b
    .loc[not_vumc_referrer & located_in_tn & is_non_service]
    .groupby(provider_keys)
    .agg(
        total_transactions=('transaction_count', 'sum'),
        total_patients=('patient_count', 'sum'),
        total_wait=('total_wait', 'sum'),
    )
    .reset_index()
    .sort_values('total_patients', ascending=False)
    .head(25)
)

# Transaction-weighted average wait per provider, rounded to two decimals.
df['avg_day_wait'] = round(df['total_wait'] / df['total_transactions'], 2)

# Store the NPI as a string so it is plotted as a category label.
df['from_npi'] = df['from_npi'].astype(str)

fig = px.bar(
    df,
    x='from_npi',
    y='total_patients',
    color='total_transactions',
    color_continuous_scale='Darkmint',
    labels={
        'total_patients': 'Non-VUMC Patients Referred',
        'from_npi': 'Provider NPI',
        'first_name': 'Provider First Name',
        'last_name': 'Provider Last Name',
        'credential': 'Credential',
        'sub_specialty': 'Sub-Specialty',
        'total_transactions': 'Non-VUMC Referral Transactions',
        'avg_day_wait': 'Average Referral Wait (days)',
    },
    hover_data={
        'total_transactions': True,
        'avg_day_wait': True,
        'first_name': True,
        'last_name': True,
        'credential': True,
        'sub_specialty': True,
    },
    template="simple_white",
)

# The NPI tick labels are hidden; provider details are exposed via hover.
fig.update_xaxes(title=None, showticklabels=False)

fig.show()

In [None]:
#fig.write_html("data/visuals/individual_no_vandy_no_serv.html")

In [None]:
# Top 25 sub-specialties by patients referred among rows where the NPI is
# not in vandy_ref, the state is 'TN', and the sub-specialty is not
# service-based.
not_vumc_referrer = ~nash_hosp_referrers_b['from_npi'].isin(vandy_ref)
located_in_tn = nash_hosp_referrers_b['location_address_state_name'] == 'TN'
is_non_service = ~nash_hosp_referrers_b['sub_specialty'].isin(serv_specialties)

df = (
    nash_hosp_referrers_b
    .loc[not_vumc_referrer & located_in_tn & is_non_service]
    .groupby('sub_specialty')
    .agg(
        # nunique, not count: a provider can appear on several rows (for
        # example one per hospital), and 'count' would tally each row as a
        # separate provider.
        total_providers=('from_npi', 'nunique'),
        total_transactions=('transaction_count', 'sum'),
        total_patients=('patient_count', 'sum'),
        total_states=('location_address_state_name', 'nunique'),
        total_wait=('total_wait', 'sum'),
    )
    .reset_index()
    .sort_values('total_patients', ascending=False)
    .head(25)
)

# Transaction-weighted average wait per sub-specialty, rounded to two decimals.
df['avg_day_wait'] = (df['total_wait'] / df['total_transactions']).round(2)

fig = px.bar(
    df,
    x='sub_specialty',
    y='total_patients',
    color='total_transactions',
    color_continuous_scale='Bluyl',
    labels={
        'sub_specialty': 'Sub-Specialty',
        'total_patients': 'Non-VUMC Patients Referred',
        'total_transactions': 'Non-VUMC Referral Transactions',
        'avg_day_wait': 'Average Referral Wait (days)',
    },
    hover_data={
        'total_transactions': True,
        'avg_day_wait': True,
    },
    template="simple_white",
)

# The sub-specialty tick labels are hidden; names are exposed via hover.
fig.update_xaxes(title=None, showticklabels=False)

fig.show()

In [None]:
#fig.write_html("data/visuals/specialty_no_vandy_no_serv.html")

In [None]:
# Treemap of referral rows whose hospital label does not contain 'VUMC' and
# whose sub-specialty is not service-based, nested specialty -> provider NPI,
# sized by patients referred and coloured by average referral wait.
goes_elsewhere = ~nash_hosp_referrers_b['hospital'].str.contains('VUMC')
is_non_service = ~nash_hosp_referrers_b['sub_specialty'].isin(serv_specialties)

provider_keys = [
    'from_npi',
    'first_name',
    'last_name',
    'credential',
    'location_address_state_name',
    'location_address_city_name',
    'specialty',
    'sub_specialty',
]

df = (
    nash_hosp_referrers_b
    .loc[goes_elsewhere & is_non_service]
    .groupby(provider_keys)
    .agg(
        # NOTE(review): from_npi is also a group key, so this is the number
        # of source rows per provider rather than a provider count -- confirm
        # whether the column is still needed.
        total_providers=('from_npi', 'count'),
        total_transactions=('transaction_count', 'sum'),
        total_patients=('patient_count', 'sum'),
        total_states=('location_address_state_name', 'nunique'),
        total_wait=('total_wait', 'sum'),
    )
    .reset_index()
    .sort_values('total_transactions', ascending=False)
)

# Transaction-weighted average wait per provider, rounded to two decimals.
df['avg_day_wait'] = round(df['total_wait'] / df['total_transactions'], 2)

# Column order matters: the hover template below addresses these columns by
# position through customdata[i].
hover_columns = [
    'sub_specialty',       # customdata[0]
    'specialty',           # customdata[1]
    'first_name',          # customdata[2]
    'last_name',           # customdata[3]
    'credential',          # customdata[4]
    'from_npi',            # customdata[5]
    'total_patients',      # customdata[6]
    'avg_day_wait',        # customdata[7]
    'total_transactions',  # customdata[8]
]

fig = px.treemap(
    df,
    path=['specialty', 'from_npi'],
    values='total_patients',
    color='avg_day_wait',
    color_continuous_scale='Emrld',
    labels={'avg_day_wait': 'Average Referral Wait (days)'},
    custom_data=df[hover_columns],
)

# One entry per hover line; the empty strings produce the blank line after
# the bold header and the trailing line break.
hover_lines = [
    '<b>Specialty: %{customdata[1]}</b>',
    '',
    'Sub-Specialty: %{customdata[0]}',
    'Provider NPI: %{customdata[5]}',
    'Provider First Name: %{customdata[2]}',
    'Provider Last Name: %{customdata[3]}',
    'Provider Credential: %{customdata[4]}',
    '# Non-VUMC Patients: %{customdata[6]}',
    '# Non-VUMC Referral Transactions: %{customdata[8]}',
    'Average Referral Wait (days): %{customdata[7]}',
    '',
]
fig.data[0].hovertemplate = '<br>'.join(hover_lines)

fig.show()

In [None]:
#fig.write_html("data/visuals/vandy_specialty_tree_no_serv.html")

In [None]:
# Patients and referral transactions for every sub-specialty / hospital
# pair, ordered by sub-specialty and then by transactions (largest first).
referring_specialties_b = (
    nash_hosp_referrers_b
    .groupby(['sub_specialty', 'hospital'])
    .agg(
        total_patients=('patient_count', 'sum'),
        total_transactions=('transaction_count', 'sum'),
    )
    .reset_index()
    .sort_values(
        by=['sub_specialty', 'total_transactions'],
        ascending=[True, False],
    )
)

# Rank hospitals within each sub-specialty by transactions; 1 = most referrals.
referring_specialties_b['rank'] = (
    referring_specialties_b
    .groupby('sub_specialty')['total_transactions']
    .rank(ascending=False)
)

# percent_for_next: transactions of the next row in the same sub-specialty
# (the next-lower hospital, given the sort above) as a percentage of this
# row's transactions.
#   - NaN means there is no following hospital within the sub-specialty.
#   - A small value means the next hospital receives far fewer referrals
#     than this one.
next_hospital_transactions = (
    referring_specialties_b
    .groupby('sub_specialty')['total_transactions']
    .shift(-1)
)
referring_specialties_b['percent_for_next'] = round(
    100 * next_hospital_transactions / referring_specialties_b['total_transactions'],
    2,
)

referring_specialties_b

In [None]:
# Keep the top-ranked hospital of each sub-specialty when the next hospital
# gets less than 50% of its transactions, or when there is no next hospital
# at all (percent_for_next is NaN). Rows with NaN are listed first.
is_top_ranked = referring_specialties_b['rank'] == 1.0
next_share = referring_specialties_b['percent_for_next']
has_clear_lead = (next_share < 50) | next_share.isna()

spec_pref = (
    referring_specialties_b
    .loc[is_top_ranked & has_clear_lead]
    .sort_values('percent_for_next', na_position='first')
)

In [None]:
# Per hospital: number of sub-specialties in spec_pref, plus the summed
# transactions and patients of those rows.
(
    spec_pref
    .groupby('hospital')
    .agg(
        total_specialties=('sub_specialty', 'count'),
        total_transactions=('total_transactions', 'sum'),
        total_patients=('total_patients', 'sum'),
    )
    .reset_index()
)

In [None]:
# Bar chart of the number of sub-specialties in spec_pref for each hospital,
# sorted ascending by that count.
# NOTE(review): spec_pref appears to hold the leading hospital per
# sub-specialty where the runner-up gets under half of the leader's
# transactions; that is not the same as "at least 50% of all referrals" --
# confirm the wording used when presenting this chart.
hospital_colors = {
    "HCA/TRISTAR": "#AA1E2D",
    "NASHVILLE GENERAL": "#FDE74C",
    "SAINT THOMAS": "#8FB8DE",
    "VUMC": "#9A94BC",
}

df = (
    spec_pref
    .groupby('hospital')
    .agg(
        total_specialties=('sub_specialty', 'count'),
        total_transactions=('total_transactions', 'sum'),
        total_patients=('total_patients', 'sum'),
    )
    .reset_index()
    .sort_values('total_specialties')
)

fig = px.bar(
    df,
    x='hospital',
    y='total_specialties',
    color='hospital',
    color_discrete_map=hospital_colors,
    labels={
        'total_patients': 'Total Patients Referred',
        'hospital': 'Hospital',
        'total_transactions': 'Total Referral Transactions',
        'total_specialties': 'Total Specialties',
    },
    hover_name='hospital',
    hover_data={
        'hospital': False,
        'total_transactions': True,
        'total_patients': True,
    },
    template="simple_white",
)

fig.update_xaxes(title=None, tickangle=-45)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
#fig.write_html("data/visuals/pref_spec.html")