In [1]:
import pandas as pd
import pyodbc
import openpyxl
import os
import re
import math
import numpy as np

In [2]:
def open_all_period_vizient_rankings(file_obj, sheet_name='all_period_vizient_rankings_up_'):
    """Open the all-period Vizient rankings workbook and return one worksheet.

    Parameters
    ----------
    file_obj : str or path-like
        Path (including the file name) of the .xlsx workbook.
    sheet_name : str, optional
        Name of the worksheet to return.  Defaults to
        'all_period_vizient_rankings_up_', the sheet this notebook reads.

    Returns
    -------
    openpyxl worksheet
        The requested worksheet of the loaded workbook.

    Raises
    ------
    KeyError
        If the workbook has no worksheet named ``sheet_name``.
    """
    file_loc = os.path.abspath(file_obj)
    # data_only=True reads the cached cell values instead of the formulas.
    wb = openpyxl.load_workbook(file_loc, data_only=True)
    return wb[sheet_name]

In [3]:
def filter_all_period_vizient_rankings(openpyxl_ws_obj,year_var,period_var,ccmc_10_var,camc_10_var,lsccmc_10_var,critacc_10_var):
    """Return the top-decile row of each cohort for one year and period.

    Parameters
    ----------
    openpyxl_ws_obj : worksheet-like
        Object whose ``.values`` yields the sheet rows; the first row must
        hold the column headers.
    year_var, period_var
        Values matched (with ``==``) against the 'YEAR' and 'PERIOD' columns.
    ccmc_10_var, camc_10_var, lsccmc_10_var, critacc_10_var
        The 'final_Rank' value treated as the top-decile rank for the
        'Complex Care Medical Center', 'Comprehensive Academic Medical
        Center', 'Large Specialized Complex Care Medical Center' and
        'Critical Access' cohorts respectively.

    Returns
    -------
    pandas.DataFrame
        The ranking/score columns for the rows whose cohort and final rank
        match one of the four (cohort, rank) pairs.  Other cohorts are
        never returned.  The index keeps the worksheet row positions.
    """
    # Build the frame from the worksheet that was passed in.  The body used
    # to read a notebook-global ``ws_obj`` and ignore this argument.
    df = pd.DataFrame(openpyxl_ws_obj.values)
    # Promote the first row to column headers, then drop it from the data.
    df = df.rename(columns=df.iloc[0])
    df = df.drop([0])

    # Only the year and period we want, and only the columns we need.
    in_baseline = (df['YEAR'] == year_var) & (df['PERIOD'] == period_var)
    keep_cols = ['COHORT','HCO_SHORT_NAME','PERIOD','Parent_ID','Patct_Rank','YEAR',
                 'eff_Rank','eff_wt_score','effect_Rank','effect_wt_score',
                 'eq_rank','eq_wt_score','final_Rank','final_score',
                 'mort_Rank','mort_wt_score','patct_wt_score',
                 'safety_Rank','safety_wt_score']
    baseline = df.loc[in_baseline, keep_cols]

    # Only the top-decile row per cohort.  Each (cohort AND rank) pair is
    # parenthesised explicitly rather than relying on & binding tighter than |.
    cohort = baseline['COHORT']
    final_rank = baseline['final_Rank']
    is_top_decile = (
        ((cohort == 'Complex Care Medical Center') & (final_rank == ccmc_10_var))
        | ((cohort == 'Comprehensive Academic Medical Center') & (final_rank == camc_10_var))
        | ((cohort == 'Large Specialized Complex Care Medical Center') & (final_rank == lsccmc_10_var))
        | ((cohort == 'Critical Access') & (final_rank == critacc_10_var))
    )

    return baseline[is_top_decile]

## Set `path` to the full path (including the file name) of the unlocked all_period_vizient_rankings file. This is the same file used by the Vizient Q&A Hospital Rankings Dashboard. Do not move or rename this file: the dashboard's data source currently points to it.

## Ask the Goals Director which top decile rank should be used for each cohort.  Typically, this is simply round(# of hospitals in cohort/10).  However, sometimes there are ties between hospitals and nuance around simple rounding given past year performance.

In [4]:
# Run configuration: every value a user is expected to edit before running.
# Full path (including the file name) of the unlocked
# all_period_vizient_rankings.xlsx workbook.
path = r"P:\Datastore02\Analytics\230 Inpatient Quality Composite\data\calculator data\original_unlocked\all_period_rankings\all_period_vizient_rankings.xlsx"

# Top-decile final rank per cohort; each is matched against 'final_Rank'.
ccmc_10 = 19  # Complex Care Medical Center
camc_10 = 12  # Comprehensive Academic Medical Center
lsccmc_10 = 15  # Large Specialized Complex Care Medical Center
critacc_10 = 17  # Critical Access
# Compared with == against the worksheet's 'YEAR' column (note: a string).
baseline_year = '2023'
# Compared with == against the worksheet's 'PERIOD' column.
baseline_period = 'PERIOD3'


# Baseline calculator id used to filter calc_hospital_cohort in the query.
calc_id = 19 #2023 Q&A calculator Period 3



In [5]:
# open the excel file and return a openpyxl worksheet object
ws_obj = open_all_period_vizient_rankings(path)

In [7]:
# take the openpyxl worksheet object, and clean up
# rename headers
# filter to just baseline year and period
# filter columns
# filter to just top-decile for each cohort (not including Community as of 2023)
filtered_top_decile_df = filter_all_period_vizient_rankings(ws_obj,baseline_year,baseline_period,ccmc_10_var = ccmc_10,camc_10_var = camc_10,lsccmc_10_var = lsccmc_10,critacc_10_var = critacc_10)

In [8]:
#print for validation
filtered_top_decile_df.head()

Unnamed: 0,COHORT,HCO_SHORT_NAME,PERIOD,Parent_ID,Patct_Rank,YEAR,eff_Rank,eff_wt_score,effect_Rank,effect_wt_score,eq_rank,eq_wt_score,final_Rank,final_score,mort_Rank,mort_wt_score,patct_wt_score,safety_Rank,safety_wt_score
13301,Critical Access,MAYOCLINIC_SPARTA,PERIOD3,521305,138,2023,13,0.0525,91,0.0947595,,,17,0.690205,1,0.263,0.0169454,1,0.263
13578,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,2023,135,0.0403047,88,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
13760,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,2023,26,0.0699127,27,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571
13909,Comprehensive Academic Medical Center,UCIRVINE,PERIOD3,50348,42,2023,69,0.0453586,17,0.12561,93.0,0.0439366,12,0.666522,7,0.20133,0.093915,19,0.156372


### Query the database to get ids. Note: before running this, the database should already include all hospitals through the baseline time period, and the calc_hospital_cohort table should already include the baseline time period.

---

## Baseline Period Top-Decile Hospital Ranks Query, preprocessing, and inserts

In [9]:
# Query the db for the NM hospitals' cohort assignments under the baseline
# calculator.  calc_id is passed as a bound parameter (the ? marker) rather
# than interpolated into the SQL text.  period_id is returned as a constant 0.
baseline_period_nm_cohort_assignments = """
SELECT
h.hospital_id
,h.hospital_name
,CASE WHEN c.cohort_name = 'Critical Access & Small Community' THEN 'Critical Access' ELSE c.cohort_name END as cohort_name
,chc.calc_id
,0 as period_id
FROM
NM_Analytics_Prototype.vizient_qa.calc_hospital_cohort as chc
JOIN NM_Analytics_Prototype.vizient_qa.hospitals as h
on h.hospital_id = chc.hospital_id
LEFT JOIN NM_Analytics_Prototype.vizient_qa.cohort as c
on c.cohort_id = chc.hospital_cohort_id
WHERE
chc.calc_id = ?
and
h.hospital_id
in
(
23 --NORTHWESTERN_MEMORIAL
,112 --NORTHWESTERN_CDH
,219 --NORTHWESTERN_LAKEFOREST
,221 --NORTHWESTERN_DELNOR
,222 --NORTHWESTERN_KISH
,356 --VALLEY_WEST_COMMUNITY_HOSPITAL
,471 --NORTHWESTERN_HUNTLEY
,472 --NORTHWESTERN_MCHENRY
,694 --NORTHWESTERN_PALOS
)
"""

# Raw strings keep the backslash in the server\instance name literal
# ('\E' is an invalid escape sequence in a normal string literal).
conn = pyodbc.connect(r'Driver={SQL Server};'
                      r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      r'Database=clarity;'
                      r'Trusted_Connection=yes;')

# Always release the connection, even if the query fails.
try:
    nm_baseline_hosp_cohorts = pd.read_sql(
        baseline_period_nm_cohort_assignments, conn, params=[calc_id]
    )
finally:
    conn.close()

In [10]:
# Left join the top-decile df to the database query results on
# cohort_name/COHORT, so each NM hospital for the baseline time period gets
# its cohort's top-decile performance.
# validate='many_to_one' makes pandas raise MergeError when a cohort has more
# than one top-decile row (e.g. tied ranks).  Without it the join would
# silently duplicate hospitals and the inserts below would write duplicates.
joined_df = nm_baseline_hosp_cohorts.merge(
    filtered_top_decile_df,
    left_on='cohort_name',
    right_on='COHORT',
    how='left',
    validate='many_to_one',
)

In [11]:
joined_df

Unnamed: 0,hospital_id,hospital_name,cohort_name,calc_id,period_id,COHORT,HCO_SHORT_NAME,PERIOD,Parent_ID,Patct_Rank,...,effect_wt_score,eq_rank,eq_wt_score,final_Rank,final_score,mort_Rank,mort_wt_score,patct_wt_score,safety_Rank,safety_wt_score
0,23,NORTHWESTERN_MEMORIAL,Comprehensive Academic Medical Center,19,0,Comprehensive Academic Medical Center,UCIRVINE,PERIOD3,50348,42,...,0.12561,93.0,0.0439366,12,0.666522,7,0.20133,0.093915,19,0.156372
1,112,NORTHWESTERN_CDH,Large Specialized Complex Care Medical Center,19,0,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,...,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571
2,219,NORTHWESTERN_LAKEFOREST,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
3,221,NORTHWESTERN_DELNOR,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
4,222,NORTHWESTERN_KISH,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
5,356,VALLEY_WEST_COMMUNITY_HOSPITAL,Critical Access,19,0,Critical Access,MAYOCLINIC_SPARTA,PERIOD3,521305,138,...,0.0947595,,,17,0.690205,1,0.263,0.0169454,1,0.263
6,471,NORTHWESTERN_HUNTLEY,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
7,472,NORTHWESTERN_MCHENRY,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
8,694,NORTHWESTERN_PALOS,Large Specialized Complex Care Medical Center,19,0,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,...,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571


In [None]:
# insert top-decile hospital ranks into calc_hospital_values
# measure_value_id = 8  #Top Decile Target Hospital Ranking

In [78]:
def insert_top_decile_hosp_ranks_from_df(df):
    """Insert top-decile hospital ranks into vizient_qa.calc_hospital_values.

    One row is inserted for every row of ``df`` whose 'final_Rank' is not the
    string 'Missing'.  All inserts are committed together: if any insert
    fails, the whole batch is rolled back and the exception propagates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'calc_id', 'hospital_id', 'period_id' and
        'final_Rank'.
    """
    # measure_value_id 8 = Top Decile Target Hospital Ranking
    measure_value_id = 8
    insert_sql = "INSERT INTO NM_Analytics_Prototype.vizient_qa.calc_hospital_values([calc_id],[hospital_id],[period_id],[measure_value_id],[hospital_value]) values (?,?,?,?,?)"

    # connect to the NM_Analytics database
    # Raw strings keep the backslash in the server\instance name literal
    # ('\E' is an invalid escape sequence in a normal string literal).
    conn = pyodbc.connect(r'Driver={SQL Server};'
                          r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                          r'Database=NM_Analytics_Prototype;'
                          r'Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            # iterate over df rows and insert into calc_hospital_values
            for _, row in df.iterrows():
                if row['final_Rank'] != 'Missing':
                    cursor.execute(
                        insert_sql,
                        row['calc_id'], row['hospital_id'], row['period_id'],
                        measure_value_id, row['final_Rank'])
            # Single commit so a failure part-way cannot leave a partial load.
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print('done inserting rows.')

In [79]:
insert_top_decile_hosp_ranks_from_df(joined_df)

done inserting rows.


---

## Baseline Period Top-Decile Hospital Domain Ranks and Domain Scores preprocessing and data inserts

In [12]:
# Domain IDs (NM_Analytics_Prototype.vizient_qa.domain)
'''
1 Effectiveness
2 Efficiency
3 Equity
4 Mortality
5 Outpatient
6 Patient Centeredness
7 Safety
'''

'\n1 Effectiveness\n2 Efficiency\n3 Equity\n4 Mortality\n5 Outpatient\n6 Patient Centeredness\n7 Safety\n'

In [13]:
joined_df

Unnamed: 0,hospital_id,hospital_name,cohort_name,calc_id,period_id,COHORT,HCO_SHORT_NAME,PERIOD,Parent_ID,Patct_Rank,...,effect_wt_score,eq_rank,eq_wt_score,final_Rank,final_score,mort_Rank,mort_wt_score,patct_wt_score,safety_Rank,safety_wt_score
0,23,NORTHWESTERN_MEMORIAL,Comprehensive Academic Medical Center,19,0,Comprehensive Academic Medical Center,UCIRVINE,PERIOD3,50348,42,...,0.12561,93.0,0.0439366,12,0.666522,7,0.20133,0.093915,19,0.156372
1,112,NORTHWESTERN_CDH,Large Specialized Complex Care Medical Center,19,0,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,...,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571
2,219,NORTHWESTERN_LAKEFOREST,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
3,221,NORTHWESTERN_DELNOR,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
4,222,NORTHWESTERN_KISH,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
5,356,VALLEY_WEST_COMMUNITY_HOSPITAL,Critical Access,19,0,Critical Access,MAYOCLINIC_SPARTA,PERIOD3,521305,138,...,0.0947595,,,17,0.690205,1,0.263,0.0169454,1,0.263
6,471,NORTHWESTERN_HUNTLEY,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
7,472,NORTHWESTERN_MCHENRY,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
8,694,NORTHWESTERN_PALOS,Large Specialized Complex Care Medical Center,19,0,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,...,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571


In [14]:
joined_df.columns

Index(['hospital_id', 'hospital_name', 'cohort_name', 'calc_id', 'period_id',
       'COHORT', 'HCO_SHORT_NAME', 'PERIOD', 'Parent_ID', 'Patct_Rank', 'YEAR',
       'eff_Rank', 'eff_wt_score', 'effect_Rank', 'effect_wt_score', 'eq_rank',
       'eq_wt_score', 'final_Rank', 'final_score', 'mort_Rank',
       'mort_wt_score', 'patct_wt_score', 'safety_Rank', 'safety_wt_score'],
      dtype='object')

In [15]:
joined_df

Unnamed: 0,hospital_id,hospital_name,cohort_name,calc_id,period_id,COHORT,HCO_SHORT_NAME,PERIOD,Parent_ID,Patct_Rank,...,effect_wt_score,eq_rank,eq_wt_score,final_Rank,final_score,mort_Rank,mort_wt_score,patct_wt_score,safety_Rank,safety_wt_score
0,23,NORTHWESTERN_MEMORIAL,Comprehensive Academic Medical Center,19,0,Comprehensive Academic Medical Center,UCIRVINE,PERIOD3,50348,42,...,0.12561,93.0,0.0439366,12,0.666522,7,0.20133,0.093915,19,0.156372
1,112,NORTHWESTERN_CDH,Large Specialized Complex Care Medical Center,19,0,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,...,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571
2,219,NORTHWESTERN_LAKEFOREST,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
3,221,NORTHWESTERN_DELNOR,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
4,222,NORTHWESTERN_KISH,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
5,356,VALLEY_WEST_COMMUNITY_HOSPITAL,Critical Access,19,0,Critical Access,MAYOCLINIC_SPARTA,PERIOD3,521305,138,...,0.0947595,,,17,0.690205,1,0.263,0.0169454,1,0.263
6,471,NORTHWESTERN_HUNTLEY,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
7,472,NORTHWESTERN_MCHENRY,Complex Care Medical Center,19,0,Complex Care Medical Center,NORTHSHORE_HIGHLANDPARK,PERIOD3,149610,9,...,0.105355,134.0,0.0452813,19,0.652499,20,0.185266,0.13116,66,0.145131
8,694,NORTHWESTERN_PALOS,Large Specialized Complex Care Medical Center,19,0,Large Specialized Complex Care Medical Center,NORTHWESTERN_CDH,PERIOD3,140242,63,...,0.119589,118.0,0.0453698,15,0.677723,27,0.178373,0.0839071,8,0.180571


In [16]:
# Reshape wide -> long: one row per (calc_id, hospital_id, period_id) and
# domain rank/score column; melt names the new columns 'variable' and 'value'.
long_joined_df = joined_df.melt(
    id_vars=['calc_id', 'hospital_id', 'period_id'],
    value_vars=[
        'Patct_Rank',
        'eff_Rank', 'eff_wt_score',
        'effect_Rank', 'effect_wt_score',
        'eq_rank', 'eq_wt_score',
        'mort_Rank', 'mort_wt_score',
        'patct_wt_score',
        'safety_Rank', 'safety_wt_score',
    ],
)

In [17]:
long_joined_df

Unnamed: 0,calc_id,hospital_id,period_id,variable,value
0,19,23,0,Patct_Rank,42
1,19,112,0,Patct_Rank,63
2,19,219,0,Patct_Rank,9
3,19,221,0,Patct_Rank,9
4,19,222,0,Patct_Rank,9
...,...,...,...,...,...
103,19,222,0,safety_wt_score,0.145131
104,19,356,0,safety_wt_score,0.263
105,19,471,0,safety_wt_score,0.145131
106,19,472,0,safety_wt_score,0.145131


In [18]:
# Add the first of the two ids the insert needs: measure_value_id.
# A variable whose lower-cased name contains '_rank' is a top-decile domain
# rank; every other variable is treated as a top-decile domain score.
long_joined_df['measure_value_id'] = np.where(
    long_joined_df['variable'].str.lower().str.contains('_rank'),
    7,   # Top Decile Target Domain Ranking
    11,  # Top Decile Domain Aggregate % of Overall
)


In [115]:
# conditionally create domain_id column
#
# Domain ids:
#   1 Effectiveness
#   2 Efficiency
#   3 Equity
#   4 Mortality
#   5 Outpatient   (not mapped below)
#   6 Patient Centeredness
#   7 Safety

# One boolean mask per variable-name token, in the same order as domain_values.
domain_conditions = [
    long_joined_df['variable'].str.lower().str.contains(token).eq(True)
    for token in ('effect_', 'eff_', 'eq_', 'mort_', 'patct_', 'safety_')
]

domain_values = [1, 2, 3, 4, 6, 7]

# np.select picks the value of the first matching condition; rows matching
# none of them get its default of 0.
long_joined_df['domain_id'] = np.select(domain_conditions, domain_values)

In [19]:
# Drop the Valley West (hospital_id 356) rows whose value is null.

long_joined_df = long_joined_df.loc[
    ~(long_joined_df['hospital_id'].eq(356) & long_joined_df['value'].isna())
]

In [20]:
#long_joined_df.to_csv('test.csv')

In [21]:
long_joined_df.to_csv('test.csv')

In [22]:
long_joined_df

Unnamed: 0,calc_id,hospital_id,period_id,variable,value,measure_value_id
0,19,23,0,Patct_Rank,42,7
1,19,112,0,Patct_Rank,63,7
2,19,219,0,Patct_Rank,9,7
3,19,221,0,Patct_Rank,9,7
4,19,222,0,Patct_Rank,9,7
...,...,...,...,...,...,...
103,19,222,0,safety_wt_score,0.145131,11
104,19,356,0,safety_wt_score,0.263,11
105,19,471,0,safety_wt_score,0.145131,11
106,19,472,0,safety_wt_score,0.145131,11


In [123]:
def insert_top_decile_domain_ranks_from_df(df):
    """Insert top-decile domain ranks/scores into vizient_qa.calc_domain_values.

    One row is inserted for every row of ``df`` whose 'value' is not the
    string 'Missing'.  All inserts are committed together: if any insert
    fails, the whole batch is rolled back and the exception propagates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'calc_id', 'hospital_id', 'domain_id',
        'period_id', 'measure_value_id' and 'value'.
    """
    insert_sql = "INSERT INTO NM_Analytics_Prototype.vizient_qa.calc_domain_values([calc_id],[hospital_id],[domain_id],[period_id],[measure_value_id],[domain_value]) values (?,?,?,?,?,?)"

    # connect to the NM_Analytics database
    # Raw strings keep the backslash in the server\instance name literal
    # ('\E' is an invalid escape sequence in a normal string literal).
    conn = pyodbc.connect(r'Driver={SQL Server};'
                          r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                          r'Database=NM_Analytics_Prototype;'
                          r'Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            # iterate over df rows and insert into calc_domain_values
            for _, row in df.iterrows():
                if row['value'] != 'Missing':
                    cursor.execute(
                        insert_sql,
                        row['calc_id'], row['hospital_id'], row['domain_id'],
                        row['period_id'], row['measure_value_id'], row['value'])
            # Single commit so a failure part-way cannot leave a partial load.
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print('done inserting rows.')

In [124]:
insert_top_decile_domain_ranks_from_df(long_joined_df)

done inserting rows.
