In [97]:
import pandas as pd
import pyodbc
import openpyxl
import os
import re
import numpy as np
import math
import requests

### This is a helper file to create the hyperlink excel files found in 230 Inpatient Quality Composite/data/hyperlink data.  
### These hyperlink files are used to create a lookup dictionary for the python script to iterate over and call each report template.
### We use the original calculators as input then ouput a raw excel file for the python bot to loop over.

### NOTE:  ATTENTION:  There are a few errors on the TC worksheet that need to be manually fixed each time.  
    *  DCOST Gen Med:  Missing hyperlink on all.
    *  DCOST Gen Surg: Wrong hyperlink on all.  They provide the gen med template instead of gen surg.
    *  DCOST OB/GYN:   Sometimes missing hyperlink
    *  DCOST O/E Trauma:  Wrong hyperlink on all.  They provide the OB/GYN template instead of trauma.

In [98]:
#takes a file path and file name as input and returns an openpyxl workbook object.
def get_wb_object(path_obj,file_obj):
    file_loc = os.path.abspath(os.path.join(path_obj,file_obj))
    wb = openpyxl.load_workbook(file_loc, data_only = True)
    return(wb)

In [99]:
#takes a file path and file name as input and returns a list of all worksheets within an openpyxl workbook object
def grab_worksheet_list(path_obj,file_obj):
    file_loc = os.path.abspath(os.path.join(path_obj,file_obj))
    wb = openpyxl.load_workbook(file_loc, data_only = True)
    sheet_list = wb.sheetnames
    return(sheet_list)

In [100]:
#does the same as above but takes a full file path + filename as input.
def get_wb_object2(path_and_file):
    #file_loc = os.path.abspath(os.path.join(path_obj,file_obj))
    wb = openpyxl.load_workbook(path_and_file, data_only = True)
    return(wb)

In [101]:
#does the same as above but takes a full file path + filename as input.
def grab_worksheet_list2(path_and_file):
    #file_loc = os.path.abspath(os.path.join(path_obj,file_obj))
    wb = openpyxl.load_workbook(path_and_file, data_only = True)
    sheet_list = wb.sheetnames
    return(sheet_list)

In [102]:
calc_path = r'P:\Datastore02\Analytics\230 Inpatient Quality Composite\data\calculator data\original\2023\period3_original_with_proxies'
calc_file = r'QACalculator_140211_Period3_2023.xlsm'

In [103]:
# Columns And Order Needed in the final dataset:
'''
Hospital
Keyword/Metric
Mean
SD
Transformation
metric_direction
shift_value
Formal Name
Hyperlink
JobStoreID
ReportID
AdjustmentModel
AHRQ Version
Domain
'''


'\nHospital\nKeyword/Metric\nMean\nSD\nTransformation\nmetric_direction\nshift_value\nFormal Name\nHyperlink\nJobStoreID\nReportID\nAdjustmentModel\nAHRQ Version\nDomain\n'

#### Grab the excel workbook object into a variable ###

In [104]:
calc_wb = get_wb_object(calc_path,calc_file)

#### Grab the 'TC' worksheet into a pandas dataframe ####

In [105]:
calc_wb.sheetnames

['Calculator',
 'What If-Rank',
 'TC',
 'CurrentQA Cumulative-Metric',
 'CurrentQA Cumulative-Rank',
 'Metric Weights',
 'Percentile Distribution',
 'Data Periods']

In [106]:
# TC hyperlink column we want:
# 2019 Period 4:  Hyperlink from Peter
# 2020 Period 1:  Hyperlink from Peter
# 2020 Period 1 community:  Hyperlink from Peter
# 2020 Period 2:  Hyperlink from Peter
# 2020 Period 2 community:  Hyperlink from Peter
# 2021 Period 2:  Hyperlink from Peter
# 2021 Period 3:  Hyperlink from Peter

In [107]:
df = pd.DataFrame(calc_wb['TC'].values)

#### In openpyxl, the headers are read in as numbers.  The first index row is actually the header (if there is one).  Rename the headers using the first index row.  I'm probably forgetting some parameter here.  Will investigate later ###

In [108]:
df = df.rename(columns=df.iloc[0])

In [109]:
df = df.drop([0])

#### Remove all rows where the 'Keyword/Metric' column is null.  This will remove a lot of the noise in the dataset.  In the original worksheet, there are nulls.  There are zeros.  Reading in using python creates nulls.  Therefore, replace nulls with zeros to match the original excel worksheet. ####

In [110]:
df_all_row_indices =  df['Keyword/Metric'].notnull()
df = df[df_all_row_indices]

In [111]:
if 'Hyperlink from Peter' in list(df.columns):
    df = df.rename(columns={"Hyperlink from Peter": "Hyperlink"})

In [112]:
#replace None with 0
df.Hyperlink.fillna(value=0, inplace=True)

#### Create JobStoreID column using a substring of the url ####

In [113]:
#extract the string between 'JobStoreID=' and the last '&'.  Then split the result on '&' to create a list.
# element of the list should be the JobStoreID.
df['JobStoreID1'] = df.Hyperlink.str.extract('.*JobStoreId=(.*)\&.*')[0].str.split('&')

In [114]:
#Create placeholder column which will be conditionally updated withe the JobStoreID
df['JobStoreID'] = 0

In [115]:
#Replace placeholder JobStoreID rows with the JobStoreID.  This is the first index of the split url.
for i,item in enumerate(df['JobStoreID1']):
    if type(item)  == list:
        df['JobStoreID'].iloc[i] = df['JobStoreID1'].iloc[i][0]
        #print(df['JobStoreID1'].iloc[i][0])
    else:
        #print(item)
        pass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [116]:
#drop JobStoreID1 column because it is no longer required
df = df.drop(['JobStoreID1'],axis=1)

#### Do the same thing for ReportID

In [117]:
df['ReportID1'] = df.Hyperlink.str.extract('.*TemplateReportId=(.*)\&.*')[0].str.split('&')

In [118]:
#Create placeholder column which will be conditionally updated withe the ReportID
df['ReportID'] = 0

In [119]:
#Replace placeholder JobStoreID rows with the JobStoreID.  This is the first index of the split url.
for i,item in enumerate(df['ReportID1']):
    if type(item)  == list:
        df['ReportID'].iloc[i] = df['ReportID1'].iloc[i][0]
    else:
        pass

In [120]:
#drop ReportID1 column because it is no longer required
df = df.drop(['ReportID1'],axis=1)

In [121]:
list(df.columns)

['Keyword/Metric',
 'Mean',
 'SD',
 'Transformation',
 't7',
 't6',
 't5',
 't4',
 't3',
 't2',
 't1',
 'metric_direction',
 'P5',
 'P10',
 'P15',
 'P20',
 'P25',
 'P30',
 'P35',
 'P40',
 'P45',
 'P50',
 'P55',
 'P60',
 'P65',
 'P70',
 'P75',
 'P80',
 'P85',
 'P90',
 'P95',
 'shift_value',
 'Formal Name',
 'Hyperlink Final',
 'Hyperlink',
 'Medicare ID = ',
 '140211',
 'JobStoreID',
 'ReportID']

In [122]:
df['Formal Name'].unique()

array(['Cleanliness/Quietness', 'DCOST O/E - Cardiology',
       'DCOST O/E - CT Surgery', 'DCOST O/E - Gastroenterology',
       'DCOST O/E - Medicine General', 'DCOST O/E - Neurology',
       'DCOST O/E - Neurosurgery', 'DCOST O/E - OB/GYN',
       'DCOST O/E - Oncology', 'DCOST O/E - Ortho/Spine',
       'DCOST O/E - Pulmonary/Critical Care',
       'DCOST O/E - Surgery General', 'DCOST O/E - Trauma',
       'DCOST O/E - Urology', 'DCOST O/E - Vascular Surgery', 'Discharge',
       'Doctor', 'Excess Days  - Cardiology', 'Excess Days - CT Surgery',
       'Excess Days - Gastroenterology', 'Excess Days - Medicine General',
       'Excess Days - Neurology', 'Excess Days - Neurosurgery',
       'Excess Days - Oncology', 'Excess Days - Ortho/Spine',
       'Excess Days - Pulmonary/Critical Care',
       'Excess Days - Surgery General', 'Excess Days - Trauma',
       'Excess Days - Vascular Surgery', 'Hypoglycemia in insulin use',
       'LOS O/E - Cardiology', 'LOS O/E - CT Surgery',
   

In [123]:
#query the db to vizient qa
conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      'Database=clarity;'
                     'Trusted_Connection=yes;')

In [124]:
all_domains = '''
          SELECT
         distinct
         m.measure_name as [Formal Name]
         --,d.domain_id
         ,d.domain_nm as Domain
         FROM
         NM_Analytics_Prototype.vizient_qa.measure as m
         join NM_Analytics_Prototype.vizient_qa.domain as d
         on m.domain_id = d.domain_id
        '''

#read query results into pandas dataframe
meas_domain_df = pd.DataFrame(pd.read_sql(all_domains,conn))
#close the db connection
conn.close()


#### left join the domain data to the main dataframe

In [125]:
df = df.merge(meas_domain_df, on='Formal Name', how='left')

In [126]:
df.head()

Unnamed: 0,Keyword/Metric,Mean,SD,Transformation,t7,t6,t5,t4,t3,t2,...,P95,shift_value,Formal Name,Hyperlink Final,Hyperlink,Medicare ID =,140211,JobStoreID,ReportID,Domain
0,CLEANQUIET,62.9327412163368,6.42654633351189,1,82.2124,75.7858,69.3593,62.9327,56.5062,50.0796,...,72.9683,0,Cleanliness/Quietness,,,,,0,0,Patient Centeredness
1,DCOST_CARD,1.13007289239593,0.27419450607155,2,1.95266,1.67846,1.40427,1.13007,0.855878,0.581684,...,1.66148,0,DCOST O/E - Cardiology,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14995,3110,Efficiency
2,DCOST_CT,1.12317599395296,0.25236747031731,2,1.88028,1.62791,1.37554,1.12318,0.870809,0.618441,...,1.51745,0,DCOST O/E - CT Surgery,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14996,3111,Efficiency
3,DCOST_GASTRO,1.12441642891766,0.28940466592906,2,1.99263,1.70323,1.41382,1.12442,0.835012,0.545607,...,1.71865,0,DCOST O/E - Gastroenterology,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14997,3112,Efficiency
4,DCOST_MED,1.10650013964381,0.28151527213975,2,1.95105,1.66953,1.38802,1.1065,0.824985,0.54347,...,1.65205,0,DCOST O/E - Medicine General,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14998,3113,Efficiency


#### Fill in the hospital name column

In [127]:
calc_file

'QACalculator_140211_Period3_2023.xlsm'

In [128]:
if 'Critical_Access' in calc_file:
    hospital_query = '''

     SELECT
     hospital_medicare_id + ' ' + hospital_name as Hospital
     FROM
     NM_Analytics_Prototype.vizient_qa.hospitals
     where
     hospital_medicare_id <> '0'
     and
     hospital_medicare_id = %s
    ''' % calc_file.split('_')[3]    
else:
    hospital_query = '''

     SELECT
     hospital_medicare_id + ' ' + hospital_name as Hospital
     FROM
     NM_Analytics_Prototype.vizient_qa.hospitals
     where
     hospital_medicare_id <> '0'
     and
     hospital_medicare_id = %s
    ''' % calc_file.split('_')[1]


#query the db to vizient qa
conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      'Database=clarity;'
                     'Trusted_Connection=yes;')

#read query results into pandas dataframe
hosp_df = pd.DataFrame(pd.read_sql(hospital_query,conn))
#close the db connection
conn.close()

In [129]:
hosp_df['Hospital'][0]

'140211 NORTHWESTERN_DELNOR'

In [130]:
df['Hospital'] = hosp_df['Hospital'][0]

#### Fill in the AdjustmentModel column

In [131]:
calc_file

'QACalculator_140211_Period3_2023.xlsm'

In [132]:
df.head()

Unnamed: 0,Keyword/Metric,Mean,SD,Transformation,t7,t6,t5,t4,t3,t2,...,shift_value,Formal Name,Hyperlink Final,Hyperlink,Medicare ID =,140211,JobStoreID,ReportID,Domain,Hospital
0,CLEANQUIET,62.9327412163368,6.42654633351189,1,82.2124,75.7858,69.3593,62.9327,56.5062,50.0796,...,0,Cleanliness/Quietness,,,,,0,0,Patient Centeredness,140211 NORTHWESTERN_DELNOR
1,DCOST_CARD,1.13007289239593,0.27419450607155,2,1.95266,1.67846,1.40427,1.13007,0.855878,0.581684,...,0,DCOST O/E - Cardiology,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14995,3110,Efficiency,140211 NORTHWESTERN_DELNOR
2,DCOST_CT,1.12317599395296,0.25236747031731,2,1.88028,1.62791,1.37554,1.12318,0.870809,0.618441,...,0,DCOST O/E - CT Surgery,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14996,3111,Efficiency,140211 NORTHWESTERN_DELNOR
3,DCOST_GASTRO,1.12441642891766,0.28940466592906,2,1.99263,1.70323,1.41382,1.12442,0.835012,0.545607,...,0,DCOST O/E - Gastroenterology,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14997,3112,Efficiency,140211 NORTHWESTERN_DELNOR
4,DCOST_MED,1.10650013964381,0.28151527213975,2,1.95105,1.66953,1.38802,1.1065,0.824985,0.54347,...,0,DCOST O/E - Medicine General,https://cdprm.vizientinc.com/CDPrm/web/reports...,https://cdprm.vizientinc.com/CDPrm/web/reports...,&medicareid=140211,https://cdprm.vizientinc.com/CDPrm/web/reports...,14998,3113,Efficiency,140211 NORTHWESTERN_DELNOR


In [133]:
#assigning the adjustment model variable
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2019.xlsm' in calc_file):
    adjustmentModel = '2018 Risk Model (AMC)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW'] and ('_2019.xlsm' in calc_file):
    adjustmentModel = '2018 Risk Model (Community)'
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2020.xlsm' in calc_file):
    adjustmentModel = '2019 Risk Model (AMC)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW'] and ('_2020.xlsm' in calc_file):
    adjustmentModel = '2019 Risk Model (Community)'
    
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2021.xlsm' in calc_file):
    adjustmentModel = '2020 Risk Model (AMC)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW','141340 Valley West Community Hospital','149916 NORTHWESTERN_HUNTLEY','140116 NORTHWESTERN_MCHENRY'] and ('_2021.xlsm' in calc_file):
    adjustmentModel = '2020 Risk Model (Community) '
    
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2022.xlsm' in calc_file):
    adjustmentModel = '2021 Risk Model (AMC)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW','141340 Valley West Community Hospital','149916 NORTHWESTERN_HUNTLEY','140116 NORTHWESTERN_MCHENRY', '140062 NORTHWESTERN_PALOS','141340 VALLEY_WEST_COMMUNITY_HOSPITAL'] and ('_2022.xlsm' in calc_file):
    adjustmentModel = '2021 Risk Model (Community) '
    
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH','140062 NORTHWESTERN_PALOS']) and ('_2023.xlsm' in calc_file):
    adjustmentModel = '2022 Risk Model (AMC)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW','141340 Valley West Community Hospital','149916 NORTHWESTERN_HUNTLEY','140116 NORTHWESTERN_MCHENRY' ,'141340 VALLEY_WEST_COMMUNITY_HOSPITAL'] and ('_2023.xlsm' in calc_file):
    adjustmentModel = '2022 Risk Model (Community)'

In [134]:
#assigning ahrq version variable
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2019.xlsm' in calc_file):
    ahrq_version = '8.0 (CMS Safety)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW'] and ('_2019.xlsm' in calc_file):
    ahrq_version = '8.0 (CMS Safety)'
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2020.xlsm' in calc_file):
    ahrq_version = 'V2019 (Pediatric) / V2019 (Quality) / V2019 (Safety)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 NORTHWESTERN_VALLEYW'] and ('_2020.xlsm' in calc_file):
    ahrq_version = 'V2019 (Pediatric) / V2019 (Quality) / V2019 (Safety)'
    
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2021.xlsm' in calc_file):
    ahrq_version = 'V2020 (Pediatric) / V2020 (Quality) / V2020 (Safety)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 Valley West Community Hospital','141340 NORTHWESTERN_VALLEYW','149916 NORTHWESTERN_HUNTLEY','140116 NORTHWESTERN_MCHENRY'] and ('_2021.xlsm' in calc_file):
    ahrq_version = 'V2020 (Pediatric) / V2020 (Quality) / V2020 (Safety)'
    

if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH']) and ('_2022.xlsm' in calc_file):
    ahrq_version = 'V2021 (Pediatric) / V2021 (Quality) / V2021 (Safety)'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 Valley West Community Hospital','141340 NORTHWESTERN_VALLEYW','149916 NORTHWESTERN_HUNTLEY','140116 NORTHWESTERN_MCHENRY','140062 NORTHWESTERN_PALOS','141340 VALLEY_WEST_COMMUNITY_HOSPITAL'] and ('_2022.xlsm' in calc_file):
    ahrq_version = 'V2021 (Pediatric) / V2021 (Quality) / V2021 (Safety)' 
    
if (hosp_df['Hospital'][0] in ['140281 NORTHWESTERN_MEMORIAL','140242 NORTHWESTERN_CDH','140062 NORTHWESTERN_PALOS']) and ('_2023.xlsm' in calc_file):
    ahrq_version = 'V2022'
elif hosp_df['Hospital'][0] in ['140130 NORTHWESTERN_LAKEFOREST','140211 NORTHWESTERN_DELNOR','140286 NORTHWESTERN_KISH','141340 Valley West Community Hospital','141340 NORTHWESTERN_VALLEYW','149916 NORTHWESTERN_HUNTLEY','140116 NORTHWESTERN_MCHENRY','141340 VALLEY_WEST_COMMUNITY_HOSPITAL'] and ('_2023.xlsm' in calc_file):
    ahrq_version = 'V2022' 

In [135]:
df['AdjustmentModel'] = adjustmentModel

In [136]:
df['AHRQ Version'] = ahrq_version

In [137]:
df = df[['Hospital','Keyword/Metric','Mean','SD','Transformation','metric_direction','shift_value','Formal Name','Hyperlink','JobStoreID','ReportID','AdjustmentModel','AHRQ Version','Domain']]

### Write the output to excel file

In [138]:
df.head()

Unnamed: 0,Hospital,Keyword/Metric,Mean,SD,Transformation,metric_direction,shift_value,Formal Name,Hyperlink,JobStoreID,ReportID,AdjustmentModel,AHRQ Version,Domain
0,140211 NORTHWESTERN_DELNOR,CLEANQUIET,62.9327412163368,6.42654633351189,1,Higher,0,Cleanliness/Quietness,,0,0,2022 Risk Model (Community),V2022,Patient Centeredness
1,140211 NORTHWESTERN_DELNOR,DCOST_CARD,1.13007289239593,0.27419450607155,2,Lower,0,DCOST O/E - Cardiology,https://cdprm.vizientinc.com/CDPrm/web/reports...,14995,3110,2022 Risk Model (Community),V2022,Efficiency
2,140211 NORTHWESTERN_DELNOR,DCOST_CT,1.12317599395296,0.25236747031731,2,Lower,0,DCOST O/E - CT Surgery,https://cdprm.vizientinc.com/CDPrm/web/reports...,14996,3111,2022 Risk Model (Community),V2022,Efficiency
3,140211 NORTHWESTERN_DELNOR,DCOST_GASTRO,1.12441642891766,0.28940466592906,2,Lower,0,DCOST O/E - Gastroenterology,https://cdprm.vizientinc.com/CDPrm/web/reports...,14997,3112,2022 Risk Model (Community),V2022,Efficiency
4,140211 NORTHWESTERN_DELNOR,DCOST_MED,1.10650013964381,0.28151527213975,2,Lower,0,DCOST O/E - Medicine General,https://cdprm.vizientinc.com/CDPrm/web/reports...,14998,3113,2022 Risk Model (Community),V2022,Efficiency


In [139]:
#drop all rows that do not have at least 6 columns populated.  There are 6 columns that will definitely be populated,
#so we want at least 7
df = df.dropna(thresh=7)

In [140]:
calc_file

'QACalculator_140211_Period3_2023.xlsm'

In [141]:
#set the file path
output_file_path = r'P:\Datastore02\Analytics\230 Inpatient Quality Composite\data\hyperlink data\2023\period3_template_hyperlinks'
#set the file name
if '140281' in calc_file:
    output_file_name = 'nmh_links.xlsx'
elif '140242' in calc_file:
    output_file_name = 'cdh_links.xlsx'
elif '140130' in calc_file:
    output_file_name = 'lfh_links.xlsx'
elif '140211' in calc_file:
    output_file_name = 'dch_links.xlsx'
elif '140286' in calc_file:
    output_file_name = 'kish_links.xlsx'
elif '141340' in calc_file:
    output_file_name = 'vwh_links.xlsx'
elif '149916' in calc_file:
    output_file_name = 'hh_links.xlsx'
elif '140116' in calc_file:
    output_file_name = 'mch_links.xlsx'
elif '140062' in calc_file:
    output_file_name = 'palos_links.xlsx'
#output_file_name = r'mch_links.xlsx'
file_loc_output = os.path.abspath(os.path.join(output_file_path,output_file_name))

In [142]:
file_loc_output

'P:\\Datastore02\\Analytics\\230 Inpatient Quality Composite\\data\\hyperlink data\\2023\\period3_template_hyperlinks\\dch_links.xlsx'

In [143]:
#remove link to report express since it is not helpful to our CDB scraper.
df.loc[df["Hyperlink"] == "https://cdprm.vizientinc.com/CDPrm/web/reports/ReportExpress.aspx", "Hyperlink"] = None

In [144]:
#write to excel file
df.to_excel(file_loc_output,sheet_name='Sheet1',index_label=False,index=False)

### Community Cohort Section:  This should be the same as the Complex Care Medical Center cohort.  Just change the hospital name to a community hospital so it will create a distinct lookup dictionary

In [84]:
calc_file

'CriticalAccessQACalculator_141340_Period1_2023.xlsm'

In [488]:
# read in Lake forest hyperlink file.
filepath = r'P:\Datastore02\Analytics\230 Inpatient Quality Composite\data\hyperlink data\2023\period1_template_hyperlinks'


    
#filename = 'mch_links.xlsx'
comm_df = pd.read_excel(os.path.join(filepath,filename),sheets = 'Sheet1')

NameError: name 'filename' is not defined

In [None]:
comm_df.head(n=3)

In [None]:
comm_df['Hospital'] = '149810 NORTHSHORE_GLENVIEW'

### Output the community cohort file

In [85]:
output_file_name = r'vwh_links.xlsx'
file_loc_output = os.path.abspath(os.path.join(output_file_path,output_file_name))
#write to excel file
comm_df.to_excel(file_loc_output,sheet_name='Sheet1',index_label=False,index=False)

NameError: name 'comm_df' is not defined