In [1]:
import numpy as np
import pandas as pd
import scipy
import scipy.special
import openpyxl
import os
import scipy.stats as stats
import pyodbc
from datetime import datetime, timedelta
from collections import OrderedDict

##UPDATE LOG

##UL001  rbeyer  01.20.2021  We found a missing logic branch.  If denominator meets threshold AND both strata have 0 numerator, then PASS. 
##UL002  rbeyer  04.23.2021  Add dischargemonth to output file.
##UL003  rbeyer  10.28.2021  NULLS introduced to timelapse, bnpchange, and hgbchange columns.  Need to fillna with 0.

### Set Filepath for Equity Domain Data folder

In [2]:
# Root folder of the Equity Domain extracts. The processing loop lists this
# directory and then looks inside each non-.zip entry for a file with '_PHI'
# in its name.
filepath2 = r'P:\Datastore02\Analytics\230 Inpatient Quality Composite\data\Equity Data\fy23\june_mb_test'

### Get list of all NM hospital medicare IDs to filter the dataset with

In [3]:
# Calculator whose hospital/cohort assignments are used for this run.
calculator_id = 15

# Query the db for the NM hospitals attached to this calculator and their cohorts.
# The server part is a raw literal so the backslash in front of the instance
# name is taken literally instead of starting an (invalid) escape sequence.
conn = pyodbc.connect('Driver={SQL Server};'
                      r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      'Database=clarity;'
                      'Trusted_Connection=yes;')
nm_medicare_ids = """
SELECT
h.hospital_medicare_id
,c.cohort_name
FROM
NM_Analytics_Prototype.vizient_qa.hospitals as h
join NM_Analytics_Prototype.vizient_qa.calc_hospital_cohort as chc
on chc.hospital_id = h.hospital_id
join NM_Analytics_Prototype.vizient_qa.cohort as c
on c.cohort_id = chc.hospital_cohort_id
where
left(h.hospital_name, 12) = 'NORTHWESTERN'
AND
chc.calc_id = %s
AND
c.cohort_name in ('Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center','Complex Care Medical Center','Community')
""" % calculator_id

try:
    # read_sql already returns a DataFrame, so no extra wrapping is needed.
    query_results = pd.read_sql(nm_medicare_ids, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

nm_medicare_id_list = list(query_results.set_index('hospital_medicare_id').to_dict()['cohort_name'].keys())
# Convert medicare id strings to integers because the csv file lists them in integer format.
# NOTE(review): this comprehension used to carry `if i != 140130`. The ids are
# strings at this point, so that str-vs-int comparison was always True and never
# dropped anything (140130 is present in the resulting list). The no-op filter
# is removed; if 140130 really must be excluded, filter on the string '140130'.
nm_medicare_id_ints = [int(i) for i in nm_medicare_id_list]

### Query the database and get the current IQC Period ID based on last month's end date

In [4]:
# Reporting period to look up: its end timestamp and its period type name.
period_end_dts = '06-30-2023 23:59:59'
period_type = 'NM_FSCL_YTD'

# Query the db to get the period id.
# The server part is a raw literal so the backslash in front of the instance
# name is taken literally instead of starting an (invalid) escape sequence.
conn = pyodbc.connect('Driver={SQL Server};'
                      r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      'Database=clarity;'
                      'Trusted_Connection=yes;')
period_ids = """
select
period_id
FROM
NM_Analytics_Prototype.vizient_qa.periods as p
join NM_Analytics_Prototype.vizient_qa.period_types as pt
on pt.period_type_id = p.period_type
where
p.period_end_date = '%s'
AND
pt.period_type_nm = '%s'
""" % (period_end_dts,period_type)

try:
    period_rows = pd.read_sql(period_ids, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

# Fail with a readable message instead of an opaque lookup error when the
# period table has no row for the requested end date / period type.
if period_rows.empty:
    raise ValueError(
        'No period_id found for period_end_date=%r and period_type_nm=%r'
        % (period_end_dts, period_type)
    )

# First matching row's period id.
period_query_results = period_rows['period_id'].iloc[0]

In [5]:
period_query_results

7514

### Create a dictionary of hospital cohorts and hospital medicare IDS

In [6]:
# Lookup of cohort name keyed by hospital medicare id, built from the cohort query results.
hospital_cohort_dict = query_results.set_index('hospital_medicare_id')['cohort_name'].to_dict()

In [7]:
#hospital_cohort_dict.pop('140062')

In [8]:
hospital_cohort_dict

{'140281': 'Comprehensive Academic Medical Center',
 '140242': 'Large Specialized Complex Care Medical Center',
 '140130': 'Complex Care Medical Center',
 '140211': 'Complex Care Medical Center',
 '140286': 'Complex Care Medical Center',
 '149916': 'Complex Care Medical Center',
 '140116': 'Complex Care Medical Center',
 '140062': 'Complex Care Medical Center'}

In [9]:
nm_medicare_id_ints

[140281, 140242, 140130, 140211, 140286, 149916, 140116, 140062]

In [10]:
#nm_medicare_id_ints = [140062]
#hospital_cohort_dict = {'140062': 'Large Specialized Complex Care Medical Center'}

In [11]:
#nm_medicare_id_ints.remove(140062)

In [12]:
#nm_medicare_id_ints

### Set Parameters for Baseline calculator, Fiscal Year and Period End Dts

In [13]:
# Baseline calculator name plus the period type / period end timestamp for this run.
# NOTE(review): period_type_input and period_end_dts_input repeat the
# period_type / period_end_dts values set in the period-id query cell (the date
# is written with '/' here and with '-' there). Where these *_input names are
# consumed is not visible in this part of the notebook - confirm before changing.
calc_nm_input = '2022 Q&A calculator Period 3'
period_type_input = 'NM_FSCL_YTD'
period_end_dts_input = '06/30/2023 23:59:59'

### Get list of discharge months to filter the dataset

In [14]:
# Prompt for the first discharge month; the answer is later handed to
# pd.date_range as the start of the month range.
start_dts = input('Input the start discharge month (using format yyyy-mm-dd)')

Input the start discharge month (using format yyyy-mm-dd)2022-08-01


In [15]:
# Prompt for the last discharge month; the answer is later handed to
# pd.date_range as the end of the month range.
end_dts = input('Input the end discharge month (using format yyyy-mm-dd)')

Input the end discharge month (using format yyyy-mm-dd)2023-05-01


In [16]:
# One month-start timestamp per month between the two entered dates.
discharge_month_starts = pd.date_range(start=start_dts, end=end_dts, freq='MS')
# Render each month as a 'YYYYMM' string; this list is what the dataset is filtered on.
keep_discharge_months = discharge_month_starts.strftime("%Y%m").tolist()

In [17]:
keep_discharge_months

['202208',
 '202209',
 '202210',
 '202211',
 '202212',
 '202301',
 '202302',
 '202303',
 '202304',
 '202305']

### Set the Low Volume (LV) threshold variables based on how many quarters of the fiscal year have passed

In [18]:
# Low-volume (LV) case cutoffs grow with the number of quarters covered by the
# selected discharge months (3 months per quarter, capped at 4 quarters).
#
# The 2- and 3-quarter cutoffs below are kept in agreement with the cutoff
# assignment that is repeated at the top of the UL001 processing cell. That
# later assignment is the one in effect when the measures are processed, and
# this cell previously listed the AMC/community values for those two rows the
# other way round, so the values printed after this cell could differ from
# the ones actually used.
# NOTE(review): confirm with the measure owner which AMC/community pairing is
# authoritative and keep both places in sync.
month_count = len(keep_discharge_months)
if month_count <= 3:
    quarters = 1
    amc_lv_cutoff = 5
    comm_lv_cutoff = 5
elif month_count <= 6:
    quarters = 2
    amc_lv_cutoff = 15
    comm_lv_cutoff = 10
elif month_count <= 9:
    quarters = 3
    amc_lv_cutoff = 20
    comm_lv_cutoff = 15
else:
    quarters = 4
    amc_lv_cutoff = 25
    comm_lv_cutoff = 25

In [19]:
print(quarters)
print(amc_lv_cutoff)
print(comm_lv_cutoff)

4
25
25


### Create a dataframe of all Equity Domain measures, IDs and domain name

In [20]:
# Query the db to get the Equity-domain measure ids/names for later joins.
# Measures whose name starts with 'ED-' are left out by the query itself.
# The server part is a raw literal so the backslash in front of the instance
# name is taken literally instead of starting an (invalid) escape sequence.
conn = pyodbc.connect('Driver={SQL Server};'
                      r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      'Database=clarity;'
                      'Trusted_Connection=yes;')
equity_meas = """
select
distinct
m.measure_id
,m.measure_name
,d.domain_nm
from
NM_Analytics_Prototype.vizient_qa.measure as m
join NM_Analytics_Prototype.vizient_qa.domain as d
on d.domain_id = m.domain_id
join NM_Analytics_Prototype.vizient_qa.calc_measure_values as cmv
on cmv.measure_id = m.measure_id
where
d.domain_nm = 'Equity'
and
left(m.measure_name, 3) <> 'ED-'
AND
cmv.calc_id = %s
""" % calculator_id

try:
    # read_sql already returns a DataFrame, so no extra wrapping is needed.
    equity_measure_df = pd.read_sql(equity_meas, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

### Query the database to get current hospital cohort assignments

In [21]:
# Query the db to get the hospital_id / hospital_medicare_id pairs of every
# hospital that has a cohort row for this calculator.
# The server part is a raw literal so the backslash in front of the instance
# name is taken literally instead of starting an (invalid) escape sequence.
conn = pyodbc.connect('Driver={SQL Server};'
                      r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                      'Database=clarity;'
                      'Trusted_Connection=yes;')
hosp_query = """
select
h.hospital_id
,h.hospital_medicare_id
FROM
NM_Analytics_Prototype.vizient_qa.hospitals as h
join NM_Analytics_Prototype.vizient_qa.calc_hospital_cohort as chc
on chc.hospital_id = h.hospital_id
WHERE
chc.calc_id = %s
""" % calculator_id

try:
    # read_sql already returns a DataFrame, so no extra wrapping is needed.
    hospital_df = pd.read_sql(hosp_query, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

In [22]:
hospital_df

Unnamed: 0,hospital_id,hospital_medicare_id
0,0,0
1,1,010033
2,2,030064
3,3,040016
4,4,050025
...,...,...
768,803,170109
769,804,251313
770,805,251315
771,806,251331


In [23]:
hospital_cohort_dict

{'140281': 'Comprehensive Academic Medical Center',
 '140242': 'Large Specialized Complex Care Medical Center',
 '140130': 'Complex Care Medical Center',
 '140211': 'Complex Care Medical Center',
 '140286': 'Complex Care Medical Center',
 '149916': 'Complex Care Medical Center',
 '140116': 'Complex Care Medical Center',
 '140062': 'Complex Care Medical Center'}

In [24]:
quarters

4

### Create a helper dictionary that controls how the Python code below filters and processes each individual measure. Each measure uses different columns, and each cohort risk model also uses a different outcome variable.

In [25]:
# Per-file-type column lookup, addressed by position in the processing code:
#   [0] process column (sorted on, NaN-filled and thresholded)
#   [1] name of the derived group column written from [0]
#   [2], [3] the two group labels
#   [4] outcome flag column (crosstabbed against each strata)
#   [5] expected-outcome column used for the AMC cohorts
#   [6] expected-outcome column used for all other cohorts
# MB has no expected-outcome columns, so its list stops at index 4.
file_type_dict = {
    timing_measure: [
        'timelapse',
        'timelapse_group',
        '<=30',
        '>30',
        'deathflag',
        'expecteddead_14',
        'expecteddead_114',
    ]
    for timing_measure in ('TROPONIN', 'SEPSIS')
}
file_type_dict['MB'] = [
    'HGBChange',
    'HGBChange_group',
    '<=1',
    '>1',
    'Transflag',
]
file_type_dict['CHF'] = [
    'BNPChange',
    'BNPChange_group',
    '<=0',
    '>0',
    'deathflag',
    'expecteddead_14',
    'expecteddead_114',
]

### UL001 

In [26]:
#UL001 V2

# Updated processing logic.  Need to make the following changes:
# 1.  If both strata meet the cases number threshold and both have 0 in numerator, then we get a 'PASS'
# 2.  Update logic to first check threshold THEN check to see if both numerators are 0 THEN check for only 1 adverse case.
# 3.  NEED TO FIX MB logic.  Add in MB processing logic step so now things break.



# Assign the LV case cutoffs for the number of quarters covered. As more months
# are added, the LV cutoff grows.
# NOTE(review): this repeats the cutoff assignment from the earlier
# 'Set the Low Volume (LV) threshold variables' cell; the values set here are
# the ones in effect for the loop below.
# Each row: (largest month count the row applies to, quarters, AMC cutoff,
# community cutoff). None means "no upper bound".
lv_cutoff_rows = (
    (3, 1, 5, 5),
    (6, 2, 15, 10),
    (9, 3, 20, 15),
    (None, 4, 25, 25),
)
month_total = len(keep_discharge_months)
# Take the first row whose upper bound covers the number of selected months.
quarters, amc_lv_cutoff, comm_lv_cutoff = next(
    (row_quarters, row_amc, row_comm)
    for row_max, row_quarters, row_amc, row_comm in lv_cutoff_rows
    if row_max is None or month_total <= row_max
)

# Collects one output row (a list of values) per hospital / measure / strata.
result_list = []
    
for i,item in enumerate(os.listdir(filepath2)):
    if item.endswith('.zip') == False:
        phi_list = [i for i in os.listdir(os.path.join(filepath2,item)) if '_PHI' in i]
        df = pd.read_csv(os.path.join(os.path.abspath(filepath2),item,phi_list[0]))
        
        #get file type from the name of the file
        file_type = phi_list[0].split('_')[0].replace(' ','').upper()
        
        #based on file type, assign process and outcome measure variables for later measure id lookups.
        if file_type == 'TROPONIN':
            process = 'N-STEMI Troponin Timing'
            outcome = 'N-STEMI Mortality O/E'
        elif file_type == 'SEPSIS':
            process = 'Sepsis Lactate Timing'
            outcome = 'Sepsis Mortality O/E'
        elif file_type == 'MB':
            process = 'Maternal Hemoglobin Change'
            outcome = 'Maternal Tranfusion Rate'
        elif file_type == 'CHF':
            process = 'HF BNP Improvement'
            outcome = 'HF Mortality O/E'
        
        
        print(file_type)
        print(process)
        print(outcome)
        
        '''
        if phi_list[0].upper().startswith('TROPO') == True:
            file_type = 'troponin'
        elif phi_list[0].startswith('MB') == True:
            file_type = 'mb'
        elif phi_list[0].upper().startswith('SEP') == True:
            file_type = 'sepsis'
        elif phi_list[0].startswith('CHF') == True:
            file_type = 'bnp'
        '''
        #use the above dictionary to conditionally filter the dataset with the correct column
        #and conditionally create a new column
        df = df.sort_values(file_type_dict[file_type][0], ascending=False).drop_duplicates('medrecnum').sort_index()
        #only keep records with a discharge month within the range of time you want.
        df = df[df['dischargemonth'].isin(keep_discharge_months)]
        #create categorical grouper column
        if file_type in ['TROPONIN','SEPSIS']:
            #UL003 fillna in process column with 0
            df[file_type_dict[file_type][0]].fillna(0,inplace=True)
            df[file_type_dict[file_type][1]] = np.where(df[file_type_dict[file_type][0]]<=30, '<=30', '>30')
        elif file_type == 'CHF':
            #UL003 fillna in process column with 0
            df[file_type_dict[file_type][0]].fillna(0,inplace=True)
            df[file_type_dict[file_type][1]] = np.where(df[file_type_dict[file_type][0]]<=0, '<=0', '>0')
            
        elif file_type == 'MB':
            #UL003 fillna in process column with 0
            df[file_type_dict[file_type][0]].fillna(0,inplace=True)
            df[file_type_dict[file_type][1]] = np.where(df[file_type_dict[file_type][0]]<=1, '<=1', '>1')
        
        #iterate over medicare ids and filter the datasets to only 1 hospital at a time
        #further clean the dataset depending on the hospital cohort type and metric type
        for i,medicare_item in enumerate(nm_medicare_id_ints):
            
                
            #print(hospital_cohort_dict[str(medicare_item)])
            #For outcome measures, further filter the columns based on the risk model type:  AMC vs. Community and also measure type 
            #ul002 - adding discharge month to validation script output.
            if file_type in ['TROPONIN','SEPSIS','CHF']:
                if hospital_cohort_dict[str(medicare_item)] in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center']:
                    #use dict index 5 (AMC)
                    df_base = df[['medicareid','medrecnum',file_type_dict[file_type][0],file_type_dict[file_type][4],file_type_dict[file_type][5],'Sex','Race','Ses',file_type_dict[file_type][1],'dischargemonth']]
                else:
                    #use dict index 6 (Community)
                    df_base = df[['medicareid','medrecnum',file_type_dict[file_type][0],file_type_dict[file_type][4],file_type_dict[file_type][6],'Sex','Race','Ses',file_type_dict[file_type][1],'dischargemonth']]
            else:
                #if MB, then just take the 'Transflag' column for outcome measure for both AMC & Community
                df_base = df[['medicareid','medrecnum',file_type_dict[file_type][0],file_type_dict[file_type][4],'Sex','Race','Ses',file_type_dict[file_type][1],'dischargemonth']]

            #filter down to each hospital
            df_filtered = df_base[df_base['medicareid'] == medicare_item]


            #if a hospital is not present in the dataset, they get 'LV'.  Code for 'LV' is 3.
            #denominator and numerator will just be a placeholder value
            #df_filtered.to_csv('lfh_df_filtered'+'_'+file_type+'.csv')

            hospital_medicare_id = medicare_item
            print('shape:',df_filtered.shape[0])
            if df_filtered.shape[0]  == 0:
                print('Result!: Reason:  No data.',file_type)
                #print(item)
                #hospital_medicare_id = medicare_item
                #code for 'LV' value is 3 in the measure value table
                meas_value = 3.0
                meas_num = 0.0
                meas_num_event = 14 #Cases
                meas_denom = 0.0
                meas_denom_event = 14 #Cases

                #Because the entire hospital is missing from the dataset, we give all equity 
                #process/outcome measure types 'LV' so we need to process 'LV' for all these combinations.
                for i, item1 in enumerate([process,outcome]):
                    for j, item2 in enumerate(['Female','HighSES','LowSES','Male','Non-White','White']):
                        if file_type == 'MB' and item2 in ['Female','Male']:
                            pass
                        else:
                            meas_name = item1 + '-' + item2
                            row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]
                            #print(row_to_add)
                            result_list.append(row_to_add)

            
            #Calculator Process/Outcome Measure values.  
            #If the hospital medicare ID exists in the dataset, then
            #we proceed on to calculating outcome vs. process measures.
            else:
                df_filtered.to_csv(str(hospital_medicare_id)+'_'+file_type+'.csv')
                #We need 2 different branches of logic depending on outcome or process measures. 
                #What we do next depends on the measure type.
                #Basically, for each file type and medicare ID, we loop over it twice 
                #and do two different branches of logic (Process logic or Outcome logic).
                for i, measure_type in enumerate(['process','outcome']):
                    
                    
                    
                    
                    if measure_type == 'outcome':
                        print('outcome')
                        
                        '''
                        ==========================
                        START OUTCOME LOGIC BRANCH
                        ==========================
                        '''

                        #Outcome logic.
                        #Check for LV.
                        #
                        for i, stratatype in enumerate(['Sex','Race','Ses']):
                            #Maternal Bleeding only has one Sex in the data population so there is no 'Sex' equity strata.
                            if stratatype == 'Sex' and file_type =='MB':
                                pass

                            else:


                                #Create a crosstab of the stratatype vs. outcome column type (deathflag, transflag, etc.)
                                crosstab_df = pd.crosstab(df_filtered[stratatype], df_filtered[file_type_dict[file_type][4]])
                                
                                
                                #If we don't have both strata type to compare against each other,
                                #Then we just have LV.
                                
                                #UL001  Updating below logic.  It should not be either index or columns less than 2
                                #       We only care about if we are missing an entire strata (index rows)
                                
                                #if len(crosstab_df.index) < 2 or len(crosstab_df.columns) < 2:
                                if len(crosstab_df.index) < 2:
                                    print('Result!: Reason:  Do not have both strata type.',file_type)
                                    #hospital_medicare_id = medicare_item
                                    #code for 'LV' value is 3 in the measure value table
                                    meas_value = 3.0
                                    meas_num = 0.0
                                    meas_num_event = 14 #Cases
                                    meas_denom = 0.0
                                    meas_denom_event = 14 #Cases

                                    #If missing both strata for this process measure, 
                                    #Then we get LV for this process measure and strata type.

                                    if stratatype == 'Sex':
                                        for i, item1 in enumerate(['Female','Male']):
                                            meas_name = outcome + '-' + item1
                                            row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                            result_list.append(row_to_add)
                                    elif stratatype == 'Race':
                                        for i, item1 in enumerate(['Non-White','White']):
                                            meas_name = outcome + '-' + item1
                                            row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                            result_list.append(row_to_add)

                                    elif stratatype == 'Ses':
                                        for i, item1 in enumerate(['HighSES','LowSES']):
                                            meas_name = outcome + '-' + item1
                                            row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                            result_list.append(row_to_add)

                                #if the crosstab has more than 2 rows, 
                                #then check to make sure the volumes meet minimum standards.
                                else:
                                    
                                    #check to make sure each strata has at least the minimum cutoff for
                                    #the given number of quarters and risk model type.  If not, then 'LV.'
                                    #Basically, does this strata have enough rows?
                                    if (hospital_cohort_dict[str(medicare_item)] in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center']) and (crosstab_df.sum(axis=1)[0] < amc_lv_cutoff or crosstab_df.sum(axis=1)[1] < amc_lv_cutoff):

                                        #hospital_medicare_id = medicare_item
                                        #code for 'LV' value is 3 in the measure value table
                                        meas_value = 3.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If not enough cases for the amount of time and risk model,
                                        #Then we get LV for this process measure and strata type.

                                        if stratatype == 'Sex':
                                            for i, item1 in enumerate(['Female','Male']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)
                                        elif stratatype == 'Race':
                                            for i, item1 in enumerate(['Non-White','White']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)

                                        elif stratatype == 'Ses':
                                            for i, item1 in enumerate(['HighSES','LowSES']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)

                                    elif (hospital_cohort_dict[str(medicare_item)] not in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center']) and (crosstab_df.sum(axis=1)[0] < comm_lv_cutoff or crosstab_df.sum(axis=1)[1] < comm_lv_cutoff):
                                        print('Result!: Reason:  Did not meet LV cutoff',file_type)
                                        #hospital_medicare_id = medicare_item
                                        #code for 'LV' value is 3 in the measure value table
                                        meas_value = 3.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If not enough cases for the amount of time and risk model,
                                        #Then we get LV for this process measure and strata type.

                                        if stratatype == 'Sex':
                                            for i, item1 in enumerate(['Female','Male']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)
                                        elif stratatype == 'Race':
                                            for i, item1 in enumerate(['Non-White','White']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)

                                        elif stratatype == 'Ses':
                                            for i, item1 in enumerate(['HighSES','LowSES']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)
                                    
                                    #UL001  If we have both strata and the cases levels meet threshold, then we
                                    #       know we now need to check for both zero in numerator.  In the pandas
                                    #       crosstab, this will cause a blank column.  Therefore, there will only be
                                    #       one column.  Two index rows and one column = 'PASS' because the denominator
                                    #       meets threshold and we had zero adverse cases and both strata were treated
                                    #       the same.
                                    
                                    elif (len(crosstab_df.index) == 2 and len(crosstab_df.columns) == 1) and ((hospital_cohort_dict[str(medicare_item)] in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center'])):
                            
                                        print('Result PASS!: Reason:  Both strata with zero numerator!.',file_type)
                                        
                                        
                                        #get the denominator values
                                        #denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][5]].sum().reset_index()
                                        #UL001
                                        if file_type == 'MB':
                                            #MB outcome = sum(transflag)/count(transflag)
                                            #use .size()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].size().reset_index()
                                            
                                            
                                        else:
                                            #everything else is sum(deathflag)/sum(expecteddead_13 or expecteddead_113)
                                            #use .sum()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][5]].sum().reset_index()
                                        
                                        #collect the denominator values.
                                        #strata1_denom = denom_sum[file_type_dict[file_type][5]][0]
                                        #strata2_denom = denom_sum[file_type_dict[file_type][5]][1]
                                        #UL001
                                        if file_type == 'MB':
                                            #collect the denominator values.
                                            strata1_denom = denom_sum[file_type_dict[file_type][4]][0]
                                            strata2_denom = denom_sum[file_type_dict[file_type][4]][1]
                                        
                                        else:
                                            #collect the denominator values.
                                            strata1_denom = denom_sum[file_type_dict[file_type][5]][0]
                                            strata2_denom = denom_sum[file_type_dict[file_type][5]][1]
                                        
                                        
                                        #code for 'PASS' value is 2 in the measure value table
                                        meas_value = 2.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        #meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If missing both strata for this process measure, 
                                        #Then we get LV for this process measure and strata type.

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0]
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1]

                                        #create row list    
                                        row_to_add1 = [hospital_medicare_id,strata1_meas_name,meas_value,meas_num,meas_num_event,strata1_denom,meas_denom_event] 
                                        row_to_add2 = [hospital_medicare_id,strata2_meas_name,meas_value,meas_num,meas_num_event,strata2_denom,meas_denom_event] 
                                        #append row list to the main list of lists (dataframe)
                                        result_list.append(row_to_add1)
                                        result_list.append(row_to_add2)
                                        
                                    elif (len(crosstab_df.index) == 2 and len(crosstab_df.columns) == 1) and ((hospital_cohort_dict[str(medicare_item)] not in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center'])):
                            
                                        print('Result PASS!: Reason:  Both strata with zero numerator!.',file_type)
                                        
                                        #denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][6]].sum().reset_index()
                                        #UL001
                                        if file_type == 'MB':
                                            #MB outcome = sum(transflag)/count(transflag)
                                            #use .size()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].size().reset_index()
                                            
                                            
                                        else:
                                            #everything else is sum(deathflag)/sum(expecteddead_13 or expecteddead_113)
                                            #use .sum()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][6]].sum().reset_index()
                                        
                                        
                                        #collect the denominator values
                                        #strata1_denom = denom_sum[file_type_dict[file_type][6]][0]

                                        #strata2_denom = denom_sum[file_type_dict[file_type][6]][1]
                                        
                                        #UL001
                                        if file_type == 'MB':
                                            #collect the denominator values.
                                            strata1_denom = denom_sum[file_type_dict[file_type][4]][0]
                                            strata2_denom = denom_sum[file_type_dict[file_type][4]][1]
                                        
                                        else:
                                            #collect the denominator values.
                                            strata1_denom = denom_sum[file_type_dict[file_type][6]][0]
                                            strata2_denom = denom_sum[file_type_dict[file_type][6]][1]

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0]
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1]
                                        
                                        
                                        #code for 'PASS' value is 2 in the measure value table
                                        meas_value = 2.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        #meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If missing both strata for this process measure, 
                                        #Then we get LV for this process measure and strata type.

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0]
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1]

                                        #create row list    
                                        row_to_add1 = [hospital_medicare_id,strata1_meas_name,meas_value,meas_num,meas_num_event,strata1_denom,meas_denom_event] 
                                        row_to_add2 = [hospital_medicare_id,strata2_meas_name,meas_value,meas_num,meas_num_event,strata2_denom,meas_denom_event] 
                                        #append row list to the main list of lists (dataframe)
                                        result_list.append(row_to_add1)
                                        result_list.append(row_to_add2)

                                    
                                    #There needs to be at least 1 adverse case.  That means
                                    #every cell in the crosstab has to have at least total of 1.
                                    #If there are any zero cells, that means at least one strata does not meet
                                    #the minimum adverse case cutoff of 1.
                                    #print('printing crosstab...', file_type)
                                    #print(crosstab_df)

                                    elif crosstab_df[crosstab_df < 1].count().sum() >= 1:
                                        print('HERE HERE')
                                        print('Result!: Reason:  No Adverse case!.',file_type)
                                        #code for 'LV' value is 3 in the measure value table
                                        print("missing an adverse case in at least 1 strata...")
                                        meas_value = 3.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If missing an adverse event in either strata, both strata get LV. 
                                        #Then we get LV for this process measure and strata type.

                                        if stratatype == 'Sex':
                                            for i, item1 in enumerate(['Female','Male']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)
                                        elif stratatype == 'Race':
                                            for i, item1 in enumerate(['Non-White','White']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)

                                        elif stratatype == 'Ses':
                                            for i, item1 in enumerate(['HighSES','LowSES']):
                                                meas_name = outcome + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]

                                                result_list.append(row_to_add)

                                    

                                    #Finally, if we pass to this stage, we have enough cases
                                    #so we can calculate the outcome measure values per strata.
                                    #if AMC, denominator = expecteddead_13, else denominator = expecteddead_113.
                                    elif (hospital_cohort_dict[str(medicare_item)] in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center']):
                                        print('AMC FINAL CALCS')
                                        #print(df_filtered['medicareid'].unique())
                                        #print(df_filtered.head())
                                        print(hospital_medicare_id)
                                        print(crosstab_df)
                                        #UL001
                                        if file_type == 'MB':
                                            #MB outcome = sum(transflag)/count(transflag)
                                            #use .size()
                                            num_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].sum().reset_index()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].size().reset_index()
                                            
                                            
                                        else:
                                            #everything else is sum(deathflag)/sum(expecteddead_13 or expecteddead_113)
                                            #use .sum()
                                            #denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][5]].sum().reset_index()
                                            num_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].sum().reset_index()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][5]].sum().reset_index()
                                        
                                        #UL001
                                        if file_type == 'MB':
                                            outcome1 = num_sum[file_type_dict[file_type][4]][0]/denom_sum[file_type_dict[file_type][4]][0]
                                            outcome2 = num_sum[file_type_dict[file_type][4]][1]/denom_sum[file_type_dict[file_type][4]][1]
                                        else:    
                                            outcome1 = num_sum[file_type_dict[file_type][4]][0]/denom_sum[file_type_dict[file_type][5]][0]
                                            outcome2 = num_sum[file_type_dict[file_type][4]][1]/denom_sum[file_type_dict[file_type][5]][1]



                                        if outcome1 > outcome2:
                                            larger_group = outcome1
                                            smaller_group = outcome2
                                        else:
                                            larger_group = outcome2
                                            smaller_group = outcome1

                                        #Assign pass, fail,warning.  The database does not store 
                                        #strings in the measure_value table so we need to store a code here.
                                        #Pass = 2, Fail = 0, Warning = 1, LV = 3

                                        #larger o/e group < 2x o/e of other group EQUAL (PASS)
                                        #>= 2x o/e & < 2.5x o/e of other group WARNING
                                        #> 2.5x o/e of other group UNEQUAL (FAIL)
                                        
                                        
                                        if larger_group/smaller_group < 2.0:
                                            meas_value = 2.0 #Pass
                                        elif larger_group/smaller_group >= 2.0 and larger_group/smaller_group <= 2.50:
                                            meas_value = 1.0 #Warning
                                        else:
                                            meas_value = 0.0 #Fail

                                        #collect the numerator and denominator values.
                                        #UL001
                                        if file_type == 'MB':
                                            strata1_num = num_sum[file_type_dict[file_type][4]][0]
                                            strata1_denom = denom_sum[file_type_dict[file_type][4]][0]

                                            strata2_num = num_sum[file_type_dict[file_type][4]][1]
                                            strata2_denom = denom_sum[file_type_dict[file_type][4]][1]
                                        else:
                                            strata1_num = num_sum[file_type_dict[file_type][4]][0]
                                            strata1_denom = denom_sum[file_type_dict[file_type][5]][0]

                                            strata2_num = num_sum[file_type_dict[file_type][4]][1]
                                            strata2_denom = denom_sum[file_type_dict[file_type][5]][1]
                                            

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0]
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1]
                                        
                                        meas_num_event = 14 #Cases
                                        meas_denom_event = 14 #Cases
                                        
                                        #create row list    
                                        row_to_add1 = [hospital_medicare_id,strata1_meas_name,meas_value,strata1_num,meas_num_event,strata1_denom,meas_denom_event] 
                                        row_to_add2 = [hospital_medicare_id,strata2_meas_name,meas_value,strata2_num,meas_num_event,strata2_denom,meas_denom_event] 
                                        #append row list to the main list of lists (dataframe)
                                        result_list.append(row_to_add1)
                                        result_list.append(row_to_add2)
                                        
                                        
                                            
                                            
                                    elif (hospital_cohort_dict[str(medicare_item)] not in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center']):
                                        print('NON AMC FINAL CALCS')
                                        #print(df_filtered['medicareid'].unique())
                                        #print(df_filtered.head())
                                        #print(hospital_medicare_id) 
                                        #print(crosstab_df)
                                        
                                        #UL001
                                        if file_type == 'MB':
                                            #MB outcome = sum(transflag)/count(transflag)
                                            #use .size()
                                            num_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].sum().reset_index()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].size().reset_index() 
                                
                                        else:
                                            #everything else is sum(deathflag)/sum(expecteddead_13 or expecteddead_113)
                                            #use .sum()
                                            #denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][5]].sum().reset_index()
                                            
                                            num_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][4]].sum().reset_index()
                                            denom_sum = df_filtered.groupby([stratatype])[file_type_dict[file_type][6]].sum().reset_index()
                                        
                                        #UL001
                                        if file_type == 'MB':
                                            outcome1 = num_sum[file_type_dict[file_type][4]][0]/denom_sum[file_type_dict[file_type][4]][0]
                                            outcome2 = num_sum[file_type_dict[file_type][4]][1]/denom_sum[file_type_dict[file_type][4]][1]
                                        else:    
                                            
                                            outcome1 = num_sum[file_type_dict[file_type][4]][0]/denom_sum[file_type_dict[file_type][6]][0]
                                            outcome2 = num_sum[file_type_dict[file_type][4]][1]/denom_sum[file_type_dict[file_type][6]][1]

                                            
                                        if outcome1 > outcome2:
                                            larger_group = outcome1
                                            smaller_group = outcome2
                                        else:
                                            larger_group = outcome2
                                            smaller_group = outcome1

                                        #Assign pass, fail,warning.  The database does not store 
                                        #strings in the measure_value table so we need to store a code here.
                                        #Pass = 2, Fail = 0, Warning = 1, LV = 3

                                        #larger o/e group < 2x o/e of other group EQUAL (PASS)
                                        #>= 2x o/e & < 2.5x o/e of other group WARNING
                                        #> 2.5x o/e of other group UNEQUAL (FAIL)
                                        
                                        if larger_group/smaller_group < 2.0:
                                            meas_value = 2.0 #Pass
                                        elif larger_group/smaller_group >= 2.0 and larger_group/smaller_group <= 2.50:
                                            meas_value = 1.0 #Warning
                                        else:
                                            meas_value = 0.0 #Fail
                                        
                                        #collect the numerator and denominator values.
                                        #UL001
                                        if file_type == 'MB':
                                            strata1_num = num_sum[file_type_dict[file_type][4]][0]
                                            strata1_denom = denom_sum[file_type_dict[file_type][4]][0]

                                            strata2_num = num_sum[file_type_dict[file_type][4]][1]
                                            strata2_denom = denom_sum[file_type_dict[file_type][4]][1]
                                        else:
                                            
                                            strata1_num = num_sum[file_type_dict[file_type][4]][0]
                                            strata1_denom = denom_sum[file_type_dict[file_type][6]][0]

                                            strata2_num = num_sum[file_type_dict[file_type][4]][1]
                                            strata2_denom = denom_sum[file_type_dict[file_type][6]][1]

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = outcome + '-' + crosstab_df.index[0]
                                            strata2_meas_name = outcome + '-' + crosstab_df.index[1]
                                        
                                        meas_num_event = 14 #Cases
                                        meas_denom_event = 14 #Cases
                                        
                                        #create row list    
                                        row_to_add1 = [hospital_medicare_id,strata1_meas_name,meas_value,strata1_num,meas_num_event,strata1_denom,meas_denom_event] 
                                        row_to_add2 = [hospital_medicare_id,strata2_meas_name,meas_value,strata2_num,meas_num_event,strata2_denom,meas_denom_event] 
                                        #append row list to the main list of lists (dataframe)
                                        result_list.append(row_to_add1)
                                        result_list.append(row_to_add2)
                                        
                                        


                    
                    else:
                        
                        '''
                        ==========================
                        START PROCESS LOGIC BRANCH
                        ==========================
                        '''
                        
                        print('Process!')
                        #pass

                        #if 
                        #Calculate Process Measures first.
                        #if there are rows in the file, then perform Fisher's exact test per column:  Sex, Race, Ses.
                        for i, stratatype in enumerate(['Sex','Race','Ses']):
                            #Maternal Bleeding only has one Sex in the data population so there is no 'Sex' equity strata.
                            if stratatype == 'Sex' and file_type =='MB':
                                pass
                            else:

                                print(file_type)
                                print(medicare_item)
                                print(df_filtered.shape)
                                #print(df_filtered.head())

                                crosstab_df = pd.crosstab(df_filtered[stratatype], df_filtered[file_type_dict[file_type][1]]) 

                                #if there are zero cases in one or both of the stratas, we will only have 1 or zero index
                                #In that case, we give both a 'LV' because the minimum threshold has not been met.

                                #if len(crosstab_df.index) < 2 or len(crosstab_df.columns) < 2:
                                #UL001  Updating below logic.  It should not be either index or columns less than 2
                                #       We only care about if we are missing an entire strata (index rows)
                                
                                #if len(crosstab_df.index) < 2 or len(crosstab_df.columns) < 2:
                                if len(crosstab_df.index) < 2:
                                    #Fewer than two strata rows in the crosstab: an entire strata is
                                    #missing, so both strata rows are recorded as LV for this process
                                    #measure and strata type.

                                    #code for 'LV' value is 3 in the measure value table
                                    meas_value = 3.0
                                    meas_num = 0.0
                                    meas_num_event = 14 #Cases
                                    meas_denom = 0.0
                                    meas_denom_event = 14 #Cases

                                    #Measure-name suffixes written for each strata type.
                                    #An unrecognised strata type writes no rows.
                                    lv_strata_labels = {
                                        'Sex': ['Female','Male'],
                                        'Race': ['Non-White','White'],
                                        'Ses': ['HighSES','LowSES'],
                                    }

                                    #No enumerate() here: the index was unused and its name i
                                    #overwrote the index of the enclosing strata-type loop.
                                    for item1 in lv_strata_labels.get(stratatype, []):
                                        meas_name = process + '-' + item1
                                        row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]
                                        print(row_to_add)
                                        result_list.append(row_to_add)


                                else:
                                    
                                    #check to make sure each strata has at least the minimum cutoff for
                                    #the given number of quarters and risk model type.  If not, then 'LV.'
                                    #The two AMC cohort names use amc_lv_cutoff; every other cohort uses
                                    #comm_lv_cutoff.  The LV handling is otherwise identical, so both
                                    #cohort groups share one branch.
                                    is_amc_cohort = hospital_cohort_dict[str(medicare_item)] in ['Comprehensive Academic Medical Center','Large Specialized Complex Care Medical Center']
                                    lv_cutoff = amc_lv_cutoff if is_amc_cohort else comm_lv_cutoff

                                    #Total cases per strata row.  The crosstab index holds strata labels,
                                    #so select rows by position with .iloc rather than integer [] lookup.
                                    strata_case_totals = crosstab_df.sum(axis=1)

                                    if strata_case_totals.iloc[0] < lv_cutoff or strata_case_totals.iloc[1] < lv_cutoff:
                                        print('amc cutoff' if is_amc_cohort else 'comm cutoff',lv_cutoff)
                                        print('This is LV')

                                        #code for 'LV' value is 3 in the measure value table
                                        meas_value = 3.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If not enough cases for the amount of time and risk model,
                                        #Then we get LV for this process measure and strata type.

                                        #Measure-name suffixes written for each strata type.
                                        #An unrecognised strata type writes no rows.
                                        lv_strata_labels = {
                                            'Sex': ['Female','Male'],
                                            'Race': ['Non-White','White'],
                                            'Ses': ['HighSES','LowSES'],
                                        }

                                        #No enumerate() here: the index was unused and its name i
                                        #overwrote the index of the enclosing strata-type loop.
                                        for item1 in lv_strata_labels.get(stratatype, []):
                                            meas_name = process + '-' + item1
                                            row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]
                                            print(row_to_add)
                                            result_list.append(row_to_add)

                                    
                                    #UL001  If we have both strata and the cases levels meet threshold, then we
                                    #       know we now need to check for both zero in numerator.  In the pandas
                                    #       crosstab, this will cause a blank column.  Therefore, there will only be
                                    #       one column.  Two index rows and one column = 'PASS' because the denominator
                                    #       meets threshold and we had zero adverse cases and both strata were treated
                                    #       the same.
                                    
                                    # For process measures, we do not need to check cohort because
                                    # The crosstab does not take into account risk model type.
                                    # So, if we pass the above thresholds, then just check for 2 rows & 1 column.
                                    elif (len(crosstab_df.index) == 2 and len(crosstab_df.columns) == 1):
                            
                                        #UL001
                                        
                                        #The process measure gets denominator in a different way and, therefore, does
                                        #not require differing branch logic between MB and others.
                                            
                                        #collect the numerator and denominator values from the crosstab rows.
                                        strata1 = crosstab_df.index.values[0]
                                        #UL001 No numerator column in this case
                                        #strata1_num = crosstab_df[crosstab_df.columns.values[1]][0]
                                        strata1_denom = crosstab_df.sum(axis=1)[0]

                                        strata2 = crosstab_df.index.values[1]
                                        #UL001 no numerator column in this case
                                        #strata2_num = crosstab_df[crosstab_df.columns.values[1]][1]
                                        strata2_denom = crosstab_df.sum(axis=1)[1]


                                        #code for 'PASS' value is 2 in the measure value table
                                        meas_value = 2.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        #meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If missing both strata for this process measure, 
                                        #Then we get LV for this process measure and strata type.

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = process + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = process + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = process + '-' + crosstab_df.index[0]
                                            strata2_meas_name = process + '-' + crosstab_df.index[1]

                                        #create row list    
                                        row_to_add1 = [hospital_medicare_id,strata1_meas_name,meas_value,meas_num,meas_num_event,strata1_denom,meas_denom_event] 
                                        row_to_add2 = [hospital_medicare_id,strata2_meas_name,meas_value,meas_num,meas_num_event,strata2_denom,meas_denom_event] 
                                        #append row list to the main list of lists (dataframe)
                                        result_list.append(row_to_add1)
                                        result_list.append(row_to_add2)
                                        
                                    
                                    #There needs to be at least 1 adverse case.  That means
                                    #every cell in the crosstab has to have at least total of 1.
                                    #If there are any zero cells, that means at least one strata does not meet
                                    #the minimum adverse case cutoff of 1.
                                    elif crosstab_df[crosstab_df < 1].count().sum() >= 1:
                                        #code for 'LV' value is 3 in the measure value table
                                        print("missing an adverse case in at least 1 strata...")
                                        meas_value = 3.0
                                        meas_num = 0.0
                                        meas_num_event = 14 #Cases
                                        meas_denom = 0.0
                                        meas_denom_event = 14 #Cases

                                        #If missing an adverse event in either strata, both strata get LV. 
                                        #Then we get LV for this process measure and strata type.

                                        if stratatype == 'Sex':
                                            for i, item1 in enumerate(['Female','Male']):
                                                meas_name = process + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]
                                                print(row_to_add)
                                                result_list.append(row_to_add)
                                        elif stratatype == 'Race':
                                            for i, item1 in enumerate(['Non-White','White']):
                                                meas_name = process + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]
                                                print(row_to_add)
                                                result_list.append(row_to_add)

                                        elif stratatype == 'Ses':
                                            for i, item1 in enumerate(['HighSES','LowSES']):
                                                meas_name = process + '-' + item1
                                                row_to_add = [hospital_medicare_id,meas_name,meas_value,meas_num,meas_num_event,meas_denom,meas_denom_event]
                                                print(row_to_add)
                                                result_list.append(row_to_add)

                                    

                                    else:

                                        
                                        #Finally, if we pass all the above 'LV' conditions, then actually 
                                        #perform Fisher's Exact Test of Independence to give the hospital a 
                                        #Pass, Warning, Fail result.
                                        oddsratio, p_value = stats.fisher_exact(crosstab_df,alternative="two-sided")  #second index is p-value

                                        #assign the p_value a decimal code.  The database does not store 
                                        #strings in the measure_value table so we need to store a code here.
                                        #Pass = 2, Fail = 0, Warning = 1, LV = 3
                                        if p_value > 0.05:
                                            meas_value = 2.0 #Pass
                                        elif p_value >= 0.01 and p_value <= 0.05:
                                            meas_value = 1.0 #Warning
                                        else:
                                            meas_value = 0.0 #Fail

                                        #collect the numerator and denominator values from the crosstab rows.
                                        strata1 = crosstab_df.index.values[0]
                                        strata1_num = crosstab_df[crosstab_df.columns.values[1]][0]
                                        strata1_denom = crosstab_df.sum(axis=1)[0]

                                        strata2 = crosstab_df.index.values[1]
                                        strata2_num = crosstab_df[crosstab_df.columns.values[1]][1]
                                        strata2_denom = crosstab_df.sum(axis=1)[1]

                                        #build the measure name from the index names
                                        if crosstab_df.index[0] in ['High','Low']:

                                            strata1_meas_name = process + '-' + crosstab_df.index[0] + 'SES'
                                            strata2_meas_name = process + '-' + crosstab_df.index[1] + 'SES'
                                        else:
                                            strata1_meas_name = process + '-' + crosstab_df.index[0]
                                            strata2_meas_name = process + '-' + crosstab_df.index[1]

                                        meas_num_event = 14 #Cases
                                        meas_denom_event = 14 #Cases

                                        #create row list    
                                        row_to_add1 = [hospital_medicare_id,strata1_meas_name,meas_value,strata1_num,meas_num_event,strata1_denom,meas_denom_event] 
                                        row_to_add2 = [hospital_medicare_id,strata2_meas_name,meas_value,strata2_num,meas_num_event,strata2_denom,meas_denom_event] 
                                        #append row list to the main list of lists (dataframe)
                                        result_list.append(row_to_add1)
                                        result_list.append(row_to_add2)


# In FY23 Vizient renamed one of the race strata, but the data files it ships
# still carry the old 'Non-White' label.  Before the results are joined to the
# measure dataframe, every affected measure name is rewritten so that
# 'Non-White' becomes the new, longer strata label below.
_renamed_race_strata = 'Black, Asian, Native American/Alaska Native, Pacific Islander/Native Hawaiian, Other, Unknown, Declined, Unavailable'

# Old measure name -> new measure name, one entry per affected measure.
# The spelling 'Tranfusion' is kept as-is: these strings are lookup keys and
# must match the measure names produced upstream.
race_strat_dict = {
    measure_prefix + '-Non-White': measure_prefix + '-' + _renamed_race_strata
    for measure_prefix in (
        'HF BNP Improvement',
        'HF Mortality O/E',
        'Maternal Hemoglobin Change',
        'Maternal Tranfusion Rate',
        'N-STEMI Mortality O/E',
        'N-STEMI Troponin Timing',
        'Sepsis Lactate Timing',
        'Sepsis Mortality O/E',
    )
}

# Walk the accumulated result rows and swap in the new race strata name.
# Position 1 of each row is the measure name; rows are edited in place.
for result_row in result_list:
    renamed_measure = race_strat_dict.get(result_row[1])
    if renamed_measure is not None:
        result_row[1] = renamed_measure
                                        
def _strip_spaces(id_series):
    """Return the series cast to str with every space character removed."""
    return id_series.astype(str).str.replace(' ','')


# One row per hospital / measure-strata result, stamped with the calculator id.
final_df = pd.DataFrame(
    result_list,
    columns=[
        'medicare_id',
        'measure_name',
        'measure_value',
        'numerator',
        'numerator_event_type_id',
        'denominator',
        'denominator_event_type_id',
    ],
).assign(calc_id=calculator_id)

# Attach the measure metadata by measure name (default inner join), then
# normalise the medicare id on both sides of the upcoming hospital join so the
# keys compare as space-free strings.
final_df2 = final_df.merge(equity_measure_df, left_on='measure_name', right_on='measure_name')
final_df2 = final_df2.assign(medicare_id=_strip_spaces(final_df2['medicare_id']))
hospital_df['hospital_medicare_id'] = _strip_spaces(hospital_df['hospital_medicare_id'])

# Left join keeps every result row even when no hospital record matches.
final_df3 = (
    final_df2
    .merge(hospital_df, how='left', left_on='medicare_id', right_on='hospital_medicare_id')
    .drop_duplicates()
    .assign(period_id=period_query_results, measure_value_id=1)
)

# Snapshot of the fully joined frame before it is cut down to the load columns.
final_df3.to_csv('final_df_before_cleaning.csv')

# Keep only the columns that are loaded into the measure values table, in load order.
final_df3 = final_df3.loc[:, [
    'calc_id',
    'hospital_id',
    'measure_id',
    'period_id',
    'measure_value_id',
    'measure_value',
    'numerator',
    'numerator_event_type_id',
    'denominator',
    'denominator_event_type_id',
]]

final_df3.to_csv('equity_test2.csv')

MB
Maternal Hemoglobin Change
Maternal Tranfusion Rate
shape: 2929
Process!
MB
140281
(2929, 9)
MB
140281
(2929, 9)
outcome
AMC FINAL CALCS
140281
Transflag     0   1
Race               
Non-White  1246  16
White      1661   6
AMC FINAL CALCS
140281
Transflag     0   1
Ses                
High       2193  13
Low         714   9
shape: 965
Process!
MB
140242
(965, 9)
MB
140242
(965, 9)
outcome
HERE HERE
Result!: Reason:  No Adverse case!. MB
missing an adverse case in at least 1 strata...
HERE HERE
Result!: Reason:  No Adverse case!. MB
missing an adverse case in at least 1 strata...
shape: 690
Process!
MB
140130
(690, 9)
MB
140130
(690, 9)
outcome
HERE HERE
Result!: Reason:  No Adverse case!. MB
missing an adverse case in at least 1 strata...
HERE HERE
Result!: Reason:  No Adverse case!. MB
missing an adverse case in at least 1 strata...
shape: 412
Process!
MB
140211
(412, 9)
MB
140211
(412, 9)
outcome
NON AMC FINAL CALCS
NON AMC FINAL CALCS
shape: 315
Process!
MB
140286
(315, 9)
MB
14

In [27]:
# Display-only check: the de-duplicated frame is shown but not assigned back,
# so final_df3 itself is left unchanged by this cell.
# NOTE(review): final_df3 was de-duplicated before it was cut down to the load
# columns; if the narrower frame can still contain duplicates, assign this
# result before inserting -- confirm.
final_df3.drop_duplicates()

Unnamed: 0,calc_id,hospital_id,measure_id,period_id,measure_value_id,measure_value,numerator,numerator_event_type_id,denominator,denominator_event_type_id
0,15,23,102,7514,1,0.0,462.0,14,1262.0,14
1,15,112,102,7514,1,0.0,190.0,14,263.0,14
2,15,219,102,7514,1,2.0,153.0,14,225.0,14
3,15,221,102,7514,1,2.0,35.0,14,79.0,14
4,15,222,102,7514,1,2.0,44.0,14,77.0,14
...,...,...,...,...,...,...,...,...,...,...
59,15,221,105,7514,1,0.0,3.0,14,73.0,14
60,15,222,105,7514,1,3.0,0.0,14,0.0,14
61,15,471,105,7514,1,2.0,2.0,14,193.0,14
62,15,472,105,7514,1,3.0,0.0,14,0.0,14


In [28]:
# Sanity check: list the distinct hospital ids present in the frame to be loaded.
final_df3['hospital_id'].unique()

array([ 23, 112, 219, 221, 222, 471, 472, 694], dtype=int64)

In [29]:
# Spot check: show every result row for the hospital whose hospital_id is 112.
final_df3[final_df3['hospital_id'] == 112]

Unnamed: 0,calc_id,hospital_id,measure_id,period_id,measure_value_id,measure_value,numerator,numerator_event_type_id,denominator,denominator_event_type_id
1,15,112,102,7514,1,0.0,190.0,14,263.0,14
9,15,112,103,7514,1,0.0,440.0,14,702.0,14
17,15,112,100,7514,1,2.0,494.0,14,766.0,14
25,15,112,101,7514,1,2.0,136.0,14,199.0,14
33,15,112,106,7514,1,3.0,0.0,14,0.0,14
41,15,112,107,7514,1,3.0,0.0,14,0.0,14
49,15,112,104,7514,1,3.0,0.0,14,0.0,14
57,15,112,105,7514,1,3.0,0.0,14,0.0,14


In [29]:
def insert_measure_values_from_reports_df(df):
    """Insert the rows of ``df`` into NM_Analytics_Prototype.vizient_qa.measure_values.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns calc_id, hospital_id, measure_id, period_id,
        measure_value_id, measure_value, numerator, numerator_event_type_id,
        denominator and denominator_event_type_id.  Rows whose measure_value
        equals the string 'Missing' are skipped.

    Returns
    -------
    None
        Prints 'done inserting rows.' after a successful load.

    Raises
    ------
    Exception
        Any error raised while inserting is re-raised after the transaction has
        been rolled back, so a failed run leaves no partially loaded rows.

    Notes
    -----
    All rows are written in a single transaction that is committed once at the
    end.  The cursor and connection are always closed, even when an insert fails.
    """
    insert_sql = "INSERT INTO NM_Analytics_Prototype.vizient_qa.measure_values([calc_id],[hospital_id],[measure_id],[period_id],[measure_value_id],[measure_value],[numerator],[numerator_event_type_id],[denominator],[denominator_event_type_id]) values (?,?,?,?,?,?,?,?,?,?)"
    # Column order must match the column list / placeholders in insert_sql.
    value_columns = ['calc_id', 'hospital_id', 'measure_id', 'period_id', 'measure_value_id',
                     'measure_value', 'numerator', 'numerator_event_type_id', 'denominator',
                     'denominator_event_type_id']

    # connect to the NM_Analytics_Prototype database.
    # The server line is a raw string so the backslash before the instance name
    # is taken literally instead of starting an (invalid) escape sequence.
    conn = pyodbc.connect('Driver={SQL Server};'
                          r'Server=edw00pd05wva.corp.nm.org\EDWIDS1;'
                          'Database=NM_Analytics_Prototype;'
                          'Trusted_Connection=yes;')
    try:
        cursor = conn.cursor()
        try:
            # NOTE(review): iterrows() hands each row over as a Series; changing
            # the iteration strategy may change the Python types passed to
            # pyodbc as parameters -- verify before refactoring.
            for index, row in df.iterrows():
                if row['measure_value'] != 'Missing':
                    cursor.execute(insert_sql, *[row[column] for column in value_columns])
            # Commit once so the load is all-or-nothing.
            conn.commit()
        except Exception:
            # Undo anything written so far, then surface the original error.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print('done inserting rows.')

In [31]:
# Load final_df3 into the measure values table.
# NOTE(review): the insert function only issues INSERTs, so re-running this
# cell appears to write the same rows again -- confirm whether the target
# table guards against duplicates.
insert_measure_values_from_reports_df(final_df3)

done inserting rows.


In [30]:
# Inspect the frame of measure values (notebook display of final_df3).
final_df3

Unnamed: 0,calc_id,hospital_id,measure_id,period_id,measure_value_id,measure_value,numerator,numerator_event_type_id,denominator,denominator_event_type_id
0,15,23,88,7514,1,1.0,75.0,14,405.000000,14
1,15,112,88,7514,1,2.0,31.0,14,259.000000,14
2,15,219,88,7514,1,2.0,34.0,14,151.000000,14
3,15,221,88,7514,1,2.0,26.0,14,113.000000,14
4,15,222,88,7514,1,2.0,13.0,14,56.000000,14
...,...,...,...,...,...,...,...,...,...,...
347,15,221,110,7514,1,3.0,0.0,14,0.000000,14
348,15,222,110,7514,1,3.0,0.0,14,0.000000,14
349,15,471,110,7514,1,3.0,0.0,14,0.000000,14
350,15,472,110,7514,1,3.0,0.0,14,0.000000,14
