In [10]:
import pandas as pd
import numpy as np
import os
from datetime import timedelta
# Lift pandas' display limits so printed frames show every column and row.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [11]:
# Diagnosis codes that ASCVD_check treats as ASCVD.
# The dotted entries are the true ICD codes. The data also carries the same
# codes written without the dot, so the undotted variants are derived from the
# dotted ones instead of being typed by hand (the hand-typed list previously had
# 'I25110' / 'I25111' where 'I25.110' / 'I25.111' were intended).
_ascvd_icd_codes = [
    'I25.1',
    'I25.10',
    'I25.11',
    'I25.110',
    'I25.111',
    'I25.118',
    'I25.119',
]

ascvd = _ascvd_icd_codes + [code.replace('.', '') for code in _ascvd_icd_codes]

# Diagnosis codes that CKD_check treats as CKD: 'N18' plus its '.1'-'.6' and '.9' subcodes.
# NOTE(review): only dotted forms are listed here, whereas `ascvd` also lists
# undotted variants to match the data - confirm the data uses dotted N18 codes.
ckd = ['N18'] + ['N18.' + suffix for suffix in '1234569']

# Diagnosis codes that HF_check treats as heart failure, one sub-family per line.
# NOTE(review): only dotted forms are listed - confirm the data uses dotted I50 codes.
hf = """
    I50
    I50.1
    I50.2 I50.20 I50.21 I50.22 I50.23
    I50.3 I50.30 I50.31 I50.32 I50.33
    I50.4 I50.40 I50.41 I50.42 I50.43
    I50.8
    I50.81 I50.810 I50.811 I50.812 I50.813 I50.814
    I50.82 I50.83 I50.84 I50.89
    I50.9
""".split()

# Diagnosis codes that hypoglycemia_check looks for: E16.0, E16.1 and E16.2.
hypoglycemia = ['E16.' + str(digit) for digit in range(3)]

# Diagnosis codes that overweight_check looks for (the E66 family).
overweight = 'E66 E66.0 E66.01 E66.09 E66.1 E66.2 E66.3 E66.8 E66.9'.split()

In [12]:
# Read every column as text, then convert the three date columns, which are
# parsed as day/month/two-digit-year.
input_data = pd.read_csv('Recommended Care Pathway data - Data.csv', dtype='str')
for date_column in ('efdt', 'rdt', 'rxfdt'):
    input_data[date_column] = pd.to_datetime(input_data[date_column], format='%d/%m/%y')

In [13]:
# Length of the look-back window and the reference "today" for the analysis.
test_window_days = 90
current_date = pd.Timestamp('2018-12-31')

# First day of the look-back window; checks start from this date.
since_date = current_date - timedelta(days=test_window_days)

# Sentinel returned by the drug checks when nothing was found in the window.
# Any value later than current_date works; it only marks "did not happen
# during the observation period".
dummy_later_date = pd.Timestamp('2040-12-31')

In [14]:
def keep_latest_data(empi_data, from_date=None):
    """Keep only the rows that have at least one date inside the look-back window.

    A row is kept when any of its 'rxfdt', 'rdt' or 'efdt' values is on or
    after ``from_date``. Missing dates (NaT) never satisfy the comparison.

    Parameters
    ----------
    empi_data : pandas.DataFrame
        Rows to filter; must contain the datetime columns 'rxfdt', 'rdt', 'efdt'.
    from_date : pandas.Timestamp, optional
        Inclusive start of the window. Defaults to the module-level ``since_date``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the matching rows (the input is not modified).
    """
    if from_date is None:
        from_date = since_date

    # Combine the masks with the logical OR operator rather than arithmetic `+`.
    in_window = (
        (empi_data['rxfdt'] >= from_date)
        | (empi_data['rdt'] >= from_date)
        | (empi_data['efdt'] >= from_date)
    )
    return empi_data[in_window].copy()

In [15]:
def HbA1C_test_check(data):
    """Return whether any row of ``data`` has the HbA1C code '4548-4' in column 'rc'."""
    hba1c_codes = ['4548-4']
    is_hba1c_row = data['rc'].isin(hba1c_codes)
    return is_hba1c_row.any()

def HbA1C_value_check(data, threshold=7.0):
    """Return True when the HbA1C result in ``data`` is at or above ``threshold``.

    The value is read from column 'rv' of the first row whose 'rc' is '4548-4'.
    NOTE(review): "first" means first in frame order, which is not necessarily
    the most recent result - confirm that is the intended row.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'rc' and 'rv'.
    threshold : float, optional
        Inclusive cut-off for the result value; defaults to 7.0.

    Returns
    -------
    bool
        False when there is no HbA1C row at all (mirrors egfr_adequate_check
        instead of raising IndexError), otherwise ``value >= threshold``.
    """
    hba1c_values = data.loc[data['rc'] == '4548-4', 'rv']
    if hba1c_values.empty:
        return False
    return float(hba1c_values.values[0]) >= threshold

def ASCVD_check(data):
    """Return whether any 'pd' value in ``data`` is one of the module-level ``ascvd`` codes."""
    has_ascvd_code = data['pd'].isin(ascvd)
    return has_ascvd_code.any()

def HF_check(data):
    """Return whether any 'pd' value in ``data`` is one of the module-level ``hf`` codes."""
    has_hf_code = data['pd'].isin(hf)
    return has_hf_code.any()

def CKD_check(data):
    """Return whether any 'pd' value in ``data`` is one of the module-level ``ckd`` codes."""
    has_ckd_code = data['pd'].isin(ckd)
    return has_ckd_code.any()

def egfr_adequate_check(data):
    """Return True when the first 'eGFR' row of ``data`` has an 'rv' value >= 90.0.

    Rows are selected by ``rn == 'eGFR'``; when there is no such row the
    result is False.
    """
    egfr_values = data.loc[data['rn'] == 'eGFR', 'rv']
    if not egfr_values.empty:
        return float(egfr_values.values[0]) >= 90.0
    return False

def _first_rx_on_or_after(data, rx_name, from_date):
    """Find the earliest 'rxfdt' of rows with ``rxn == rx_name`` dated on or after ``from_date``.

    Returns ``(True, earliest_date)`` when such a row exists, otherwise
    ``(False, dummy_later_date)`` using the module-level sentinel date.
    """
    matching_dates = data.loc[
        (data['rxfdt'] >= from_date) & (data['rxn'] == rx_name),
        'rxfdt',
    ]
    if matching_dates.empty:
        return False, dummy_later_date
    return True, matching_dates.min()


def glp1ra_check(data, from_date):
    """Return ``(found, first_date)`` for 'GLP1RA' rows dated on or after ``from_date``."""
    return _first_rx_on_or_after(data, 'GLP1RA', from_date)


def sglt2_check(data, from_date):
    """Return ``(found, first_date)`` for 'SGLT2' rows dated on or after ``from_date``."""
    return _first_rx_on_or_after(data, 'SGLT2', from_date)


def tzd_check(data, from_date):
    """Return ``(found, first_date)`` for 'TZD' rows dated on or after ``from_date``."""
    return _first_rx_on_or_after(data, 'TZD', from_date)


def dpp4_check(data, from_date):
    """Return ``(found, first_date)`` for 'DPP4' rows dated on or after ``from_date``."""
    return _first_rx_on_or_after(data, 'DPP4', from_date)
        
def hypoglycemia_check(data):
    """Return whether any 'pd' value in ``data`` is one of the module-level ``hypoglycemia`` codes."""
    has_hypoglycemia_code = data['pd'].isin(hypoglycemia)
    return has_hypoglycemia_code.any()

def overweight_check(data):
    """Return whether any 'pd' value in ``data`` is one of the module-level ``overweight`` codes."""
    has_overweight_code = data['pd'].isin(overweight)
    return has_overweight_code.any()

In [16]:
def _cardio_renal_pathway(data, both_found_egfr_adequate, both_found_egfr_low):
    """Shared logic of the ASCVD and HF/CKD branches of make_decision.

    When egfr_adequate_check passes, an SGLT2 entry is looked for from
    since_date and then a GLP1RA entry from the SGLT2 date onwards; otherwise
    the two drug classes are checked in the opposite order. The first class
    that is missing is recommended. The two string arguments are what gets
    returned when both classes were found, for the adequate and the
    not-adequate eGFR case respectively.
    """
    if egfr_adequate_check(data):
        sglt2_tested = sglt2_check(data, since_date)
        if not sglt2_tested[0]:
            return 'SGLT2i with proven CVD benefits.'
        glp1ra_tested = glp1ra_check(data, sglt2_tested[1])
        if not glp1ra_tested[0]:
            return 'GLP1RA with proven CVD benefits'
        return both_found_egfr_adequate

    glp1ra_tested = glp1ra_check(data, since_date)
    if not glp1ra_tested[0]:
        return 'GLP1RA with proven CVD benefits'
    sglt2_tested = sglt2_check(data, glp1ra_tested[1])
    if not sglt2_tested[0]:
        return 'SGLT2i with proven CVD benefits.'
    return both_found_egfr_low


def _hypoglycemia_pathway(data):
    """Recommendation for the hypoglycemia branch of make_decision.

    Finds which of DPP4 / GLP1RA / SGLT2 / TZD appears first in the window,
    then checks whether one of the complementary classes appears on or after
    that date.
    """
    # Keyed by first date. NOTE(review): when two classes share the same first
    # date the later entry's label replaces the earlier one (dict semantics);
    # this is kept as-is, confirm the intended tie-break.
    test_dates = {
        dpp4_check(data, since_date)[1]: 'dpp4',
        glp1ra_check(data, since_date)[1]: 'glp1ra',
        sglt2_check(data, since_date)[1]: 'sglt2',
        tzd_check(data, since_date)[1]: 'tzd',
    }
    earliest_date = min(test_dates)
    if not (earliest_date <= current_date):
        # Only sentinel/future dates: none of the four classes was found.
        return 'DPP4i or GLP-1RA or SGLT2i or TZD.'

    # For each possible first class: the checks for the complementary classes
    # and the suggestion to give when none of them is found.
    follow_ups = {
        'dpp4': ((sglt2_check, tzd_check), 'SGLT2i or TZD.'),
        'glp1ra': ((sglt2_check, tzd_check), 'SGLT2i or TZD.'),
        'sglt2': ((glp1ra_check, dpp4_check, tzd_check), 'GLP-1RA or DPP4i or TZD.'),
        'tzd': ((glp1ra_check, dpp4_check, sglt2_check), 'SGLT2i or DPP4i or GLP-1RA.'),
    }
    complementary_checks, suggestion = follow_ups[test_dates[earliest_date]]
    if any(check(data, earliest_date)[0] for check in complementary_checks):
        return 'Addition of SU or basal insulin.'
    return suggestion


def _overweight_pathway(data):
    """Recommendation for the overweight branch of make_decision."""
    glp1ra_tested = glp1ra_check(data, since_date)
    sglt2_tested = sglt2_check(data, since_date)
    # Same date-keyed lookup as the hypoglycemia branch: on equal first dates
    # the 'sglt2' label wins because it is inserted last.
    test_dates = {
        glp1ra_tested[1]: 'glp1ra',
        sglt2_tested[1]: 'sglt2',
    }
    earliest_date = min(test_dates)
    if not (earliest_date <= current_date):
        # Neither GLP1RA nor SGLT2 was found in the window.
        return 'GLP1RA or DPP-4i.'

    if test_dates[earliest_date] == 'glp1ra':
        if sglt2_check(data, glp1ra_tested[1])[0]:
            return 'Basal Insulin, TZD, SU.'
        return 'SGLT2i.'

    # SGLT2 came first: look for GLP1RA afterwards, then DPP4 after that.
    glp1ra_after = glp1ra_check(data, sglt2_tested[1])
    if not glp1ra_after[0]:
        return 'GLP1RA.'
    if dpp4_check(data, glp1ra_after[1])[0]:
        return 'Basal Insulin, TZD, SU.'
    return 'DPP-4i.'


def make_decision(data):
    """Return the care-pathway recommendation string for one patient's rows.

    The rows are first restricted with keep_latest_data. The decision then
    proceeds in this order:

    1. no HbA1C row           -> "No HbA1C Test took place ..." message
    2. HbA1C value check fails -> 'HbA1C Test had value < 7.'
    3. ASCVD diagnosis         -> SGLT2/GLP1RA pathway (ASCVD wording)
    4. HF or CKD diagnosis     -> SGLT2/GLP1RA pathway (HF/CKD wording)
    5. hypoglycemia diagnosis  -> hypoglycemia pathway
    6. overweight diagnosis    -> overweight pathway
    7. otherwise               -> metformin / lifestyle message

    Parameters
    ----------
    data : pandas.DataFrame
        All rows of a single patient, with the columns used by the check
        functions ('rc', 'rv', 'rn', 'pd', 'rxn', 'rxfdt', 'rdt', 'efdt').

    Returns
    -------
    str
        The recommendation text.
    """
    data = keep_latest_data(data)

    if not HbA1C_test_check(data):
        # Report the configured window length instead of a hard-coded 90.
        return f'No HbA1C Test took place in last {test_window_days} days.'
    if not HbA1C_value_check(data):
        return 'HbA1C Test had value < 7.'

    if ASCVD_check(data):
        return _cardio_renal_pathway(
            data,
            'DPP-4i, Basal Insulin, TZD, SU.',
            'Basal Insulin, TZD, SU.',
        )
    if HF_check(data) or CKD_check(data):
        return _cardio_renal_pathway(
            data,
            'DPP-4i (not saxagliptin), Basal Insulin SU.',
            'Basal Insulin, SU.',
        )
    if hypoglycemia_check(data):
        return _hypoglycemia_pathway(data)
    if overweight_check(data):
        return _overweight_pathway(data)
    return 'First line therapy is metformin and comprehensive lifestyle.'

In [18]:
# Run the decision tree once per patient (EMPI), echo each result, and collect
# the rows in a plain list: DataFrame.append was removed in pandas 2.0 and
# growing a frame row by row is quadratic.
records = []
empis = input_data['empi'].unique()
for empi in empis:
    empi_data = pd.DataFrame(input_data[input_data['empi'] == empi])
    print("Empi: ")
    print(empi)
    print('Recomendation: ')
    # Evaluate once and reuse the value for both the printout and the record.
    recomend = make_decision(empi_data)
    print(recomend)
    records.append({'Empi': empi, 'Recommendation': recomend})
    print("----------------------")

# Passing the column names keeps the expected columns even when there are no patients.
result = pd.DataFrame(records, columns=['Empi', 'Recommendation'])

print(result.sort_values('Empi'))
result.to_csv('Recommendation_output.csv', index=False)

Empi: 
M8206410515
Recomendation: 
GLP1RA with proven CVD benefits
----------------------
Empi: 
M0507565861
Recomendation: 
SGLT2i with proven CVD benefits.
----------------------
Empi: 
M1474337615
Recomendation: 
HbA1C Test had value < 7.
----------------------
Empi: 
M4627963031
Recomendation: 
SGLT2i with proven CVD benefits.
----------------------
Empi: 
M9922241420
Recomendation: 
Basal Insulin, TZD, SU.
----------------------
Empi: 
M4792226656
Recomendation: 
GLP1RA with proven CVD benefits
----------------------
Empi: 
M4179761710
Recomendation: 
DPP-4i (not saxagliptin), Basal Insulin SU.
----------------------
Empi: 
M1704457265
Recomendation: 
SGLT2i or TZD.
----------------------
Empi: 
M1704457277
Recomendation: 
GLP-1RA or DPP4i or TZD.
----------------------
Empi: 
M1704458277
Recomendation: 
Addition of SU or basal insulin.
----------------------
Empi: 
M1704459277
Recomendation: 
Basal Insulin, TZD, SU.
----------------------
Empi: 
M1704455277
Recomendation: 
Basal 