## Imports

In [None]:
import pandas as pd
import numpy as np
import datetime, random

from multiprocessing import Pool
import time

#Potentially useful 
#MRNs - ENC
#2307280 - 205472336

## Functions

In [None]:
def returnAKIpatients(df, aki_calc_type = 'rolling_window', keep_cols = True):
    '''
    Adds AKI flag columns to a creatinine dataframe using the requested definition(s).
    
    Input:
        df - dataframe of creatinine measurements; it is grouped by its 'enc' column and, for the
             back-calculated definition, by its 'mrn' column
        aki_calc_type - which definition to apply:
             'rolling_window' -> addRollingWindowAKI per encounter
             'back_calculate' -> addBaselineCreat per patient, then addBackCalcAKI per encounter
             'both'           -> the rolling-window step first, then the back-calculated steps
        keep_cols - accepted for backward compatibility; currently not used
    Output: dataframe with the columns added by the selected helper(s)
    Raises: ValueError if aki_calc_type is not one of the three options above
    '''
    valid_types = ('both', 'rolling_window', 'back_calculate')
    if aki_calc_type not in valid_types:
        # Fail loudly: previously a misspelled option returned the input untouched, without any AKI columns
        raise ValueError("aki_calc_type must be one of %s, got %r" % (valid_types, aki_calc_type))
    
    if aki_calc_type in ('both', 'rolling_window'):
        df = df.groupby('enc', sort=False).apply(addRollingWindowAKI)
        
    if aki_calc_type == 'both':
        # addRollingWindowAKI moves 'time' into the index; drop the 'enc' group level and turn
        #  'time' back into a column, which the back-calculation helpers expect
        df = df.reset_index('enc', drop=True).reset_index()
        
    if aki_calc_type in ('both', 'back_calculate'):
        df = df.groupby('mrn', sort=False).apply(addBaselineCreat)
        df = df.groupby('enc', sort=False).apply(addBackCalcAKI)
    
    return df

def addBaselineCreat(df):
    '''
    Adds the baseline creatinine to a dataframe. The baseline creatinine is defined as the median of the outpatient 
     creatinine values from 365 to 7 days prior to admission (both ends of the window included).
    
    Input: dataframe (typically of a single patient) with 'time', 'creat', 'inpatient' (boolean) and 'admission' columns
    Output: the same dataframe with a 'baseline_creat' column added in; rows whose window contains no
     outpatient value get NaN
    '''
    window_starts = (df.admission - datetime.timedelta(days=365)).values
    window_ends = (df.admission - datetime.timedelta(days=7)).values
    
    # The time-indexed outpatient values are the same for every row, so build them once
    #  instead of once per row
    outpatient_creat = df[~df.inpatient].set_index('time').sort_index().creat
    
    df['baseline_creat'] = [outpatient_creat.loc[start:end].median()
                            for start, end in zip(window_starts, window_ends)]
    
    return df

def addBackCalcAKI(df):
    '''
    Adds the back-calculated AKI conditions, the KDIGO standards on the outpatient values;
     i.e. a 50% increase from baseline creatinine in <7 days
    
    Input: dataframe (typically of a single encounter) with 'time', 'creat', 'baseline_creat' and
     'admission' columns; the admission time is read from the first row after sorting by time
    Output: time-sorted copy of the dataframe with a 'backcalc_aki' column added in:
     1.0 / 0.0 for rows timed within [admission, admission + 7 days], NaN for rows outside that window
    '''
    df = df.sort_values('time')
    
    # No rows means no admission time to read; return the frame with an empty flag column
    if df.empty:
        df['backcalc_aki'] = np.array([], dtype=float)
        return df
    
    admission = df.admission.iloc[0]
    window_end = admission + datetime.timedelta(days=7)
    
    # Build the window as a per-row mask so each flag stays on its own row, even when
    #  some measurements in the encounter are timed before the admission
    in_window = (df.time >= admission) & (df.time <= window_end)
    
    # A missing baseline compares as False, i.e. 0.0 inside the window
    exceeds_baseline = (df.creat > 1.5*df.baseline_creat).astype(float)
    df['backcalc_aki'] = exceeds_baseline.where(in_window)
    
    return df

def addRollingWindowAKI(df):
    '''
    Adds the AKI conditions based on rolling window definition: 0.3 creat increase in < 48 hrs OR 50% increase in < 7 days
    
    Input: dataframe (typically of a single encounter)
    Output: dataframe with rolling-window aki values added in
    '''
    df = df.set_index('time').sort_index()
    df = df[~df.duplicated()]
    
    df['mincreat_48hr'] = df.creat.rolling(pd.Timedelta('2days'), min_periods=1).min().values
    df['mincreat_7day'] = df.creat.rolling(pd.Timedelta('7days'), min_periods=1).min().values

    df['deltacreat_48hr'] = np.round(df.creat - df.mincreat_48hr, decimals = 3)
    df['deltacreat_7day'] = np.round(df.creat - df.mincreat_7day, decimals = 3)

    df['rollingwindow_aki'] = (df.deltacreat_48hr >= 0.3) | (df.deltacreat_7day > 0.5*df.mincreat_7day)
    
    return df

## Reading in file; managing columns

In [None]:
# Scratch check: repeat the addBackCalcAKI steps by hand for a single encounter.
# NOTE: `out` is only assigned by a later cell in this file, so that cell has to be run first.
tmp = out.loc[out.enc == 205472336]
# Start with every row unflagged (NaN)
backcalc_aki = np.empty(tmp.shape[0])
backcalc_aki[:] = np.nan
tmp = tmp.sort_values('time')
# Rows timed between the admission (taken from the first row) and admission + 7 days
tmp2 = tmp.set_index('time').sort_index().loc[tmp.admission.values[0]:(tmp.admission + datetime.timedelta(days=7)).values[0]]
# NOTE(review): the flags are written to the first len(tmp2) positions of the time-sorted rows;
#  that only lines up with tmp2 when no row is timed before the admission -- confirm
backcalc_aki[:tmp2.shape[0]] = tmp2.creat > 1.5*tmp2.baseline_creat
tmp['backcalc_aki'] = backcalc_aki
# Bare expression so the notebook displays the result
tmp

In [None]:
# Load the raw creatinine export and derive the working columns (mrn, enc, time, creat, ...)
covid_df = pd.read_csv(r'H:\Data\Standardized AKI definition\dataset\covid creatinines.csv')
# Remove any leading/trailing 'M' / 'R' characters from the MRN string and store it as an integer
covid_df['mrn'] = covid_df.pat_mrn_id.str.strip('MR').astype('int')
covid_df['enc'] = covid_df.enc_id
covid_df['time'] = pd.to_datetime(covid_df.time)
# NOTE(review): astype('bool') maps every non-zero number and every non-empty string to True;
#  presumably sex / race / inpatient are already 0/1 coded in the file -- confirm
covid_df['sex'] = covid_df.sex.astype('bool')
covid_df['race'] = covid_df.race.astype('bool')
covid_df['inpatient'] = covid_df.inpatient.astype('bool')
covid_df['creat'] = covid_df['creatinine']
covid_df['admission'] = pd.to_datetime(covid_df.admission)
covid_df['discharge'] = pd.to_datetime(covid_df.discharge)
# Quick sanity check of the loaded frame
print('Shape:', covid_df.shape)
print(covid_df.dtypes)

In [None]:
# Restrict the frame to the working set of columns passed to returnAKIpatients
df = covid_df[['mrn', 'enc', 'time', 'creat', 'age', 'sex', 'race', 'inpatient', 'admission', 'discharge']]

In [None]:
%%time
# Apply both AKI definitions (rolling-window and back-calculated) to the working frame
out = returnAKIpatients(df, aki_calc_type = 'both')

## Adding Baseline Creat & Back-calculated AKI values

In [None]:
%%time
#out_rw = returnAKIpatients(df, aki_calc_type = 'rolling_window')

In [None]:
%%time
# Back-calculated definition only: baseline creatinine per patient, then the AKI flag per encounter
out_bc = returnAKIpatients(df, aki_calc_type = 'back_calculate')

In [None]:
%%time
# Both definitions in one pass; re-running this overwrites any earlier `out`
out = returnAKIpatients(df, aki_calc_type = 'both')

In [None]:
#out.to_csv(r'H:\Data\Standardized AKI definition\dataset\output.csv')

## Two criteria for the rolling-window definition of AKI:

#### *$creat \uparrow$ of 0.3 in < 48 hrs* OR *$creat \uparrow$ of 50% in < 7 days*

In [None]:
def eGFR(creat, age, black, female):
    '''
    Calculates the estimated glomerular filtration rate based on the serum creatinine levels, age, sex, and race (black or not black);
    Based on the formula in the paper A New Equation to Estimate Glomerular Filtration Rate (Levey et. Al, 2009) linked below
    
    https://pubmed.ncbi.nlm.nih.gov/19414839/
    
        eGFR = 141 * min(creat/k, 1)**alpha * max(creat/k, 1)**-1.209 * 0.993**age * 1.018 [if female] * 1.159 [if black]
    
    Input:
        creat - serum creatinine, scalar or numpy array (the constants of the equation assume mg/dL)
        age - age in years, scalar or numpy array
        black - 1/True if black, 0/False otherwise, scalar or numpy array
        female - 1/True if female, 0/False otherwise, scalar or numpy array
    Output: estimated GFR, scalar or numpy array following the inputs
    '''
    #k and alpha are the data-derived constants given in the paper:
    # k is 0.7 for females and 0.9 for males; alpha is -0.329 for females and -0.411 for males
    k = 0.9 - 0.2*female
    alpha = -0.411 + 0.082*female
    ratio = creat/k
    
    #The min term (creatinine at or below k) carries alpha; the max term (creatinine above k) carries -1.209
    below_k_term = np.minimum(ratio, 1)**alpha
    above_k_term = np.maximum(ratio, 1)**-1.209
    
    return 141*below_k_term*above_k_term*(0.993**age)*(1+female*0.018)*(1+black*0.159)

#Sample test data
# Ten synthetic patients; no random seed is set, so the values differ from run to run
creat = np.random.normal(loc=1, scale=0.2, size=10)
age = np.random.normal(loc=55, scale=10, size=10)
# Boolean flags, each True for roughly half of the draws
black = np.random.rand(10) > 0.5
female = np.random.rand(10) > 0.5

# Bare expression so the notebook displays the ten estimates
eGFR(creat, age, black, female) 
#values seem pretty reasonable (80-120)