## Imports

In [1]:
import pandas as pd
import numpy as np
import datetime, random

from multiprocessing import Pool
import time

## Functions

In [2]:
def returnAKIpatients(df, 
                      aki_calc_type = 'both', keep_cols = True, 
                      cond1time = '48hours', cond2time = '168hours'):
    '''
    Runs the requested AKI definition(s) over a creatinine dataframe by applying the per-group helpers
     addRollingWindowAKI (grouped by mrn), addBaselineCreat (grouped by mrn) and addBackCalcAKI (grouped by enc).
    
    Input: 
        df            - dataframe holding the columns the helpers read (mrn, enc, time, creat, admission, and
                        inpatient for the back-calculated path)
        aki_calc_type - 'rolling_window', 'back_calculate' or 'both' (default)
        keep_cols     - currently unused; kept in the signature so existing callers keep working
        cond1time     - short rolling window, forwarded to addRollingWindowAKI
        cond2time     - long window, forwarded to addRollingWindowAKI and addBackCalcAKI
    Output: dataframe with the AKI columns added. Only the 'both' branch flattens the index back to a plain
     one with time as a column; the single-definition branches apply no reset_index, so they return whatever 
     index groupby.apply produced.
    Raises: ValueError when aki_calc_type is not one of the three supported values (it used to fall through 
     silently and hand back the input untouched).
    '''
    valid_types = ('both', 'rolling_window', 'back_calculate')
    if aki_calc_type not in valid_types:
        raise ValueError("aki_calc_type must be one of %s, got %r" % (valid_types, aki_calc_type))
    
    # Small closures so every branch forwards the window lengths the same way
    def rolling_window(grp):
        return addRollingWindowAKI(grp, cond1time = cond1time, cond2time = cond2time)
    
    def back_calculate(grp):
        return addBackCalcAKI(grp, cond2time = cond2time)
    
    if aki_calc_type == 'both': 
        df = df.groupby('mrn', sort=False).apply(rolling_window) #enc vs mrn
        # the helper returns frames indexed by time: drop the group level, bring time back as a column
        df = df.reset_index('mrn', drop=True).reset_index()
        
        df = df.groupby('mrn', sort=False).apply(addBaselineCreat)
        df = df.groupby('enc', sort=False).apply(back_calculate)
        
        df = df.reset_index('enc', drop=True).reset_index()
        
    elif aki_calc_type == 'rolling_window':
        df = df.groupby('mrn', sort=False).apply(rolling_window)
        
    else: # 'back_calculate'
        df = df.groupby('mrn', sort=False).apply(addBaselineCreat)
        df = df.groupby('enc', sort=False).apply(back_calculate)
    
    return df

def addBaselineCreat(df):
    '''
    Adds the baseline creatinine to a dataframe. The baseline creatinine is defined as the median of the outpatient 
     creatinine values from 365 to 7 days prior to admission (both bounds inclusive).
    
    The frame is processed one distinct admission timestamp at a time; rows sharing an admission all receive the
     same baseline_creat, which is NaN when no outpatient value falls inside the window.
    
    Input: dataframe (typically of a single patient) with columns time, creat, inpatient (boolean) and admission
    Output: new dataframe with baseline creatinine column added in; rows are ordered by admission in order of 
     first appearance. The input frame is not modified.
    '''
    if df.empty:
        # pd.concat raises on an empty list, so hand back an empty frame that still has the new column
        return df.assign(baseline_creat = np.nan)
    
    split_dfs = list()
    for adm in df.admission.unique():
        adm = pd.Timestamp(adm)
        # explicit copy: we add a column below and must not write into a view of the caller's frame
        adm_df = df.loc[df.admission == adm].copy()
        
        # Label slicing needs a monotonic time index (pandas >= 2 raises KeyError otherwise);
        # sorting is harmless here because the median does not depend on row order.
        outpatient = adm_df.loc[~adm_df.inpatient].set_index('time').sort_index()
        window = outpatient.loc[adm - pd.Timedelta(days=365):adm - pd.Timedelta(days=7)]
        
        adm_df['baseline_creat'] = window.creat.median()
        split_dfs.append(adm_df)
    
    return pd.concat(split_dfs)

def addBackCalcAKI(df, 
                   cond2time = '168hours'):
    '''
    Adds the back-calculated AKI conditions, the KDIGO standards on the outpatient values;
    i.e. creatinine reaching at least 1.5x the baseline creatinine (a 50% increase) within the admission window.
    
    The window runs from 6 hours before the admission time of the first row up to cond2time after it, both 
     ends inclusive. Rows outside the window get no value (NaN) in backcalc_aki. Because the test is a plain
     comparison, a row inside the window whose baseline_creat is NaN is flagged False rather than NaN.
    
    Input: dataframe (typically of a single encounter) with columns time, creat, baseline_creat and admission;
           cond2time is anything pd.Timedelta accepts
    Output: dataframe indexed by time (rows with a repeated timestamp are dropped, first one kept, original 
     row order otherwise preserved) with back-calculated aki values added in
    '''
    df = df.reset_index(drop=True).set_index('time')
    # copy so the column assignment below never writes into a slice of the caller's data
    df = df[~df.index.duplicated()].copy()
    
    admission = df.admission.iloc[0]
    window_start = admission - pd.Timedelta(hours=6)
    window_end = admission + pd.Timedelta(cond2time)
    
    # A boolean mask selects the same rows as the label slice window_start:window_end on a sorted index,
    # but also works when the timestamps are not monotonic (label slicing raises there on pandas >= 2).
    in_window = (df.index >= window_start) & (df.index <= window_end)
    df_lf = df[in_window]
    
    # rounding keeps float noise in 1.5*baseline from flipping the >= comparison
    df.loc[df_lf.index, 'backcalc_aki'] = df_lf.creat >= np.round(1.5*df_lf.baseline_creat, decimals=5)
    
    return df 

def addRollingWindowAKI(df,
                        cond1time = '48hours', cond2time = '168hours'):
    '''
    Adds the AKI conditions based on rolling window definition: 0.3 creat increase in < 48 hrs OR 50% increase in < 7 days
    
    Input: dataframe (typically of a single encounter)
    Output: dataframe with rolling-window aki values added in
    '''
    #df = df[~df.duplicated()]
    df = df.set_index('time').sort_index()
    #df = df[~df.duplicated()]
    df_rw = df.loc[df.admission[0] - pd.Timedelta(hours=172):]
    minc_48, minc_7d = np.empty(df.shape[0]), np.empty(df.shape[0])
    minc_48[:], minc_7d[:] = np.nan, np.nan
    minc_48[df.shape[0]-df_rw.shape[0]:] = df_rw.creat.rolling(pd.Timedelta(cond1time), min_periods=1).min().values
    minc_7d[df.shape[0]-df_rw.shape[0]:] = df_rw.creat.rolling(pd.Timedelta(cond2time), min_periods=1).min().values
    
    df['mincreat_48hr'] = minc_48
    df['mincreat_7day'] = minc_7d
    
    df['deltacreat_48hr'] = np.round(df.creat - df.mincreat_48hr, decimals = 5)
    df['deltacreat_7day'] = np.round(df.creat - df.mincreat_7day, decimals = 5)

    df['rollingwindow_aki'] = (df.deltacreat_48hr >= 0.3) | (df.deltacreat_7day >= 0.5*df.mincreat_7day)
    
    return df

In [20]:
%%time
df2 = df.groupby('mrn', sort=False).apply(lambda d: addRollingWindowAKI(d, cond1time='52hours', cond2time='172hours'))
df2 = df2.reset_index('mrn',drop=True).reset_index()

Wall time: 7min 19s


In [21]:
# Write the rolling-window results (df2) to disk as CSV.
# NOTE(review): absolute, machine-specific Windows path - the notebook will not run elsewhere without editing it.
df2.to_csv(r'H:\Data\Standardized AKI definition\dataset\output2.csv')

## Reading in file; managing columns

In [3]:
# Load the raw creatinine extract and derive the working columns in one pass:
#   mrn / enc / creat   - short aliases built from pat_mrn_id, enc_id and creatinine
#   time / admission / discharge - parsed to datetimes
#   sex / race / inpatient       - cast to booleans
# NOTE(review): astype('bool') turns every non-zero / non-empty value into True - confirm that sex and race
# are already 0/1 encoded in the file.
covid_df = pd.read_csv(r'H:\Data\Standardized AKI definition\dataset\covid creatinines.csv').assign(
    mrn       = lambda raw: raw.pat_mrn_id.str.strip('MR').astype('int'),
    enc       = lambda raw: raw.enc_id,
    time      = lambda raw: pd.to_datetime(raw.time),
    sex       = lambda raw: raw.sex.astype('bool'),
    race      = lambda raw: raw.race.astype('bool'),
    inpatient = lambda raw: raw.inpatient.astype('bool'),
    creat     = lambda raw: raw['creatinine'],
    admission = lambda raw: pd.to_datetime(raw.admission),
    discharge = lambda raw: pd.to_datetime(raw.discharge),
)
print('Shape:', covid_df.shape)
print(covid_df.dtypes)

Shape: (299660, 16)
pat_mrn_id                 object
pat_enc_csn_id              int64
time               datetime64[ns]
creatinine                float64
hosp_admsn_time            object
hosp_disch_time            object
age                       float64
sex                          bool
race                         bool
enc_id                      int64
admission          datetime64[ns]
discharge          datetime64[ns]
inpatient                    bool
mrn                         int32
enc                         int64
creat                     float64
dtype: object


In [4]:
%%time
df = covid_df[['mrn', 'enc', 'time', 'creat', 'age', 'sex', 'race', 'inpatient', 'admission', 'discharge']]
df = df[~df.duplicated()] #The rows where the pat_enc_csn_id was lumped into enc_id become duplicates
df = df.groupby('enc', sort=False).apply(lambda d: d.sort_values('time'))
df = df.reset_index(drop=True)

Wall time: 1min


## Adding Rolling-window & Back-calculated AKI values

In [6]:
%%time
# Rolling-window definition only, using the default 48 h / 168 h windows of returnAKIpatients.
out_rw = returnAKIpatients(df, aki_calc_type = 'rolling_window')

Wall time: 6min 1s


In [6]:
%%time
# Back-calculated definition only (baseline creatinine per patient, then the admission-window test per encounter).
out_bc = returnAKIpatients(df, aki_calc_type = 'back_calculate')

Wall time: 3min 59s


In [5]:
%%time
# Both definitions (aki_calc_type defaults to 'both') with the windows widened to 52 h / 172 h.
out = returnAKIpatients(df, cond1time='52hours', cond2time='172hours')

Wall time: 18min 11s


In [6]:
# Write the combined rolling-window + back-calculated results (out) to disk as CSV.
# NOTE(review): absolute, machine-specific Windows path - the notebook will not run elsewhere without editing it.
out.to_csv(r'H:\Data\Standardized AKI definition\dataset\output.csv')

## Two criteria for the rolling-window definition of AKI:

#### *$creat \uparrow$ of $\geq$ 0.3 in < 48 hrs* OR *$creat \uparrow$ of $\geq$ 50% in < 7 days*

In [9]:
def eGFR(creat, age, black, female):
    '''
    Calculates the estimated glomerular filtration rate based on the serum creatinine levels, age, sex, and race (black or not black);
    Based on the formula in the paper A New Equation to Estimate Glomerular Filtration Rate (Levey et. Al, 2009) linked below
    
    https://pubmed.ncbi.nlm.nih.gov/19414839/
    
        eGFR = 141 * min(creat/k, 1)**a * max(creat/k, 1)**-1.209 * 0.993**age * 1.018 [if female] * 1.159 [if black]
    
    where k is the data-derived constant given in the paper (0.7 for females, 0.9 for males) and 
    a is -0.329 for females and -0.411 for males.
    
    Input: creat, age - numbers or numpy arrays; black, female - booleans (or 0/1) or arrays of them. 
           Array inputs are combined element-wise.
    Output: the eGFR value(s), same shape as the broadcast inputs
    '''
    kappa = 0.9 - 0.2*female         # 0.7 for females, 0.9 for males
    alpha = -0.411 + 0.082*female    # -0.329 for females, -0.411 for males
    ratio = creat/kappa
    
    # The sex-specific exponent alpha belongs to the min() term and the fixed -1.209 to the max() term;
    # these two exponents had previously been attached to the opposite terms.
    below_term = np.minimum(ratio, 1)**alpha
    above_term = np.maximum(ratio, 1)**-1.209
    
    return 141*below_term*above_term*(0.993**age)*(1+female*0.018)*(1+black*0.159)

#Synthetic inputs for a quick eyeball check of eGFR: creatinine ~ N(1, 0.2), age ~ N(55, 10),
#race and sex drawn as fair coin flips. The draws are unseeded, so the numbers change on every run.
n_samples = 10
creat = np.random.normal(loc=1, scale=0.2, size=n_samples)
age = np.random.normal(loc=55, scale=10, size=n_samples)
black = np.random.rand(n_samples) > 0.5
female = np.random.rand(n_samples) > 0.5

eGFR(creat, age, black, female)
#author's note: values seem pretty reasonable (80-120)

array([ 80.41598874,  73.62356567,  75.77712721,  88.21709052,
       110.22300329,  85.98249728,  89.78296331, 100.3245742 ,
       109.57435179, 110.65195848])

## Mismatch cases

In [10]:
# Re-derive the back-calculated flag by hand for one encounter, to compare with the pipeline output.
# Window: admission to admission + 7 days (inclusive), strict '>' against 1.5x baseline.
tmp = out.loc[out.enc == 205472336].sort_values('time').copy()
adm_time = tmp.admission.iloc[0]
in_window = tmp.time.between(adm_time, adm_time + pd.Timedelta(days=7))
tmp2 = tmp.loc[in_window].set_index('time')

# Write each result back at the position of its own row. Filling the first len(tmp2) slots would only be
# right if no rows preceded the admission time.
backcalc_aki = np.full(tmp.shape[0], np.nan)
backcalc_aki[in_window.values] = tmp2.creat.values > 1.5*tmp2.baseline_creat.values
tmp['backcalc_aki'] = backcalc_aki

In [90]:
# Run the full pipeline by hand for a single patient, with the rolling window computed per encounter.
# NOTE(review): addBackCalcAKI runs with its default cond2time ('168hours') while the rolling window uses
# '172hours' - confirm that the difference is intended.
tmp = df[df.mrn == 2307280]
tmp = tmp.groupby('enc', sort=False).apply(lambda d: addRollingWindowAKI(d, cond1time='52hours', cond2time='172hours'))
tmp = tmp.reset_index('enc', drop=True).reset_index()

tmp = tmp.groupby('mrn', sort=False).apply(lambda d: addBaselineCreat(d))
tmp = tmp.groupby('enc', sort=False).apply(lambda d: addBackCalcAKI(d))
# addBackCalcAKI returns frames indexed by time: drop only the enc group level and bring time back as a
# column (a plain reset_index(drop=True) would throw the timestamps away).
tmp = tmp.reset_index('enc', drop=True).reset_index()

In [12]:
# Same hand-run pipeline on a 100-row slice of df (rows 375-474): rolling window per encounter,
# then baseline per patient, then the back-calculated flag per encounter.
tmp = df[375:475]
tmp = (
    tmp.groupby('enc', sort=False)
       .apply(lambda enc_rows: addRollingWindowAKI(enc_rows, cond1time = '52hours', cond2time = '172hours'))
       .reset_index('enc', drop=True)
       .reset_index()
)

tmp = tmp.groupby('mrn', sort=False).apply(addBaselineCreat)
tmp = tmp.groupby('enc', sort=False).apply(addBackCalcAKI)

In [92]:
# Reproduce the 52-hour rolling minimum by hand for one patient and keep it as column 'st', next to the
# columns addRollingWindowAKI computes with its default windows.
t = df.loc[df.mrn == 3660621]
# sort by time: the label slice and the time-based rolling window below both need a monotonic index
t = t.set_index('time').sort_index()
# .iloc[0] because a bare [0] on a time-indexed Series is a label lookup in current pandas
t2 = t.loc[t.admission.iloc[0] - pd.Timedelta(hours=172):]
minc_48 = np.full(t.shape[0], np.nan)
# t2 is the tail of t, so its values fill the last len(t2) positions
minc_48[t.shape[0]-t2.shape[0]:] = t2.creat.rolling(pd.Timedelta('52hours'), min_periods=1).min().values
t['st'] = minc_48
# hand time back as a column: addRollingWindowAKI sets the time index itself
t = addRollingWindowAKI(t.reset_index())
#t['2020-01-29 08:14:00':]