# AKI - early/delayed renal treatment

*import libraries*

In [63]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import statsmodels.api as sm
from statsmodels.formula.api import ols

from scipy import stats
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn import metrics

In [86]:
# Read the AKI ICU extract. The path is relative, so the CSV has to sit in the
# notebook's working directory.
csv_path = 'aki_icu.csv'
aki_data = pd.read_csv(csv_path)

In [87]:
# Summarise the freshly loaded frame: index range, number of columns, dtype mix
# and memory usage.
aki_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2110 entries, 0 to 2109
Columns: 198 entries, subject_id to gcs_unable
dtypes: float64(170), int64(16), object(12)
memory usage: 3.2+ MB


In [88]:
# Normalise column names: lower-case them, then replace every space or slash
# with an underscore (other special characters, e.g. '.', are left as they are).
# ' |/' is a regular expression (space OR slash), so regex=True is passed
# explicitly: since pandas 2.0 the default for `regex` in `.str.replace` is
# False, which would treat the pattern as the literal text ' |/' and silently
# leave the names unchanged.
aki_data.columns = (
    aki_data.columns
    .str.lower()
    .str.replace(' |/', '_', regex=True)
)
aki_data.columns

  aki_data.columns = aki_data.columns.str.replace(' |/','_')


Index(['subject_id', 'hadm_id', 'stay_id', 'weight', 'weight_min',
       'weight_max', 'charttime_aki', 'creat_low_past_7day',
       'creat_low_past_48hr', 'creat',
       ...
       'spo2_mean', 'glucose_min.2', 'glucose_max.2', 'glucose_mean',
       'urineoutput', 'gcs_min', 'gcs_motor', 'gcs_verbal', 'gcs_eyes',
       'gcs_unable'],
      dtype='object', length=198)

In [89]:
# Allow up to 100 columns in rendered frames, then preview the first five rows.
pd.options.display.max_columns = 100
aki_data.head(5)

Unnamed: 0,subject_id,hadm_id,stay_id,weight,weight_min,weight_max,charttime_aki,creat_low_past_7day,creat_low_past_48hr,creat,aki_stage_creat,uo_rt_6hr,uo_rt_12hr,uo_rt_24hr,aki_stage_uo,aki_stage,kigoord,new_event_flag,kigoord_flag,charttime_rrt,dialysis_present,dialysis_active,dialysis_type,rrtorder,delay_rrt,gender,dod,admittime,dischtime,los_hospital,admission_age,ethnicity,hospital_expire_flag,hospstay_seq,first_hosp_stay,icu_intime,icu_outtime,los_icu,icustay_seq,first_icu_stay,lactate_min,lactate_max,ph_min,ph_max,so2_min,so2_max,po2_min,po2_max,pco2_min,pco2_max,...,alp_min,alp_max,ast_min,ast_max,amylase_min,amylase_max,bilirubin_total_min,bilirubin_total_max,bilirubin_direct_min,bilirubin_direct_max,bilirubin_indirect_min,bilirubin_indirect_max,ck_cpk_min,ck_cpk_max,ck_mb_min,ck_mb_max,ggt_min,ggt_max,ld_ldh_min,ld_ldh_max,heart_rate_min,heart_rate_max,heart_rate_mean,sbp_min,sbp_max,sbp_mean,dbp_min,dbp_max,dbp_mean,mbp_min,mbp_max,mbp_mean,resp_rate_min,resp_rate_max,resp_rate_mean,temperature_min.1,temperature_max.1,temperature_mean,spo2_min,spo2_max,spo2_mean,glucose_min.2,glucose_max.2,glucose_mean,urineoutput,gcs_min,gcs_motor,gcs_verbal,gcs_eyes,gcs_unable
0,19518348,24328934,30041848,104.5,102.0,107.0,07-04-26 4:00,,,,,0.4657,0.6891,0.6891,1.0,1,8,1,1,16-04-26 14:25,1,1,IHD,1,1,M,24-04-26,06-04-26 17:21,24-04-26 9:00,17.652083,66.262081,UNKNOWN,1,1,t,06-04-26 17:22,24-04-26 13:11,17.83,1,t,1.3,1.3,7.41,7.42,98.0,98.0,90.0,147.0,39.0,40.0,...,61.0,61.0,44.0,44.0,,,0.3,0.3,,,,,484.0,644.0,15.0,23.0,,,485.0,485.0,66.0,138.0,89.625,95.0,152.0,132.5,48.0,91.0,68.5,59.0,98.0,83.090909,18.0,27.0,21.673077,36.83,38.11,37.405,93.0,99.0,96.909091,172.0,321.0,257.5,2895.0,10,5.0,1.0,4.0,0
1,18070922,27400972,30078723,100.0,100.0,100.0,30-06-47 8:00,,,,,0.4334,0.6007,0.9219,1.0,1,32,1,1,05-07-47 19:15,1,1,CRRT,1,1,M,,28-06-47 21:56,30-07-47 14:40,31.697222,70.489938,BLACK/AFRICAN AMERICAN,0,1,t,28-06-47 21:59,30-07-47 15:03,31.71,1,t,1.2,1.2,7.35,7.37,,,99.0,99.0,38.0,40.0,...,36.0,37.0,12.0,13.0,,,0.6,0.6,,,,,,,,,,,,,77.0,96.0,84.208333,95.0,116.0,107.5,49.0,83.5,56.46,62.0,157.0,74.019231,12.0,23.0,17.134615,36.72,37.56,36.978,92.0,99.0,96.68,106.0,140.0,123.666667,2400.0,3,1.0,1.0,1.0,0
2,17414351,23018977,30254621,98.95,85.0,103.6,14-09-79 12:12,,,,,0.0,0.0,0.0,3.0,3,4,1,1,14-09-79 12:23,1,1,CRRT,1,0,M,,14-09-79 0:06,03-01-80 18:15,111.75625,37.700917,UNKNOWN,0,1,t,14-09-79 0:07,17-10-79 19:59,33.83,1,t,1.2,2.5,7.34,7.42,93.0,93.0,36.0,85.0,29.0,36.0,...,351.0,351.0,104.0,104.0,,,36.3,36.3,26.7,26.7,,,36.0,36.0,4.0,4.0,,,331.0,331.0,71.0,93.0,80.956522,82.0,118.0,101.958333,36.0,63.0,52.4375,51.0,78.0,67.541667,12.0,36.0,20.0,35.5,36.7,36.132143,92.0,100.0,95.25,114.0,228.0,152.625,10.0,6,4.0,1.0,1.0,0
3,17555267,21215796,30326472,105.7,104.0,107.4,20-12-28 5:38,,,,,2.7933,2.7933,0.2665,3.0,3,6,1,1,24-12-28 18:20,1,1,CRRT,1,1,F,26-12-28,19-12-28 5:54,26-12-28 0:00,6.754167,59.967156,WHITE,1,1,t,19-12-28 5:58,27-12-28 0:08,7.76,1,t,,,,,,,,,,,...,133.0,164.0,162.0,189.0,,,3.0,3.3,,,,,1011.0,1131.0,14.0,16.0,,,374.0,378.0,78.0,97.0,87.521739,75.0,161.0,108.0,34.0,131.0,56.4,54.0,139.0,70.7,17.0,32.0,24.086957,36.39,37.0,36.776667,88.0,100.0,96.478261,47.0,134.0,102.714286,425.0,13,6.0,5.0,2.0,0
4,18571406,23198317,30341422,69.0,66.7,71.3,23-01-80 9:00,,,,,0.4909,0.6812,0.6812,1.0,1,7,1,1,23-01-80 14:00,1,1,IHD,1,0,M,,23-01-80 2:04,28-01-80 15:00,5.538889,79.06047,WHITE,0,1,t,23-01-80 2:05,24-01-80 0:04,0.92,1,t,,,,,,,,,,,...,,,,,,,,,,,,,277.0,277.0,6.0,6.0,,,,,74.0,100.0,90.318182,113.0,161.0,140.8,26.0,134.0,71.85,60.0,141.0,88.285714,17.0,25.0,21.0,36.33,37.33,36.62,93.0,98.0,95.782609,121.0,126.0,124.0,660.0,15,6.0,5.0,4.0,0


In [101]:
# Allow up to 200 rows in rendered output so the per-column listing is not
# elided, then count the missing values in every column.
pd.options.display.max_rows = 200
aki_data.isna().sum()

subject_id                      0
hadm_id                         0
stay_id                         0
weight                          9
weight_min                      9
weight_max                      9
charttime_aki                   0
creat_low_past_7day           752
creat_low_past_48hr           778
creat                         752
aki_stage_creat               752
uo_rt_6hr                    1344
uo_rt_12hr                   1344
uo_rt_24hr                   1344
aki_stage_uo                 1344
aki_stage                       0
kigoord                         0
new_event_flag                  0
kigoord_flag                    0
charttime_rrt                   0
dialysis_present                0
dialysis_active                 0
dialysis_type                  90
rrtorder                        0
delay_rrt                       0
gender                          0
dod                          1225
admittime                       0
dischtime                       0
los_hospital  

In [98]:
# Class balance of the delay_rrt flag: count the rows per value.
# The two groups are uneven in size, which matters for any later comparison
# or model that uses this flag.
aki_data['delay_rrt'].value_counts()

1    1720
0     390
Name: delay_rrt, dtype: int64

**drop columns**

In [83]:
# Columns removed before the analysis, grouped as in the original selection.
# Keeping the list in a named variable makes it easy to inspect or reuse.
columns_to_drop = [
    'weight_min', 'weight_max',
    'charttime_aki',
    'creat_low_past_7day', 'creat_low_past_48hr', 'creat', 'aki_stage_creat',
    'uo_rt_6hr', 'uo_rt_12hr', 'uo_rt_24hr', 'aki_stage_uo',
    'kigoord', 'new_event_flag', 'kigoord_flag',
    'charttime_rrt',
    'dialysis_present', 'dialysis_active', 'dialysis_type',
    'rrtorder',
    'dod',
    'admittime', 'dischtime', 'los_hospital',
    'hospital_expire_flag', 'hospstay_seq', 'first_hosp_stay',
    'icu_intime', 'icu_outtime', 'los_icu',
    'icustay_seq', 'first_icu_stay',
    'lactate_min',
    'so2_max',
    'po2_max',
    'aado2_min', 'aado2_max',
    'aado2_calc_min',
    'pao2fio2ratio_min', 'pao2fio2ratio_max',
    'baseexcess_max',
    'bicarbonate_min', 'bicarbonate_max',
    'totalco2_min', 'totalco2_max',
    'hematocrit_min', 'hematocrit_max',
    'hemoglobin_min', 'hemoglobin_max',
    'carboxyhemoglobin_min', 'carboxyhemoglobin_max',
    'methemoglobin_min', 'methemoglobin_max',
    'temperature_min', 'temperature_max',
    'chloride_min', 'chloride_max',
    'calcium_min', 'calcium_max',
    'glucose_min', 'glucose_max',
    'potassium_min', 'potassium_max',
    'sodium_min', 'sodium_max',
    'globulin_min', 'globulin_max',
    'total_protein_min', 'total_protein_max',
    'atypical_lymphocytes_min', 'atypical_lymphocytes_max',
    'bands_min', 'bands_max',
    'immature_granulocytes_min', 'immature_granulocytes_max',
    'metamyelocytes_min', 'metamyelocytes_max',
    'nrbc_min', 'nrbc_max',
    'd_dimer_min', 'd_dimer_max',
    'thrombin_min', 'thrombin_max',
    'inr_min',
    'pt_min',
    'ptt_min',
    'alt_min',
    'alp_min',
    'ast_min',
    'amylase_min', 'amylase_max',
    'bilirubin_total_min',
    'bilirubin_direct_min', 'bilirubin_direct_max',
    'bilirubin_indirect_min', 'bilirubin_indirect_max',
    'ck_cpk_min',
    'ck_mb_min',
    'ggt_min', 'ggt_max',
    'ld_ldh_min',
    'heart_rate_min', 'heart_rate_max',
    'sbp_min', 'sbp_max',
    'dbp_min', 'dbp_max',
    'mbp_min', 'mbp_max',
    'resp_rate_min', 'resp_rate_max',
    'temperature_min.1', 'temperature_max.1',
    'spo2_min', 'spo2_max',
    'glucose_min.2', 'glucose_max.2',
    'gcs_motor', 'gcs_verbal', 'gcs_eyes', 'gcs_unable',
]

# Rebind the name to the reduced frame rather than mutating with inplace=True.
# The cell still cannot be re-run on its own: once the columns are gone a second
# drop raises KeyError, so reload the CSV (and redo the column renaming) first.
aki_data = aki_data.drop(columns=columns_to_drop)

In [84]:
# Lower the row display limit to 85, then list the remaining columns with their
# non-null counts and dtypes.
pd.options.display.max_rows = 85
aki_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2110 entries, 0 to 2109
Data columns (total 78 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   subject_id           2110 non-null   int64  
 1   hadm_id              2110 non-null   int64  
 2   stay_id              2110 non-null   int64  
 3   weight               2101 non-null   float64
 4   aki_stage            2110 non-null   int64  
 5   delay_rrt            2110 non-null   int64  
 6   gender               2110 non-null   object 
 7   admission_age        2110 non-null   float64
 8   ethnicity            2110 non-null   object 
 9   lactate_max          1639 non-null   float64
 10  ph_min               1762 non-null   float64
 11  ph_max               1762 non-null   float64
 12  so2_min              1029 non-null   float64
 13  po2_min              1762 non-null   float64
 14  pco2_min             1762 non-null   float64
 15  pco2_max             1762 non-null   f

**missing data**

In [85]:
# Count the missing values per remaining column.
aki_data.isna().sum()

subject_id                0
hadm_id                   0
stay_id                   0
weight                    9
aki_stage                 0
delay_rrt                 0
gender                    0
admission_age             0
ethnicity                 0
lactate_max             471
ph_min                  348
ph_max                  348
so2_min                1081
po2_min                 348
pco2_min                348
pco2_max                348
aado2_calc_max          774
baseexcess_min          348
hematocrit_min.1          4
hematocrit_max.1          4
hemoglobin_min.1          6
hemoglobin_max.1          6
platelets_min             4
platelets_max             4
wbc_min                   5
wbc_max                   5
albumin_min             863
albumin_max             863
aniongap_min              3
aniongap_max              3
bicarbonate_min.1         3
bicarbonate_max.1         3
bun_min                   3
bun_max                   3
calcium_min.1            73
calcium_max.1       

In [24]:
# Distinct AKI stage values present in the frame.
# NOTE(review): the recorded output lists only 1 and 0, yet the aki_icu.csv preview
# earlier in this notebook shows rows with aki_stage == 3, and this cell's execution
# count is lower than the load cell's. The output may predate the current data;
# re-run to confirm.
aki_data['aki_stage'].unique()

array([1, 0], dtype=int64)

In [25]:
# Distinct values of the 'age' column.
# NOTE(review): no 'age' column is visible in the aki_icu.csv previews in this
# notebook (they show a float 'admission_age'), and this cell's execution count is
# lower than the load cell's. Presumably a leftover from a different table; confirm,
# or switch to 'admission_age'.
aki_data['age'].unique()

array([49, 76, 61, 64, 69, 53, 82, 74, 44, 55, 66, 85, 72, 77, 79, 59, 81,
       54, 52, 36, 67, 62, 46, 47, 71, 26, 56, 50, 41, 73, 65, 57, 68, 58,
       83, 78, 29, 40, 63, 60, 48, 45, 70, 80, 51, 31, 89, 43, 75, 42, 86,
       30, 21, 87, 38, 84, 34, 24, 88, 33, 27, 37, 39, 22, 35, 32, 25, 23,
       19, 20, 28], dtype=int64)

In [31]:
# Summary statistics for 'mean_weight'.
# NOTE(review): the recorded count (25089) is far larger than the 2110 rows reported
# for aki_icu.csv, and the previews show 'weight' rather than 'mean_weight'. This
# output appears to come from a different table; confirm before relying on it.
aki_data['mean_weight'].describe()

count    25089.000000
mean        85.979087
std         21.881759
min          0.500000
25%         71.110000
50%         84.200000
75%         97.900000
max        219.000000
Name: mean_weight, dtype: float64

In [30]:
# Summary statistics for 'height'.
# NOTE(review): the recorded count (25089) does not match the 2110 rows reported for
# aki_icu.csv, so this output appears to come from a different table; confirm that
# 'height' exists in the current frame.
aki_data['height'].describe()

count    25089.000000
mean       170.525750
std         11.142082
min         33.020000
25%        162.560000
50%        170.180000
75%        178.000000
max        292.100000
Name: height, dtype: float64

In [32]:
# Distinct values of 'bmi_group'.
# NOTE(review): 'bmi_group' is not visible in the aki_icu.csv previews in this
# notebook and this cell's execution count is lower than the load cell's. Presumably
# a leftover from a different table; confirm.
aki_data['bmi_group'].unique()

array([4, 3, 1, 2], dtype=int64)

In [26]:
# Distinct values of 'valuenum'.
# NOTE(review): 'valuenum' is not visible in the aki_icu.csv previews in this
# notebook and this cell's execution count is lower than the load cell's. Presumably
# a leftover from a different table; confirm.
aki_data['valuenum'].unique()

array([2.40e+01, 1.01e+02, 8.00e-01, 1.36e+02, 4.30e+00, 1.33e+02,
       2.00e+01, 1.23e+01, 2.17e+02, 6.40e+00, 1.05e+01, 1.62e+02,
       1.15e+01, 8.00e+01, 2.90e+01, 1.00e+02, 1.70e+01, 1.11e+01,
       2.19e+02, 5.80e+00, 1.35e+01, 2.50e+01, 1.02e+02, 1.20e+00,
       8.80e+01, 4.20e+00, 1.20e+01, 1.41e+01, 4.06e+02, 8.60e+00,
       2.80e+01, 9.80e+01, 1.10e+00, 1.72e+02, 1.38e+02, 1.00e+01,
       1.27e+01, 2.94e+02, 1.19e+01, 1.37e+01, 2.20e+01, 6.00e-01,
       1.24e+02, 3.10e+00, 1.37e+02, 1.28e+01, 2.47e+02, 3.10e+01,
       1.03e+02, 1.00e+00, 7.20e+01, 4.60e+00, 1.41e+02, 1.40e+01,
       1.24e+01, 2.81e+02, 5.70e+00, 1.04e+02, 9.00e-01, 1.64e+02,
       3.50e+00, 1.35e+02, 1.29e+01, 1.81e+02, 5.10e+00, 9.60e+01,
       7.90e+00, 3.48e+02, 1.90e+00, 4.50e+00, 6.60e+01, 8.20e+00,
       1.60e+00, 3.00e+01, 9.10e+01, 1.05e+02, 1.28e+02, 1.90e+01,
       1.17e+01, 2.14e+02, 4.90e+00, 2.70e+01, 1.06e+02, 9.20e+01,
       3.80e+00, 1.40e+02, 2.10e+01, 1.38e+01, 2.09e+02, 9.30e

In [33]:
# Distinct AKI stage values (same expression as an earlier cell in this notebook).
# NOTE(review): the recorded output lists only 1 and 0, while the aki_icu.csv preview
# shows rows with aki_stage == 3. The output may predate the current data; re-run to
# confirm.
aki_data['aki_stage'].unique()

array([1, 0], dtype=int64)

In [None]:
# aki_data.isnull()

**creat_low_past_7day**

In [9]:
# Number of rows with no value for the lowest creatinine in the past 7 days.
# NOTE(review): 'creat_low_past_7day' is among the columns removed by the drop cell
# earlier in this notebook, so on a top-to-bottom run this lookup would raise
# KeyError. Confirm which frame this section is meant to inspect.
aki_data['creat_low_past_7day'].isna().sum()

70

70 of the 101 rows have no `creat_low_past_7day` value (only 31 are non-null).

In [10]:
# Summary statistics over the non-missing 'creat_low_past_7day' values.
# NOTE(review): this column is among those removed by the drop cell earlier in this
# notebook, so a top-to-bottom run would raise KeyError here. Confirm which frame
# this section is meant to inspect.
aki_data['creat_low_past_7day'].describe()

count    31.000000
mean      2.109677
std       1.421350
min       0.300000
25%       1.150000
50%       1.800000
75%       2.600000
max       5.600000
Name: creat_low_past_7day, dtype: float64

In [17]:
# Preview the first rows in which 'creat_low_past_7day' is missing.
# NOTE(review): the column is among those removed by the drop cell earlier in this
# notebook; confirm which frame this section is meant to inspect.
missing_creat_7day = aki_data['creat_low_past_7day'].isna()
aki_data.loc[missing_creat_7day].head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,creat_low_past_7day,creat_low_past_48hr,creat,aki_stage_creat,uo_rt_6hr,uo_rt_12hr,uo_rt_24hr,aki_stage_uo,kigoord,new_event_flag,kigoord2,timediff
0,17057610,26086792,30045625,2200-06-16 12:00:00,,,,,0.5418,0.4374,0.4417,2.0,1,1,1,17
2,13098601,23996524,30230188,2164-10-18 16:00:00,,,,,0.2062,0.1914,0.1694,3.0,1,1,1,14
3,19017919,28481207,30271658,2130-12-01 19:00:00,,,,,0.4716,0.4986,0.5949,2.0,10,1,1,17
4,14706623,23172713,30387219,2140-08-10 15:12:00,,,,,2.1645,0.3935,0.2973,3.0,1,1,1,32
6,15443439,22088878,30756275,2156-08-26 08:00:00,,,,,0.4976,0.564,0.6087,1.0,9,1,1,36


In [None]:
# TODO: decide whether to drop creat_low_past_7day, given that 70 of the 101 rows are missing.

**creat_low_past_48hr**

In [11]:
# Summary statistics over the non-missing 'creat_low_past_48hr' values.
# NOTE(review): this column is among those removed by the drop cell earlier in this
# notebook, so a top-to-bottom run would raise KeyError here. Confirm which frame
# this section is meant to inspect.
aki_data['creat_low_past_48hr'].describe()

count    31.000000
mean      2.632258
std       1.766614
min       0.400000
25%       1.350000
50%       2.200000
75%       3.250000
max       7.100000
Name: creat_low_past_48hr, dtype: float64

**uo_rt_6hr**

In [14]:
# Number of rows with no 6-hour urine output rate.
# NOTE(review): 'uo_rt_6hr' is among the columns removed by the drop cell earlier in
# this notebook; confirm which frame this section is meant to inspect.
aki_data['uo_rt_6hr'].isna().sum()

29

In [16]:
# Preview the first rows in which 'uo_rt_6hr' is missing.
# NOTE(review): the column is among those removed by the drop cell earlier in this
# notebook; confirm which frame this section is meant to inspect.
missing_uo_6hr = aki_data['uo_rt_6hr'].isna()
aki_data.loc[missing_uo_6hr].head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,creat_low_past_7day,creat_low_past_48hr,creat,aki_stage_creat,uo_rt_6hr,uo_rt_12hr,uo_rt_24hr,aki_stage_uo,kigoord,new_event_flag,kigoord2,timediff
1,12149070,26440183,30129989,2160-07-08 16:23:00,1.3,2.1,2.0,1.0,,,,,21,1,1,9
5,13721087,24896771,30555092,2170-08-09 04:15:00,4.3,6.5,6.5,3.0,,,,,5,1,1,9
11,15718873,23096615,31516673,2130-11-16 07:47:00,4.4,4.6,6.2,1.0,,,,,2,1,1,9
12,16501746,22797806,31760184,2132-02-20 02:36:00,1.5,1.5,2.4,1.0,,,,,3,1,1,12
14,16624205,23962991,31832021,2126-02-25 03:25:00,0.9,0.9,1.3,1.0,,,,,26,1,1,15
