# Setup

In [1]:
import pandas as pd

# Data Import

In [2]:
mimic_urineoutput = pd.read_csv('tests/experiment/data/mimic_urineoutput.csv')
mimic_creatinine = pd.read_csv('tests/experiment/data/mimic_creatinine.csv')
mimic_rrt = pd.read_csv('tests/experiment/data/mimic_rrt.csv')
mimic_kdigo = pd.read_csv('tests/experiment/data/mimic_kdigo.csv')
mimic_weights = pd.read_csv('tests/experiment/data/mimic_weights.csv')

In [3]:
# coerce charttime to datetime
mimic_urineoutput['charttime'] = pd.to_datetime(mimic_urineoutput['charttime'])
mimic_creatinine['charttime'] = pd.to_datetime(mimic_creatinine['charttime'])
mimic_rrt['charttime'] = pd.to_datetime(mimic_rrt['charttime'])
mimic_kdigo['charttime'] = pd.to_datetime(mimic_kdigo['charttime'])

In [4]:
# sort values by stay_id and then by charttime
mimic_urineoutput = mimic_urineoutput.sort_values(['stay_id', 'charttime']).reset_index(drop=True)
mimic_creatinine = mimic_creatinine.sort_values(['stay_id', 'charttime']).reset_index(drop=True)
mimic_rrt = mimic_rrt.sort_values(['stay_id', 'charttime']).reset_index(drop=True)
mimic_kdigo = mimic_kdigo.sort_values(['stay_id', 'charttime']).reset_index(drop=True)

In [5]:
mimic_urineoutput.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3490516 entries, 0 to 3490515
Data columns (total 3 columns):
 #   Column       Dtype         
---  ------       -----         
 0   stay_id      int64         
 1   charttime    datetime64[ns]
 2   urineoutput  float64       
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 79.9 MB


## Data Cleaning

### Urine Output

In [6]:
mimic_urineoutput.head()

Unnamed: 0,stay_id,charttime,urineoutput
0,30000153,2174-09-29 12:12:00,280.0
1,30000153,2174-09-29 14:00:00,45.0
2,30000153,2174-09-29 15:00:00,50.0
3,30000153,2174-09-29 16:00:00,50.0
4,30000153,2174-09-29 17:00:00,45.0


In [7]:
mimic_urineoutput.dropna(inplace=True)

### Creatinine

In [8]:
mimic_creatinine.head()

Unnamed: 0,hadm_id,stay_id,charttime,creat,creat_low_past_48hr,creat_low_past_7day
0,23998182,30000153,2174-09-29 10:16:00,1.2,,
1,23998182,30000153,2174-09-29 15:37:00,0.9,1.2,1.2
2,23998182,30000153,2174-09-30 03:34:00,1.1,0.9,0.9
3,27543152,30000213,2162-06-17 09:01:00,2.6,,
4,27543152,30000213,2162-06-18 05:53:00,2.8,2.6,2.6


In [9]:
mimic_creatinine.drop(columns=["hadm_id", "creat_low_past_48hr", "creat_low_past_7day"], inplace=True)

In [10]:
mimic_creatinine.dropna(inplace=True)
mimic_creatinine.head()

Unnamed: 0,stay_id,charttime,creat
0,30000153,2174-09-29 10:16:00,1.2
1,30000153,2174-09-29 15:37:00,0.9
2,30000153,2174-09-30 03:34:00,1.1
3,30000213,2162-06-17 09:01:00,2.6
4,30000213,2162-06-18 05:53:00,2.8


### RRT

In [11]:
mimic_rrt.dropna(inplace=True)

### Weights

In [12]:
mimic_weights.head()

Unnamed: 0,subject_id,stay_id,weight_admit,weight,weight_min,weight_max
0,16454297,32340171,66.0,66.45,66.0,66.9
1,19647914,30573846,83.3,83.3,83.3,83.3
2,14111969,32688871,82.1,82.1,82.1,82.1
3,17117948,32440061,53.7,53.7,53.7,53.7
4,19366075,33269218,62.0,62.0,62.0,62.0


In [13]:
mimic_weights.drop(["subject_id", "weight_admit", "weight_min", "weight_max"], axis=1, inplace=True)

### Reduce Dataframes to Common Stay Ids

In [14]:
mimic_weights.dropna(inplace=True)

In [15]:
mimic_weights.stay_id.unique().shape

(74311,)

In [16]:
# find common stay_ids in weights, creatinine and urineoutput
stay_ids = set(mimic_weights['stay_id']).intersection(set(mimic_creatinine['stay_id'])).intersection(set(mimic_urineoutput['stay_id']))
len(stay_ids)

71607

In [17]:
# subset dataframes
mimic_creatinine = mimic_creatinine[mimic_creatinine['stay_id'].isin(stay_ids)]
mimic_urineoutput = mimic_urineoutput[mimic_urineoutput['stay_id'].isin(stay_ids)]
mimic_weights = mimic_weights[mimic_weights['stay_id'].isin(stay_ids)]

In [18]:
### expand rrt dataframe to include all patients
# get first entry for each patient
mimic_first = mimic_urineoutput.groupby("stay_id").first()["charttime"]
# drop patients already in rrt
mimic_first = mimic_first[~mimic_first.index.isin(mimic_rrt["stay_id"])]
# expand mimic_rrt to include all patients
# convert into dataframe
mimic_first = pd.DataFrame(mimic_first)
# add rrt_status column
mimic_first["crrt_status"] = 0
mimic_first.reset_index(inplace=True)
mimic_rrt.rename(columns={"dialysis_present": "rrt_status"}, inplace=True)
mimic_rrt = pd.concat([mimic_rrt, mimic_first])
mimic_rrt

Unnamed: 0,stay_id,charttime,rrt_status,crrt_status
0,30003226,2123-02-26 16:00:00,1.0,
1,30003226,2123-02-26 16:00:00,1.0,
2,30003226,2123-02-26 16:30:00,1.0,
3,30003226,2123-02-26 16:45:00,1.0,
4,30003226,2123-02-26 18:00:00,1.0,
...,...,...,...,...
71517,39999301,2111-08-18 18:45:00,,0.0
71518,39999384,2158-05-24 22:00:00,,0.0
71519,39999552,2186-07-17 18:00:00,,0.0
71520,39999562,2129-01-25 16:37:00,,0.0


# pyAKI

In [19]:
import pyAKI
import pyAKI.kdigo as kdigo

In [None]:
# ana = kdigo.Analyser([
#             pyAKI.utils.Dataset(pyAKI.utils.DatasetType.URINEOUTPUT, mimic_urineoutput),
#             pyAKI.utils.Dataset(pyAKI.utils.DatasetType.CREATININE, mimic_creatinine),
#             pyAKI.utils.Dataset(pyAKI.utils.DatasetType.DEMOGRAPHICS, mimic_weights),
#             pyAKI.utils.Dataset(pyAKI.utils.DatasetType.CRRT, mimic_rrt),
#         ])

# pyaki_kdigo = ana.process_stays()
# pyaki_kdigo.to_csv("tests/experiment/data/pyaki_kdigo.csv")

In [21]:
sub_stay_ids = list(stay_ids)[0:1000]
sub_mimic_urineoutput = mimic_urineoutput[mimic_urineoutput["stay_id"].isin(sub_stay_ids)]
sub_mimic_creatinine = mimic_creatinine[mimic_creatinine["stay_id"].isin(sub_stay_ids)]
sub_mimic_rrt = mimic_rrt[mimic_rrt["stay_id"].isin(sub_stay_ids)]
sub_mimic_weights = mimic_weights[mimic_weights["stay_id"].isin(sub_stay_ids)]


In [22]:
sub_ana = pyAKI.kdigo.Analyser(
    [
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.URINEOUTPUT, sub_mimic_urineoutput),
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.CREATININE, sub_mimic_creatinine),
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.DEMOGRAPHICS, sub_mimic_weights),
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.CRRT, sub_mimic_rrt),
    ]
)
sub_pyaki_kdigo = sub_ana.process_stays()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[self._time_identifier] = pd.to_datetime(df[self._time_identifier])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[self._time_identifier] = pd.to_datetime(df[self._time_identifier])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[self._time_identifier] = pd.to_datetime(df[self._time_identifi

In [23]:
sub_mimic_kdigo = mimic_kdigo[mimic_kdigo.stay_id.isin(sub_stay_ids)].copy()

In [24]:
# merge two series
comparison = pd.merge(sub_pyaki_kdigo.groupby("stay_id").stage.max(), sub_mimic_kdigo.groupby("stay_id").aki_stage.max(), left_index=True, right_index=True)

In [25]:
comparison[comparison.stage != comparison.aki_stage]

Unnamed: 0_level_0,stage,aki_stage
stay_id,Unnamed: 1_level_1,Unnamed: 2_level_1
30016383,1.0,0
30147534,1.0,0
30147954,1.0,0
30277837,1.0,2
30278219,1.0,0
...,...,...
39846554,1.0,2
39977263,3.0,2
39977636,2.0,1
39977638,2.0,1


In [26]:
sub_pyaki_kdigo.drop(columns=["stay_id_x", "stay_id_y"], inplace=True)


In [27]:
sub_pyaki_kdigo.reset_index(drop=False, inplace=True)

In [34]:
sub_pyaki_kdigo[(sub_pyaki_kdigo.stay_id == 30147534)].tail(50)

Unnamed: 0,stay_id,charttime,urineoutput,urineoutput_stage,creat,abs_creatinine_stage,rel_creatinine_stage,weight,rrt_status,crrt_status,crrt_stage,stage
4052,30147534,2176-04-10 08:00:00,,0.0,0.5,0.0,0.0,66.2,,,,0.0
4053,30147534,2176-04-10 09:00:00,,0.0,0.5,0.0,0.0,66.2,,,,0.0
4054,30147534,2176-04-10 10:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4055,30147534,2176-04-10 11:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4056,30147534,2176-04-10 12:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4057,30147534,2176-04-10 13:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4058,30147534,2176-04-10 14:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4059,30147534,2176-04-10 15:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4060,30147534,2176-04-10 16:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0
4061,30147534,2176-04-10 17:00:00,55.555556,0.0,0.5,0.0,0.0,66.2,,,,0.0


In [29]:
sub_mimic_kdigo[sub_mimic_kdigo["stay_id"] == 30147534]

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,creat_low_past_7day,creat_low_past_48hr,creat,aki_stage_creat,uo_rt_6hr,uo_rt_12hr,uo_rt_24hr,aki_stage_uo,aki_stage_crrt,aki_stage
67055,19897276,28994803,30147534,2176-04-07 19:10:00,,,0.5,0.0,,,,,,0
67056,19897276,28994803,30147534,2176-04-07 21:27:00,0.5,0.5,0.4,0.0,,,,,,0
67057,19897276,28994803,30147534,2176-04-07 23:00:00,,,,,4.5317,4.5317,4.5317,0.0,,0
67058,19897276,28994803,30147534,2176-04-08 05:40:00,0.4,0.4,0.5,0.0,,,,,,0
67059,19897276,28994803,30147534,2176-04-08 06:00:00,,,,,5.287,1.2273,1.2273,0.0,,0
67060,19897276,28994803,30147534,2176-04-08 09:00:00,,,,,2.077,1.1673,1.1673,0.0,,0
67061,19897276,28994803,30147534,2176-04-08 12:00:00,,,,,1.6994,1.7264,1.1869,0.0,,0
67062,19897276,28994803,30147534,2176-04-08 16:00:00,,,,,2.2659,1.7852,1.3427,0.0,,0
67063,19897276,28994803,30147534,2176-04-09 05:45:00,0.4,0.4,0.5,0.0,,,,,,0
67064,19897276,28994803,30147534,2176-04-10 18:39:00,,,,,7.5529,7.5529,7.5529,0.0,,0


In [30]:
sub_mimic_urineoutput[sub_mimic_urineoutput["stay_id"] == 30147534]

Unnamed: 0,stay_id,charttime,urineoutput
53487,30147534,2176-04-07 23:00:00,300.0
53488,30147534,2176-04-08 06:00:00,350.0
53489,30147534,2176-04-08 09:00:00,200.0
53490,30147534,2176-04-08 12:00:00,250.0
53491,30147534,2176-04-08 16:00:00,500.0
53492,30147534,2176-04-10 18:39:00,500.0
53493,30147534,2176-04-11 04:15:00,500.0
53494,30147534,2176-04-11 15:06:00,450.0
53495,30147534,2176-04-11 17:00:00,200.0
53496,30147534,2176-04-11 21:00:00,250.0
