In [114]:
import pandas as pd
import numpy as np

In [115]:
# Read the five input CSV tables in one pass; the unpacking order matches
# the order of the paths below.
creatinine, urineoutput, demographics, rrt, weight = (
    pd.read_csv(csv_path)
    for csv_path in (
        'tests/generate_test/data/creatinine.csv',
        'tests/generate_test/data/urineoutput.csv',
        'tests/generate_test/data/demographics.csv',
        'tests/generate_test/data/rrt.csv',
        'tests/generate_test/data/weight.csv',
    )
)


In [116]:
# Display the urine-output table as the cell's rich output.
urineoutput

Unnamed: 0,stay_id,charttime,urineoutput
0,30057454,2171-11-14 11:00:00,200
1,30057454,2171-11-14 12:00:00,150
2,30057454,2171-11-14 13:00:00,125
3,30057454,2171-11-14 14:00:00,150
4,30057454,2171-11-14 15:00:00,150
...,...,...,...
7312,39880770,2148-01-08 12:00:00,100
7313,39880770,2148-01-08 13:00:00,270
7314,39880770,2148-01-08 14:00:00,120
7315,39880770,2148-01-08 15:00:00,125


In [117]:
# Reference set of stay ids: every distinct stay_id in the demographics table.
stay_ids = set(demographics["stay_id"].unique())

## Check Values

In [118]:
# For each stay_id present in the urine-output table, flag whether that id
# is also contained in `stay_ids`. The result is a boolean Series indexed
# by stay_id.
patients_with_urine_output = (
    urineoutput
    .groupby("stay_id")["stay_id"]
    .agg(lambda group_ids: set(group_ids) <= stay_ids)
)

In [119]:
# Shape of the array of distinct stay ids in the weight table.
weight["stay_id"].unique().shape

(140,)

In [120]:
# For each stay_id present in the creatinine table, flag whether that id is
# also contained in `stay_ids`. The result is a boolean Series indexed by
# stay_id.
patients_with_creatinine = (
    creatinine
    .groupby("stay_id")["stay_id"]
    .agg(lambda group_ids: set(group_ids) <= stay_ids)
)

In [121]:
# For each stay_id present in the weight table, flag whether that id is also
# contained in `stay_ids`. The result is a boolean Series indexed by stay_id.
patients_with_weight = (
    weight
    .groupby("stay_id")["stay_id"]
    .agg(lambda group_ids: set(group_ids) <= stay_ids)
)


In [122]:
# For each stay_id present in the RRT table, flag whether that id is also
# contained in `stay_ids`. The result is a boolean Series indexed by stay_id.
patients_with_rrt = (
    rrt
    .groupby("stay_id")["stay_id"]
    .agg(lambda group_ids: set(group_ids) <= stay_ids)
)


In [123]:
# Stays usable for the analysis: present in the creatinine, urine-output and
# weight tables AND flagged True by the membership check against `stay_ids`.
# Taking only `.index` of each flag Series would ignore the flag values and
# let stays that are missing from the demographics table slip through, so the
# index is masked with the boolean values first.
intersection_ids: set = set.intersection(
    *(
        set(flags.index[flags.to_numpy(dtype=bool)])
        for flags in (
            patients_with_creatinine,
            patients_with_urine_output,
            patients_with_weight,
        )
    )
)
len(intersection_ids)

137

In [124]:
# Subset every measurement table to the shared cohort in `intersection_ids`.
# The weight table is rebound under its own name, like the other two, so that
# later cells which reference `weight` receive the filtered frame; binding the
# result only to `weights` left the filtered table unused.
urineoutput = urineoutput[urineoutput["stay_id"].isin(intersection_ids)]
creatinine = creatinine[creatinine["stay_id"].isin(intersection_ids)]
weight = weight[weight["stay_id"].isin(intersection_ids)]
weights = weight  # alias kept so code referring to `weights` keeps working

In [125]:
# One row per stay: GroupBy.first() takes, per column, the first non-null
# value of each group in the table's current row order.
first_rows = urineoutput.groupby("stay_id").first().reset_index()

# Keep stays that are in the shared cohort and have no row in the RRT table.
in_cohort = first_rows["stay_id"].isin(intersection_ids)
has_rrt = first_rows["stay_id"].isin(rrt.stay_id)
first_entry = first_rows[in_cohort & ~has_rrt]
first_entry

Unnamed: 0,stay_id,charttime,urineoutput
0,30057454,2171-11-14 11:00:00,200
1,30101877,2143-03-22 08:00:00,200
2,30425410,2178-07-22 08:52:00,185
3,30458995,2137-10-12 23:47:00,400
4,30585761,2125-06-17 06:23:00,150
...,...,...,...
132,39635619,2174-12-04 14:00:00,275
133,39711498,2131-05-22 22:38:00,100
134,39804682,2178-12-21 10:08:00,150
135,39864867,2148-08-16 13:32:00,350


In [126]:
# Reuse the urine-output rows as RRT rows: rename the measurement column to
# 'dialysis_present' and set it to 0 for every one of these stays.
first_entry = (
    first_entry
    .rename(columns={'urineoutput': 'dialysis_present'})
    .assign(dialysis_present=0)
)

# Append those rows to the RRT table.
# NOTE(review): `rrt` is rebound to the concatenation, so re-running this
# cell appends the same rows again; run it once per kernel session.
rrt = pd.concat([rrt, first_entry])
rrt

Unnamed: 0,stay_id,charttime,dialysis_present
0,30932571,2116-03-04 14:00:00,1
1,30932571,2116-03-04 14:42:00,1
2,30932571,2116-03-04 14:43:00,1
3,30932571,2116-03-04 14:45:00,1
4,30932571,2116-03-04 15:00:00,1
...,...,...,...
132,39635619,2174-12-04 14:00:00,0
133,39711498,2131-05-22 22:38:00,0
134,39804682,2178-12-21 10:08:00,0
135,39864867,2148-08-16 13:32:00,0


In [127]:
# Rename the status column to 'crrt_status'. Rebinding `rrt` instead of using
# inplace=True leaves the frame object displayed by the previous cell
# untouched.
rrt = rrt.rename(columns={'dialysis_present': 'crrt_status'})

## pyAKI

In [128]:
# Import both submodules explicitly: `pyAKI.utils` is used below, and relying
# on `pyAKI.kdigo` to import it as a side effect is fragile.
import pyAKI.kdigo
import pyAKI.utils

# Build the analyser from the four input tables, each wrapped in a Dataset
# tagged with its DatasetType. The weight table is passed in the
# DEMOGRAPHICS slot.
ana = pyAKI.kdigo.Analyser(
    [
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.URINEOUTPUT, urineoutput),
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.CREATININE, creatinine),
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.DEMOGRAPHICS, weight),
        pyAKI.utils.Dataset(pyAKI.utils.DatasetType.CRRT, rrt),
    ]
)

In [129]:
# Run the analysis once and write that single result to both files, so the
# CSV and the Excel export are guaranteed to hold the same data and the
# computation is not repeated per output format.
aki_export = ana.process_stays()
aki_export.to_csv("tests/generate_test/data/test_machine_aki.csv")
aki_export.to_excel("tests/generate_test/data/test_machine_aki.xlsx")

In [130]:
# Keep the result of process_stays() in `aki` for inspection.
aki = ana.process_stays()

In [131]:
# Show the first rows of the result frame.
aki.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,urineoutput,urineoutput_stage,stay_id_x,creat,abs_creatinine_stage,rel_creatinine_stage,weight,stay_id_y,crrt_status,crrt_stage,stage
stay_id,charttime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
30057454,2171-11-07 21:00:00,,,30057454.0,2.0,0.0,0.0,87.2,,,,0.0
30057454,2171-11-07 22:00:00,,,30057454.0,2.0,0.0,0.0,87.2,,,,0.0
30057454,2171-11-07 23:00:00,,,30057454.0,2.0,0.0,0.0,87.2,,,,0.0
30057454,2171-11-08 00:00:00,,,30057454.0,2.0,0.0,0.0,87.2,,,,0.0
30057454,2171-11-08 01:00:00,,,30057454.0,2.0,0.0,0.0,87.2,,,,0.0
