In [3]:
%matplotlib inline
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

### CHLA's Demographics

In [2]:
fileDir = os.path.dirname('__file__')
fname1 = os.path.join(fileDir, 'io_ism5', 
                      'ism_onset_io_tlag006_twin006_aki.pkl')
fname2 = os.path.join(fileDir, 'io_ism5', 
                      'ism_onset_io_tlag006_twin006_con.pkl')

In [3]:
io_aki = pd.read_pickle(fname1)
io_con = pd.read_pickle(fname2)

In [5]:
nb_aki = len(io_aki.patient_id.unique())
nb_aki01 = np.sum(io_aki.AKI_stage==1)
nb_aki23 = np.sum(io_aki.AKI_stage>1)
nb_con = len(io_con.patient_id.unique())
nb_tot = nb_aki + nb_con
nb_male_aki = np.sum(io_aki.sex=='M')
nb_female_aki = np.sum(io_aki.sex=='F')
nb_male_con = np.sum(io_con.sex=='M')
nb_female_con = np.sum(io_con.sex=='F')
nb_male = nb_male_aki + nb_male_con
nb_female = nb_female_aki + nb_female_con
print('AKI patients: {}'.format(nb_aki))
print('AKI stage 1 patients: {}'.format(nb_aki01))
print('AKI stage 2,3 patients: {}'.format(nb_aki23))
print('Stable patients: {}'.format(nb_con))
print('Total patients: {}'.format(nb_tot))
print('AKI prevalence: {}'.format(nb_aki/float(nb_tot)*100))
print('Male: {}({:}%)'.format(nb_male, nb_male/float(nb_tot)*100))
print('Female: {}({}%)'.format(nb_female, nb_female/float(nb_tot)*100))
print('Male in AKI group: {}({:}%)'.format(nb_male_aki, nb_male_aki/float(nb_tot)*100))
print('Female in AKI group: {}({}%)'.format(nb_female_aki, nb_female_aki/float(nb_tot)*100))
print('Male in Control group: {}({:}%)'.format(nb_male_con, nb_male_con/float(nb_tot)*100))
print('Female in Control group: {}({}%)'.format(nb_female_con, nb_female_con/float(nb_tot)*100))


AKI patients: 763
AKI stage 1 patients: 519
AKI stage 2,3 patients: 244
Stable patients: 7341
Total patients: 8104
AKI prevalence: 9.41510365252
Male: 4426(54.6150049358%)
Female: 3678(45.3849950642%)
Male in AKI group: 414(5.10858835143%)
Female in AKI group: 349(4.30651530109%)
Male in Control group: 4012(49.5064165844%)
Female in Control group: 3329(41.0784797631%)


In [6]:
avg_age_aki = io_aki.age.mean()/365.
avg_age_con = io_con.age.mean()/365.
print('Age of AKI group: {}'.format(avg_age_aki))
print('Age of Control group: {}'.format(avg_age_con))

Age of AKI group: 6.4877610011
Age of Control group: 6.39124228157


In [7]:
cr_df = pd.read_pickle(os.path.join(fileDir, 'item_df_ism', 
                                    'ism_creatinine_df.pkl'))

In [8]:
ceid_aki = io_aki.encounter_id.unique().tolist()
ceid_con = io_con.encounter_id.unique().tolist()

cr_df_aki = cr_df.loc[np.in1d(cr_df.encounter_id, ceid_aki),:]
cr_df_con = cr_df.loc[np.in1d(cr_df.encounter_id, ceid_con),:]

los_aki = cr_df_aki.groupby('encounter_id')['los'].mean()
los_con = cr_df_con.groupby('encounter_id')['los'].mean()

In [9]:
print('LOS (days) of AKI group: {}'.format(np.mean(los_aki)/60./24))
print('LOS (days) of Control group: {}'.format(np.mean(los_con)/60./24))

LOS (days) of AKI group: 29.9631753313
LOS (days) of Control group: 7.47918066718


In [19]:
disch_aki = cr_df_aki.groupby('encounter_id')['dischstatus'].unique()
disch_aki = disch_aki.to_frame().reset_index()

disch_con = cr_df_con.groupby('encounter_id')['dischstatus'].unique()
disch_con = disch_con.to_frame().reset_index()

In [17]:
def countDeceased(disch_df):
    count = 0
    for idx, row in disch_df.iterrows():
        if 'Deceased' in row.dischstatus:
            count += 1
    return count

In [23]:
nb_disch_aki = countDeceased(disch_aki)
prv_disch_aki = nb_disch_aki/float(nb_aki)*100
nb_disch_con = countDeceased(disch_con)
prv_disch_con = nb_disch_con/float(nb_con)*100
print('AKI patients deceased: {}/{} ({}%)'.format(nb_disch_aki, nb_aki, prv_disch_aki))
print('Stable patients deceased: {}/{} ({}%)'.format(nb_disch_con, nb_con, prv_disch_con))

AKI patients deceased: 150/763 (19.6592398427%)
Stable patients deceased: 172/7341 (2.34300504019%)


In [10]:
def getPresentPrev(io_df, fts):
    io_df_interest = io_df.loc[:, fts]
    nb_present = io_df_interest.count()
    nb_tot = len(io_df.encounter_id.unique())
    prev_present = nb_present/float(nb_tot)*100
    
    return prev_present
    

In [11]:
fts = ['age', 'nsbp_last', 'ndbp_last', 'hr_last', 'spo2_last',
       'hemoglobin_last', 'temperature_last', 'wbc_last', 'platelet_last',
       'albumin_last', 'urine_last', 'potassium_last', 'glucose_last',
       'creatinine_last', 'lactic_acid_last', 'osi_last', 'si_last', 'oi_last',
       'bilirubin_last', 'calcium_last', 'bun_last', 'ratio_pao2_flo2_last', 
       'ph_last']

In [12]:
io_all = pd.concat([io_aki, io_con], axis=0)
getPresentPrev(io_all, fts)

age                     100.000000
nsbp_last                77.011352
ndbp_last                76.999013
hr_last                  92.978776
spo2_last                61.043929
hemoglobin_last          24.987660
temperature_last         92.929418
wbc_last                 23.210760
platelet_last            23.840079
albumin_last              4.540967
urine_last               74.222606
potassium_last           40.362784
glucose_last             25.012340
creatinine_last          23.309477
lactic_acid_last          0.851431
osi_last                 24.765548
si_last                  76.961994
oi_last                   9.279368
bilirubin_last            4.207799
calcium_last             23.013327
bun_last                 23.186081
ratio_pao2_flo2_last     31.539980
ph_last                  31.391905
dtype: float64

In [13]:
# Get number of patients that were on ventilator by investigating MAP df
ism_map_df = pd.read_pickle(os.path.join(fileDir, 'item_df_ism', 'ism_map_df.pkl'))
ceid_map = ism_map_df.encounter_id.unique().tolist()

In [14]:
nb_vent_aki = np.sum(np.in1d(ceid_aki, ceid_map))
nb_vent_con = np.sum(np.in1d(ceid_con, ceid_map))

prv_vent_aki = nb_vent_aki/float(nb_tot)*100
prv_vent_con = nb_vent_con/float(nb_tot)*100

print('Number of AKI patients on ventilator: {} ({}%)'.format(nb_vent_aki, prv_vent_aki))
print('Number of Stable patients on ventilator: {} ({}%)'.format(nb_vent_con, prv_vent_con))

Number of AKI patients on ventilator: 651 (8.03307008885%)
Number of Stable patients on ventilator: 3852 (47.532082922%)


### St.Mary's Demographics

In [24]:
import stm_utilities as stm

In [25]:
stmdb = stm.queryDB()

In [26]:
enc_df = stmdb.getEncounterData()

KeyboardInterrupt: 

In [None]:
print('Total encounters in St.Mary DB: {}'.format(len(enc_df.encounter_id.unique())))
print('Total patients in St.Mary DB: {}'.format(len(enc_df.patient_id.unique())))

In [27]:
cr_df = pd.read_pickle(os.path.join(fileDir, 'item_df_stm', 'stm_creatinine_df.pkl'))

In [28]:
print('Total encounters with Creatinine measurements: {}'.format(len(cr_df.encounter_id.unique())))
print('Total patients with Creatinine measurements: {}'.format(len(cr_df.patient_id.unique())))
print('Total Creatinine measurements: {}'.format(len(cr_df.index)))

Total encounters with Creatinine measurements: 1388
Total patients with Creatinine measurements: 1388
Total Creatinine measurements: 12913


In [29]:
stm_aki = pd.read_pickle(os.path.join(fileDir, 'io_stm5' , 'stm_onset_io_tlag006_twin006_aki.pkl'))
stm_con = pd.read_pickle(os.path.join(fileDir, 'io_stm5' , 'stm_onset_io_tlag006_twin006_con.pkl'))

In [30]:
nb_stm_aki = len(stm_aki.encounter_id.unique())
nb_stm_aki01 = np.sum(stm_aki.AKI_stage==1)
nb_stm_aki23 = np.sum(stm_aki.AKI_stage>1)
nb_stm_con = len(stm_con.encounter_id.unique())
nb_stm_tot = nb_stm_aki + nb_stm_con
prv_stm_aki = nb_stm_aki/float(nb_stm_tot)*100
print('Total patients: {}'.format(nb_stm_tot))
print('Total AKI patients: {}'.format(nb_stm_aki))
print('AKI stage 1 patients: {}'.format(nb_stm_aki01))
print('AKI stage 2-3 patients: {}'.format(nb_stm_aki23))
print('Total stable patients: {}'.format(nb_stm_con))
print('Prevalence of AKI patients: {}%'.format(prv_stm_aki))

Total patients: 1388
Total AKI patients: 163
AKI stage 1 patients: 123
AKI stage 2-3 patients: 40
Total stable patients: 1225
Prevalence of AKI patients: 11.7435158501%


In [42]:
nb_stm_aki_male = np.sum(stm_aki.sex=='M')
prv_stm_aki_male = nb_stm_aki_male/float(nb_stm_tot)*100
nb_stm_aki_female = np.sum(stm_aki.sex=='F')
prv_stm_aki_female = nb_stm_aki_female/float(nb_stm_tot)*100
nb_stm_con_male = np.sum(stm_con.sex=='M')
prv_stm_con_male = nb_stm_con_male/float(nb_stm_tot)*100
nb_stm_con_female = np.sum(stm_con.sex=='F')
prv_stm_con_female = nb_stm_con_female/float(nb_stm_tot)*100

print('Number of male in AKI group: {}({}%)'.format(nb_stm_aki_male, prv_stm_aki_male))
print('Number of female in AKI group: {}({}%)'.format(nb_stm_aki_female, prv_stm_aki_female))
print('Number of male in stable group: {}({}%)'.format(nb_stm_con_male, prv_stm_con_male))
print('Number of female in stable group: {}({}%)'.format(nb_stm_con_female, prv_stm_con_female))


Number of male in AKI group: 85(6.12391930836%)
Number of female in AKI group: 78(5.61959654179%)
Number of male in stable group: 683(49.2074927954%)
Number of female in stable group: 542(39.0489913545%)


In [41]:
ceid_stm_aki = stm_aki.encounter_id.unique().tolist()
ceid_stm_con = stm_con.encounter_id.unique().tolist()

In [39]:
cr_df_stm_aki = cr_df.loc[np.in1d(cr_df.encounter_id, ceid_stm_aki),:]
cr_df_stm_con = cr_df.loc[np.in1d(cr_df.encounter_id, ceid_stm_con),:]

los_stm_aki = cr_df_stm_aki.groupby('encounter_id')['ICU_LOS_min'].mean()
los_stm_con = cr_df_stm_con.groupby('encounter_id')['ICU_LOS_min'].mean()

avg_los_stm_aki = los_stm_aki.mean()
avg_los_stm_con = los_stm_con.mean()

print('average LOS in AKI group: {} days'.format(avg_los_stm_aki/60./24.))
print('average LOS in stable group: {} days'.format(avg_los_stm_con/60./24.))

average LOS in AKI group: 20.3828476483 days
average LOS in stable group: 6.06435827664 days


In [34]:
age_aki = stm_aki.age.mean()/365.
age_con = stm_con.age.mean()/365.

print('Average age in AKI group: {} years'.format(age_aki))
print('Average age in stable group: {} years'.format(age_con))

Average age in AKI group: 4.70985920224 years
Average age in stable group: 4.00117783458 years


In [35]:
# Get number of patients that were on ventilator by investigating MAP df
stm_map_df = pd.read_pickle(os.path.join(fileDir, 'item_df_stm', 'stm_map_df.pkl'))
ceid_stm_map = stm_map_df.encounter_id.unique().tolist()


nb_vent_stm_aki = np.sum(np.in1d(ceid_stm_aki, ceid_stm_map))
nb_vent_stm_con = np.sum(np.in1d(ceid_stm_con, ceid_stm_map))

prv_vent_stm_aki = nb_vent_stm_aki/float(nb_stm_tot)*100
prv_vent_stm_con = nb_vent_stm_con/float(nb_stm_tot)*100

print('Number of AKI patients on ventilator: {} ({}%)'.format(nb_vent_stm_aki, prv_vent_stm_aki))
print('Number of Stable patients on ventilator: {} ({}%)'.format(nb_vent_stm_con, prv_vent_stm_con))

Number of AKI patients on ventilator: 158 (11.3832853026%)
Number of Stable patients on ventilator: 1037 (74.711815562%)


In [46]:
def countDeceasedSTM(disch_df):
    count = 0
    for idx, row in disch_df.iterrows():
        if row.is_deceased == True:
            count += 1
    return count

In [47]:
disch_stm_aki = cr_df_stm_aki.groupby('encounter_id')['is_deceased'].unique()
disch_stm_aki = disch_stm_aki.to_frame().reset_index()

disch_stm_con = cr_df_stm_con.groupby('encounter_id')['is_deceased'].unique()
disch_stm_con = disch_stm_con.to_frame().reset_index()

In [48]:
nb_disch_stm_aki = countDeceasedSTM(disch_stm_aki)
prv_disch_stm_aki = nb_disch_stm_aki/float(nb_stm_aki)*100
nb_disch_stm_con = countDeceasedSTM(disch_stm_con)
prv_disch_stm_con = nb_disch_stm_con/float(nb_stm_con)*100
print('AKI patients deceased: {}/{} ({}%)'.format(nb_disch_stm_aki, nb_stm_aki, prv_disch_stm_aki))
print('Stable patients deceased: {}/{} ({}%)'.format(nb_disch_stm_con, nb_stm_con, prv_disch_stm_con))

AKI patients deceased: 32/163 (19.6319018405%)
Stable patients deceased: 36/1225 (2.9387755102%)


## Banner's demographics

In [4]:
fileDir = os.path.dirname('__file__')
fname1_banner = os.path.join(fileDir, 'io_banner4', 
                             'banner_onset_io_tlag006_twin006_aki.pkl')
fname2_banner = os.path.join(fileDir, 'io_banner4', 
                             'banner_onset_io_tlag006_twin006_con.pkl')

In [5]:
io_aki_banner = pd.read_pickle(fname1_banner)
io_con_banner = pd.read_pickle(fname2_banner)

In [9]:
nb_aki_banner = len(io_aki_banner.encounter_id.unique())
nb_aki01_banner = np.sum(io_aki_banner.AKI_stage==1)
nb_aki23_banner = np.sum(io_aki_banner.AKI_stage>1)
nb_con_banner = len(io_con_banner.encounter_id.unique())
nb_tot_banner = nb_aki_banner + nb_con_banner
nb_male_aki_banner = np.sum(io_aki_banner.sex=='M')
nb_female_aki_banner = np.sum(io_aki_banner.sex=='F')
nb_male_con_banner = np.sum(io_con_banner.sex=='M')
nb_female_con_banner = np.sum(io_con_banner.sex=='F')
nb_male_banner = nb_male_aki_banner + nb_male_con_banner
nb_female_banner = nb_female_aki_banner + nb_female_con_banner
print('AKI patients: {}'.format(nb_aki_banner))
print('AKI stage 1 patients: {}'.format(nb_aki01_banner))
print('AKI stage 2,3 patients: {}'.format(nb_aki23_banner))
print('Stable patients: {}'.format(nb_con_banner))
print('Total patients: {}'.format(nb_tot_banner))
print('AKI prevalence: {}'.format(nb_aki_banner/float(nb_tot_banner)*100))
print('Male: {}({:}%)'.format(nb_male_banner, nb_male_banner/float(nb_tot_banner)*100))
print('Female: {}({}%)'.format(nb_female_banner, nb_female_banner/float(nb_tot_banner)*100))
print('Male in AKI group: {}({:}%)'.format(nb_male_aki_banner, nb_male_aki_banner/float(nb_aki_banner)*100))
print('Female in AKI group: {}({}%)'.format(nb_female_aki_banner, nb_female_aki_banner/float(nb_aki_banner)*100))
print('Male in Control group: {}({:}%)'.format(nb_male_con_banner, nb_male_con_banner/float(nb_con_banner)*100))
print('Female in Control group: {}({}%)'.format(nb_female_con_banner, nb_female_con_banner/float(nb_con_banner)*100))


AKI patients: 62
AKI stage 1 patients: 56
AKI stage 2,3 patients: 6
Stable patients: 1593
Total patients: 1655
AKI prevalence: 3.74622356495
Male: 908(54.8640483384%)
Female: 747(45.1359516616%)
Male in AKI group: 38(61.2903225806%)
Female in AKI group: 24(38.7096774194%)
Male in Control group: 870(54.6139359699%)
Female in Control group: 723(45.3860640301%)


In [52]:
56/1655.

0.033836858006042296

In [11]:
avg_age_aki_banner = io_aki_banner.age.mean()/365.
avg_age_con_banner = io_con_banner.age.mean()/365.
print('Age of AKI group: {}'.format(avg_age_aki_banner))
print('Age of Control group: {}'.format(avg_age_con_banner))

Age of AKI group: 6.95483870968
Age of Control group: 6.6329566855


In [18]:
cr_df_banner = pd.read_pickle(os.path.join(fileDir, 'item_df_banner', 
                                           'banner_creatinine_df.pkl'))

In [23]:
ceid_aki_banner = io_aki_banner.encounter_id.unique().tolist()
ceid_con_banner = io_con_banner.encounter_id.unique().tolist()

In [26]:
cr_df_aki_banner = cr_df_banner.loc[np.in1d(cr_df_banner.ENCNTR_ID, ceid_aki_banner),:]
cr_df_con_banner = cr_df_banner.loc[np.in1d(cr_df_banner.ENCNTR_ID, ceid_con_banner),:]

los_aki_banner = cr_df_aki_banner.groupby('ENCNTR_ID')['LOSm'].mean()
los_con_banner = cr_df_con_banner.groupby('ENCNTR_ID')['LOSm'].mean()

In [27]:
print('LOS (days) of AKI group: {}'.format(np.mean(los_aki_banner)/60./24))
print('LOS (days) of Control group: {}'.format(np.mean(los_con_banner)/60./24))

LOS (days) of AKI group: 21.6584453405
LOS (days) of Control group: 5.37099375741


In [28]:
# Get number of patients that were on ventilator by investigating MAP df
map_df_banner = pd.read_pickle(os.path.join(fileDir, 'item_df_banner', 'banner_map_df.pkl'))
ceid_map_banner = map_df_banner.ENCNTR_ID.unique().tolist()

nb_vent_aki_banner = np.sum(np.in1d(ceid_aki_banner, ceid_map_banner))
nb_vent_con_banner = np.sum(np.in1d(ceid_con_banner, ceid_map_banner))

prv_vent_aki_banner = nb_vent_aki_banner/float(nb_aki_banner)*100
prv_vent_con_banner = nb_vent_con_banner/float(nb_con_banner)*100

print('Number of AKI patients on ventilator: {} ({}%)'.format(nb_vent_aki_banner, prv_vent_aki_banner))
print('Number of Stable patients on ventilator: {} ({}%)'.format(nb_vent_con_banner, prv_vent_con_banner))

Number of AKI patients on ventilator: 2 (3.22580645161%)
Number of Stable patients on ventilator: 2 (0.125549278092%)


In [49]:
def countDeceasedBanner(disch_df):
    count = 0
    for idx, row in disch_df.iterrows():
        if row.deceased_flag != 0:
            count += 1
    return count

In [50]:
disch_aki_banner = cr_df_aki_banner.groupby('ENCNTR_ID')['deceased_flag'].unique()
disch_aki_banner = disch_aki_banner.to_frame().reset_index()

disch_con_banner = cr_df_con_banner.groupby('ENCNTR_ID')['deceased_flag'].unique()
disch_con_banner = disch_con_banner.to_frame().reset_index()

In [51]:
nb_disch_aki_banner = countDeceasedBanner(disch_aki_banner)
prv_disch_aki_banner = nb_disch_aki_banner/float(nb_aki_banner)*100
nb_disch_con_banner = countDeceasedBanner(disch_con_banner)
prv_disch_con_banner = nb_disch_con_banner/float(nb_con_banner)*100
print('AKI patients deceased: {}/{} ({}%)'.format(nb_disch_aki_banner, nb_aki_banner, prv_disch_aki_banner))
print('Stable patients deceased: {}/{} ({}%)'.format(nb_disch_con_banner, nb_con_banner, prv_disch_con_banner))

AKI patients deceased: 0/62 (0.0%)
Stable patients deceased: 0/1593 (0.0%)
