### Plausibility filter to filter out unreasonable values for each feature
* Set the `db_name` variable accordingly before running the code: db_name $\in$ {'ism', 'stm', 'banner'}

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import re
# NOTE: '__file__' is a quoted string literal here, not the module attribute,
# so os.path.dirname() returns '' and every path later joined onto fileDir is
# resolved relative to the current working directory.
fileDir = os.path.dirname('__file__')


# Dataset selector: the driver cell at the bottom dispatches on this value to
# pick the input/output directories.
db_name = 'ism'
# db_name should be either one of {'ism', 'stm', 'banner'}

### Plausibility filter

In [2]:
# Plausible value range per feature, as [lower, upper].
# filterPlausible() treats both bounds as exclusive: a value equal to either
# bound is considered implausible.  Each key is matched against column names
# as the prefix '<feature>_'.
# NOTE(review): units are not stated anywhere in this file -- confirm them
# against the data dictionary before changing any bound.
pl_flt = {
    'urine': [0, 20],
    'hemoglobin': [0, 20],
    'nsbp': [0, 200],
    'ndbp': [0, 200],
    'osi': [0, 70],
    'lactic_acid': [0, 120],
    'hr': [0, 200],
    'creatinine': [0, 1.8],
    'albumin': [0, 6],
    'glucose': [0, 600],
    'si': [0, 5],
    'fio2': [0, 1],
    'spo2': [60, 100],
    'oi': [0, 70],
    'platelet': [0, 1000],
    'potassium': [1, 12],
    'temperature': [32, 42],
    'wbc': [0, 50],
}

In [3]:
def filterPlausible(io_df, db_name, plausible_ranges=None):
    """Replace implausible feature values in ``io_df`` with NaN.

    For every feature in the range table, all columns whose name starts with
    ``'<feature>_'`` are checked.  A value is blanked (set to NaN) when it is
    less than or equal to the lower bound or greater than or equal to the
    upper bound.  For a few (db_name, feature) pairs the values are multiplied
    by a fixed factor before the comparison; the stored values themselves are
    never rescaled, so retained values keep their original magnitude exactly.

    Args:
        io_df: pandas DataFrame to filter.  It is modified in place.
        db_name: dataset identifier; only used to select a scaling factor
            ('stm' and 'banner' have entries, anything else is unscaled).
        plausible_ranges: optional mapping ``feature -> [lower, upper]``.
            When omitted, the module-level ``pl_flt`` table is used.

    Returns:
        The same ``io_df`` object, after filtering.
    """
    if plausible_ranges is None:
        plausible_ranges = pl_flt

    # Factor applied to the raw values before comparing against the range.
    # NOTE(review): these look like unit conversions into the units the range
    # table is expressed in (e.g. mmol/L -> mg/dL) -- confirm with the data
    # owners.
    scale_by_source = {
        ('stm', 'lactic_acid'): 9.009,
        ('stm', 'glucose'): 18.0182,
        ('stm', 'albumin'): 0.1,
        ('banner', 'lactic_acid'): 9.009,
    }

    for ft in plausible_ranges:
        pl_range = plausible_ranges[ft]
        lower, upper = pl_range[0], pl_range[1]

        # Anchored prefix match, so e.g. 'si' does not pick up 'osi_*'.
        pattern = re.compile('^{}_'.format(ft))
        cols = [col for col in io_df.columns if pattern.match(col)]
        if not cols:
            continue

        values = io_df.loc[:, cols]
        factor = scale_by_source.get((db_name, ft))
        comparable = values if factor is None else values * factor

        # NOTE(review): both bounds are exclusive, so a value sitting exactly
        # on a bound (e.g. spo2 == 100, fio2 == 1) is blanked too.  Kept as in
        # the original; confirm this is intended.
        implausible = (comparable <= lower) | (comparable >= upper)

        # mask() yields NaN wherever the condition holds; this avoids both the
        # removed ``np.NaN`` alias and assigning into a slice copy.
        io_df.loc[:, cols] = values.mask(implausible)

    return io_df
        

In [4]:
def wrapper(combination, ioDir_from, ioDir_to, db_name):
    """Run the plausibility filter over every pickled I/O table of a dataset.

    For each ``(tlag, twin)`` pair, the two files
    ``<db_name>_onset_io_tlag<|tlag|>_twin<twin>_con.pkl`` and ``..._aki.pkl``
    are loaded from ``ioDir_from``, passed through ``filterPlausible`` and
    written under the same file names to ``ioDir_to``.  Both directories are
    resolved relative to the module-level ``fileDir``.

    Args:
        combination: iterable of ``(tlag, twin)`` integer pairs; the absolute
            value of ``tlag`` is used in the file name.
        ioDir_from: directory name holding the input pickles.
        ioDir_to: directory name receiving the filtered pickles; it must
            already exist, this function does not create it.
        db_name: dataset identifier, used as the file-name prefix and
            forwarded to ``filterPlausible``.

    Returns:
        None.  The results are written to disk.
    """
    name_template = '{}_onset_io_tlag{:03d}_twin{:03d}_{}.pkl'

    for tlag, twin in combination:
        filtered = []
        for cohort in ('con', 'aki'):
            fname = name_template.format(db_name, abs(tlag), twin, cohort)
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only run this on pickles produced by the trusted pipeline.
            # The context manager closes the handle even if loading fails.
            with open(os.path.join(fileDir, ioDir_from, fname), 'rb') as f_in:
                io_df = pickle.load(f_in)
            filtered.append((fname, filterPlausible(io_df, db_name)))

        # Write only after both tables were loaded and filtered, so a missing
        # or broken input never leaves a half-updated pair on disk.
        for fname, io_df in filtered:
            io_df.to_pickle(os.path.join(fileDir, ioDir_to, fname))

In [5]:
# Candidate time lags (-6, -7, ..., -24) and time-window lengths; only pairs
# whose absolute lag is at least as long as the window are processed.
timelag = np.arange(-6, -25, -1)
timewin = [6, 12]
combination = [(tlag, twin) for tlag in timelag for twin in timewin if abs(tlag) >= twin]

# Fail loudly on a mistyped dataset name instead of silently doing nothing.
if db_name not in ('ism', 'stm', 'banner'):
    raise ValueError(
        "db_name must be one of 'ism', 'stm', 'banner'; got {!r}".format(db_name))

# Input lives in 'io_<db_name>2', filtered output goes to 'io_<db_name>3'.
src_dir = 'io_{}2'.format(db_name)
dst_dir = 'io_{}3'.format(db_name)
dst_path = os.path.join(fileDir, dst_dir)
if not os.path.exists(dst_path):
    os.makedirs(dst_path)
wrapper(combination, src_dir, dst_dir, db_name)