In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from pathlib import Path
import importlib
import warnings
warnings.filterwarnings("ignore")
import pickle
import sys

# Make the parent of the current working directory importable.
# Presumably that directory is the project root holding the `utils` and
# `config` packages imported in the next cell — TODO confirm.
sys.path.append(str(Path.cwd().parents[0]))
    
    
%load_ext autoreload
%autoreload 2

In [None]:
import utils.lab_utils
from config.constants import MIMIC_DIR
from config.constants import MIMIC_LABS_DIR

# Extract Lab data 

### Load Target cohort
(Cohorts are defined beforehand and saved in `./MIMIC_IV/saved_data/cohorts/`.)

In [None]:
# Pick the cohort to work on and load its gzip-compressed CSV, parsing the
# admission and discharge timestamps as datetimes.
target_cohort = 'mimic_cohort_NF_30_days'
cohort_path = f'./saved_data/cohorts/{target_cohort}.csv.gz'
cohort = pd.read_csv(
    cohort_path,
    compression='gzip',
    parse_dates=['admittime', 'dischtime'],
)

# Quick sanity summary: cohort name plus distinct patient and admission counts.
n_unique_patients = cohort['subject_id'].nunique()
n_unique_admissions = cohort['hadm_id'].nunique()
print(target_cohort)
print('number of unique patients: ', n_unique_patients)
print('number of unique admissions: ', n_unique_admissions)

#### Extract Lab data for target cohort 

In [None]:
# Extract all lab events for the target cohort, drop rows with an unexpected
# unit of measurement, and cache the result as a gzip-compressed CSV.
print('---------------------------------------------')
# The closing bracket was missing from this status message.
print(f"[EXTRACTING LABS DATA for the {target_cohort} COHORT]")

# NOTE(review): MIMIC_LABS_DIR is imported above but MIMIC_DIR is what gets
# passed here — confirm this is the directory extract_cohort_labs expects.
lab = utils.lab_utils.extract_cohort_labs(MIMIC_DIR, cohort, 'charttime', dtypes=None, usecols=None)

# 0.95 is the threshold handed to drop_wrong_uom; presumably the minimum share
# of rows that must agree on a unit for it to be kept — TODO confirm in lab_utils.
lab = utils.lab_utils.drop_wrong_uom(lab, 0.95)

# Make sure the output directory exists before writing.
os.makedirs('./saved_data/features', exist_ok=True)

# Persist only the columns that the admission-level extraction reads back.
lab[['subject_id', 'hadm_id', 'charttime', 'itemid', 'valuenum']].to_csv(
    './saved_data/features/' + target_cohort + '_labs.csv.gz',
    compression='gzip',
    index=False,
)
print("[SUCCESSFULLY SAVED COHORT LABS DATA]")
print('---------------------------------------------')

#### Extract Lab data for admissions 
(Choose the number of days you want to include before discharge)

In [None]:
# Number of days before discharge to include for each admission.
# Passed to utils.lab_utils.extract_admission_labs and embedded in the name of
# the output file written by the admission-level extraction cell.
days_before_discharge = 14

In [None]:

# Reload the cohort-level labs from disk (written by the cohort extraction
# cell) so this cell can be re-run on its own once that file exists.
cohort_labs = pd.read_csv(
    './saved_data/features/' + target_cohort + '_labs.csv.gz',
    compression='gzip',
    header=0,
    parse_dates=['charttime'],
)

# Remove H-I-L items (presumably the hemolysis / icterus / lipemia index
# entries, which are sample-quality flags rather than lab results — confirm).
itemids_to_remove = [50934, 50947, 51678]
cohort_labs = cohort_labs[~cohort_labs['itemid'].isin(itemids_to_remove)]

print('------------------------------------------------------------------------------------------')
print("[EXTRACTING LABS DATA FOR SELECTED NUMBER OF DAYS BEFORE DISCHARGE FOR EACH ADMISSION]")

# Each cohort row (one admission) yields a DataFrame of its labs restricted to
# the chosen pre-discharge window; the per-admission frames are then stacked.
lab_results = cohort.apply(
    lambda x: utils.lab_utils.extract_admission_labs(x, days_before_discharge, cohort_labs),
    axis=1,
)
admission_labs = pd.concat(lab_results.tolist(), ignore_index=True)

# Positional rename: this relies on the column ORDER returned by
# extract_admission_labs. A length mismatch raises ValueError, but a reordering
# would silently mislabel columns — check the order against lab_utils.
new_columns = ['subject_id', 'hadm_id', 'date', 'itemid', 'value', 'admittime', 'dischtime', 'lab_time_from_disch']
admission_labs.columns = new_columns

# Summary counts for a quick sanity check of the extracted data.
print("# Itemid: ", admission_labs['itemid'].nunique())
print("# Subjects: ", admission_labs['subject_id'].nunique())
print("# Admissions: ", admission_labs['hadm_id'].nunique())

# Save the admission-level labs; the window length is part of the file name so
# runs with different windows do not overwrite each other.
admission_labs.to_csv(
    './saved_data/features/' + target_cohort + f'_admissions_labs_{days_before_discharge}_days.csv.gz',
    compression='gzip',
    index=False,
)
print("[SUCCESSFULLY SAVED ADMISSIONS LABS DATA]")
print('------------------------------------------------------------------------------------------')