In [None]:
import sqlite3
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from pathlib import Path
import importlib
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/NumPy — confirm that is intended.
warnings.filterwarnings("ignore")
from tqdm import tqdm
# Hook tqdm into pandas (enables the progress_* variants of apply/map);
# no such call is visible in this part of the notebook.
tqdm.pandas()  
import sys

# Put the parent of the current working directory on the import path so the
# `config` package imported on the next line can be resolved.
# NOTE(review): presumably the notebook is started from a sub-folder of the project root — confirm.
sys.path.append(str(Path.cwd().parents[0]))
from config.constants import UKER_DIR

%load_ext autoreload
%autoreload 2

In [None]:
def read_SQL(data_path, query, parameters=None):
    """Run a query against a SQLite database file and return the rows as a DataFrame.

    Parameters
    ----------
    data_path : str or path-like
        Location of the SQLite database file passed to ``sqlite3.connect``.
    query : str
        SQL statement to execute; values should be supplied through ``?``
        placeholders rather than string formatting.
    parameters : sequence or mapping, optional
        Values bound to the placeholders in ``query``; forwarded unchanged to
        ``pandas.read_sql`` as ``params``. Defaults to ``None`` (no parameters).

    Returns
    -------
    pandas.DataFrame
        The result set of ``query``.
    """
    # Imported locally so this cell does not depend on edits to the import cell.
    from contextlib import closing

    # A sqlite3 connection used directly as a context manager only commits or
    # rolls back the transaction; it does NOT close the connection. closing()
    # guarantees the handle is released even if read_sql raises.
    with closing(sqlite3.connect(data_path)) as conn:
        return pd.read_sql(query, conn, params=parameters)

# Extract Lab data 

### Load Target cohort

In [None]:
# Cohort to process: file stem of the gzipped CSV under ./saved_data/cohorts/.
target_cohort = 'uker_cohort_NF_30_days'
# Size of the look-back window, ending at discharge, for which lab data is extracted.
days_before_discharge = 14

cohort_file = './saved_data/cohorts/' + target_cohort + '.csv.gz'
cohort = pd.read_csv(cohort_file, compression='gzip')

# Summary figures for a quick sanity check of the loaded cohort.
n_patients = cohort['subject_id'].nunique()
n_admissions = cohort['hadm_id'].nunique()
n_positive = cohort['label'].sum()

print(target_cohort)
print('number of unique patients: ', n_patients)
print('number of unique admissions: ', n_admissions)
print('number of positive admissions: ', n_positive)

# Distinct identifiers as plain Python lists; fid_list is bound into the SQL
# query of the extraction cell.
pid_list = cohort['subject_id'].unique().tolist()
fid_list = cohort['hadm_id'].unique().tolist()

#### Extract Lab data for target cohort 

In [None]:
# SQLite limits how many "?" parameters a single statement may bind (999 in
# older builds, 32766 in newer ones). Binding one parameter per admission in a
# single query therefore fails with "too many SQL variables" on large cohorts,
# so the admission ids are sent in batches that stay below the smallest limit
# (one slot is also needed for the day-window parameter).
MAX_IDS_PER_QUERY = 900


def _build_lab_query(n_ids):
    """Return the lab-extraction SQL with ``n_ids`` placeholders in the admission filter."""
    placeholders = ', '.join(['?'] * n_ids)
    # NOTE(review): the join on `pat` selects no columns; it only restricts (or,
    # if pat.pid is not unique, multiplies) the rows — confirm it is needed.
    # NOTE(review): the window subtracts `days_before_discharge` directly from
    # `entlassung_alter`, which assumes both are expressed in days — confirm.
    return f"""
SELECT l.pid, f.fid, l.loinc, l.'alter', l.wert, f.aufnahme_alter, f.entlassung_alter
FROM lab l
JOIN fall f ON l.pid = f.pid  -- Join on patient ID
JOIN pat p ON l.pid = p.pid  -- Join on patient ID
--WHERE l.'alter' BETWEEN f.aufnahme_alter AND f.entlassung_alter
WHERE l.'alter' BETWEEN (f.entlassung_alter - ?) AND f.entlassung_alter
AND f.fid IN ({placeholders}) 
ORDER BY f.pid, l.'alter'
"""


# Split the admission ids into batches; keep one (empty) batch when the cohort
# is empty so the result still carries the expected columns.
fid_chunks = [
    fid_list[start:start + MAX_IDS_PER_QUERY]
    for start in range(0, len(fid_list), MAX_IDS_PER_QUERY)
] or [[]]

lab_frames = []
for fid_chunk in fid_chunks:
    query = _build_lab_query(len(fid_chunk))
    params = [days_before_discharge] + fid_chunk
    lab_frames.append(read_SQL(UKER_DIR, query, params))

# Drop empty batch results before concatenating so they cannot disturb dtypes,
# but fall back to the first frame if every batch came back empty.
non_empty_frames = [frame for frame in lab_frames if not frame.empty]
admission_labs = pd.concat(non_empty_frames or lab_frames[:1], ignore_index=True)

# Rename the positional result columns to the MIMIC-style names used downstream.
columns_name = ['subject_id',  'hadm_id' ,'itemid', 'date','value', 'admittime' ,'dischtime']
admission_labs.columns = columns_name

# Each batch is ordered on its own; restore the global ORDER BY (pid, alter)
# when more than one batch was needed.
if len(lab_frames) > 1:
    admission_labs = (
        admission_labs
        .sort_values(['subject_id', 'date'], kind='stable')
        .reset_index(drop=True)
    )

print('Extracted Lab data information:')
print("# Itemid: ",    admission_labs['itemid'].nunique())
print("# Subjects without lab measurement: ",  cohort['subject_id'].nunique() -admission_labs['subject_id'].nunique())
print("# Admissions without lab measurement: ",cohort['hadm_id'].nunique() -admission_labs['hadm_id'].nunique() )


# Make sure the target folder exists so the export does not fail on a fresh checkout.
Path('./saved_data/features').mkdir(parents=True, exist_ok=True)
admission_labs.to_csv('./saved_data/features/'+target_cohort+f'_admissions_labs_{days_before_discharge}_days.csv.gz', compression='gzip', index=False)
print("[SUCCESSFULLY SAVED ADMISSIONS LABS DATA]")
print('------------------------------------------------------------------------------------------')


# Go to main for training