In [1]:
import os
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import h5py
import nbimporter
from Functions import process_file_vital

In [2]:
labs = pd.read_csv('D:/simedy/CSVs/vital_labs_processed.csv')
#rename Parameter to ITEMID, and dt to CHARTTIME
labs.rename(columns={'caseid':'SUBJECT_ID', 'Parameter':'ITEMID', 'dt':'CHARTTIME', 'lab_flag':'FLAG'}, inplace=True)

#convert CHARTTIME to datetime
labs['CHARTTIME'] = pd.to_datetime(labs['CHARTTIME'])
labs.head()

Unnamed: 0,SUBJECT_ID,CHARTTIME,result,ITEMID,FLAG
0,p000001,1970-01-07 21:07:50,2.9,Albumin,1
1,p000001,1970-01-05 14:59:35,3.2,Albumin,1
2,p000001,1970-01-01 03:30:14,3.4,Albumin,0
3,p000001,1970-01-02 14:17:35,3.6,Albumin,0
4,p000001,1970-01-05 14:59:35,12.0,Alanine Aminotransferase (ALT),0


In [3]:
vital_labs = labs.ITEMID.unique().tolist()
mimic_labs = set([x.split('_')[1].split('.')[0] for x in os.listdir('D:/simedy/FT models/')])

# Get the labs that are in both the mimic and the vital lists
labs_to_use = list(set(vital_labs).intersection(set(mimic_labs)))
# print(vital_labs)
# print(mimic_labs)
labs_to_use.append('Hemoglobin')
labs_to_use

['Bicarbonate',
 'Albumin',
 'Creatinine',
 'Chloride',
 'Sodium',
 'Potassium',
 'Platelet Count',
 'Hematocrit',
 'White Blood Cells',
 'Urea Nitrogen',
 'Free Calcium',
 'Asparate Aminotransferase (AST)',
 'Alanine Aminotransferase (ALT)',
 'Estimated GFR (MDRD equation)',
 'Hemoglobin']

In [4]:
#Filter by subjects with available waveform data
path = "D:/PulseDB_Vital_modified"
labs_filtered = labs[labs.SUBJECT_ID.isin([x[:-4] for x in os.listdir(path)])]
print(labs_filtered.shape, labs.shape)

(412427, 5) (912008, 5)


In [5]:
time = '15 min'
interval = pd.Timedelta(time)

for test in labs_to_use:
    df_results = pd.DataFrame()
    
    #select the entries for that particular test and add the .mat extension to the subject_id
    files = labs_filtered[(labs_filtered.ITEMID == test)].SUBJECT_ID.value_counts()
    files = files.index + ".mat"

    for file in tqdm(files, desc = f'Processing {test}'):
        subject_id = file[:-4]
        if file in os.listdir(path):
            df = process_file_vital(path, file, subject_id, interval,
                                    labs_filtered[labs_filtered.ITEMID == test])
            df_results = pd.concat([df_results, df], axis = 0)
    df_results.to_csv(f'D:/simedy/CSVs/Vital_{test}_{time}.csv', index = False)

Processing Bicarbonate:   0%|          | 0/1548 [00:00<?, ?it/s]

Processing Albumin:   0%|          | 0/2348 [00:00<?, ?it/s]

Processing Creatinine:   0%|          | 0/2303 [00:00<?, ?it/s]

Processing Chloride:   0%|          | 0/2332 [00:00<?, ?it/s]

Processing Sodium:   0%|          | 0/2400 [00:00<?, ?it/s]

Processing Potassium:   0%|          | 0/2400 [00:00<?, ?it/s]

Processing Platelet Count:   0%|          | 0/2390 [00:00<?, ?it/s]

Processing Hematocrit:   0%|          | 0/2438 [00:00<?, ?it/s]

Processing White Blood Cells:   0%|          | 0/2392 [00:00<?, ?it/s]

Processing Urea Nitrogen:   0%|          | 0/2303 [00:00<?, ?it/s]

Processing Free Calcium:   0%|          | 0/1664 [00:00<?, ?it/s]

Processing Asparate Aminotransferase (AST):   0%|          | 0/2348 [00:00<?, ?it/s]

Processing Alanine Aminotransferase (ALT):   0%|          | 0/2348 [00:00<?, ?it/s]

Processing Estimated GFR (MDRD equation):   0%|          | 0/2293 [00:00<?, ?it/s]

Processing Hemoglobin:   0%|          | 0/2392 [00:00<?, ?it/s]

In [6]:
from datetime import datetime, timezone, timedelta

# Define the timezone for GMT+3
gmt_plus_3 = timezone(timedelta(hours=3))

# Get the current time in GMT+3
current_time_gmt_plus_3 = datetime.now(gmt_plus_3)

# Print the current time in GMT+3
print("Current time in GMT+3:", current_time_gmt_plus_3.strftime('%Y-%m-%d %H:%M:%S'))

Current time in GMT+3: 2024-08-14 23:22:11
