Skip to content

Commit

Permalink
v0.3.0: Adapted pipeline to MIMIC-IV v2.2, improved speed by changing…
Browse files Browse the repository at this point in the history
… storage of timeseries, fixing bug from Issue #21
  • Loading branch information
USM-CHU-FGuyon committed Feb 19, 2024
1 parent 871f2ac commit fe67a50
Show file tree
Hide file tree
Showing 38 changed files with 6,354 additions and 1,044 deletions.
11 changes: 8 additions & 3 deletions 0_prepare_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@

ingredient_to_drug = om.run()

mm = MedicationMapping(pth_dic)

medication_json = mm.run(load_drugnames=True, fname='medications.json')
mm = MedicationMapping(pth_dic,
datasets=['hirid',
'amsterdam',
'mimic4',
'mimic3',
'eicu'])

medication_json = mm.run(load_drugnames=False, fname='medications.json')
9 changes: 4 additions & 5 deletions 1_extract_hirid.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@

hirid_prep = hiridPreparator(
variable_ref_path='hirid_variable_reference_v1.csv',
raw_ts_path='raw_stage/observation_tables_parquet.tar.gz',
raw_pharma_path='raw_stage/pharma_records_parquet.tar.gz',
admissions_path='reference_data.tar.gz',
imputedstage_path='imputed_stage/imputed_stage_parquet.tar.gz',
untar=True)
ts_path='observation_tables/parquet/',
pharma_path='pharma_records/parquet/',
admissions_path='reference_data/general_table.csv',
imputedstage_path='imputed_stage/parquet/')

hirid_prep.gen_labels()
hirid_prep.gen_medication()
Expand Down
21 changes: 0 additions & 21 deletions 1_extract_mimic.py

This file was deleted.

1 change: 1 addition & 0 deletions 1_extract_mimic3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

mimic3_prep.load_raw_tables()

mimic3_prep.icustays = mimic3_prep.gen_icustays()
mimic3_prep.gen_labels()
mimic3_prep.gen_flat()
mimic3_prep.gen_medication()
Expand Down
22 changes: 22 additions & 0 deletions 1_extract_mimic4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
This code extracts the data from the MIMIC-IV dataset
('mimic_source_path' in paths.json).
It creates a set of .parquet files at the specified path
('mimic' in paths.json).

The script is meant to be run top to bottom: it builds a mimic4Preparator,
loads the raw tables, then calls each gen_* step in sequence.

NOTE(review): the paths.json keys quoted above ('mimic_source_path', 'mimic')
may be left over from the former single-MIMIC script; confirm whether the
MIMIC-IV entries use different key names.
"""
from mimic4_preprocessing.mimic4preparator import mimic4Preparator

# Locations of the two large event tables.
# NOTE(review): presumably resolved relative to the MIMIC-IV source directory
# despite the leading '/'; verify in mimic4Preparator.
mimic4_prep = mimic4Preparator(
chartevents_pth='/icu/chartevents.csv.gz',
labevents_pth='/hosp/labevents.csv.gz')

mimic4_prep.load_raw_tables()

# The icustays table is generated first and stored on the preparator before
# any other gen_* step runs.
# NOTE(review): presumably the later steps read `mimic4_prep.icustays`;
# confirm before reordering.
mimic4_prep.icustays = mimic4_prep.gen_icustays()
mimic4_prep.gen_labels()
mimic4_prep.gen_flat()
mimic4_prep.gen_medication()
# NOTE(review): the three timeseries steps presumably write the
# timeseriesoutputs / timeserieslab / timeseries parquet files that the
# 2_mimic4.py step reads; verify the file names in the preparator.
mimic4_prep.gen_timeseriesoutputs()
mimic4_prep.gen_timeserieslab()
mimic4_prep.gen_timeseries()
8 changes: 4 additions & 4 deletions 2_mimic.py → 2_mimic4.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
Note that this produces the 'raw' data of the BlendedICU dataset.
The preprocessed BlendedICU dataset will then be obtained with 3_blendedICU.py
"""
from mimic_preprocessing.flat_and_labels import mimic_FLProcessor
from mimic_preprocessing.timeseries import mimicTSP
from mimic4_preprocessing.flat_and_labels import mimic4_FLProcessor
from mimic4_preprocessing.timeseries import mimic4TSP

tsp = mimicTSP(
tsp = mimic4TSP(
med_pth='medication.parquet',
ts_pth='timeseries.parquet',
tslab_pth='timeserieslab.parquet',
outputevents_pth='timeseriesoutputs.parquet')

tsp.run()

flp = mimic_FLProcessor()
flp = mimic4_FLProcessor()

flp.run_labels()
7 changes: 3 additions & 4 deletions 3_blendedICU.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@
from blended_preprocessing.timeseries import blendedicuTSP
from blended_preprocessing.flat_and_labels import blended_FLProcessor

flp = blended_FLProcessor(datasets=['mimic3',
flp = blended_FLProcessor(datasets=['mimic4',
'mimic3',
'hirid',
'amsterdam',
'mimic',
'eicu'])

flp.run_flat_and_labels()

tsp = blendedicuTSP(recompute_index=False)
tsp = blendedicuTSP(compute_index=False)

tsp.run()
1 change: 0 additions & 1 deletion 5_figures_and_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from figures_and_tables.blendedICU_stats import Blendedicu_stats

s = Blendedicu_stats()

# basic statistics on how many ingredients and drugnames were included
#s.medication_inclusion_stats()
# Flat statistics that make the Tables 1 and 2 in the manuscript
Expand Down
4 changes: 3 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ keywords = {OMOP common data format, Intensive care unit database, Data integrat
}
```
This repository contains the codes and files that allow the creation of the
BlendedICU dataset from the AmsterdamUMCdb, eICU, HiRID, and MIMIC-IV databases.
BlendedICU dataset from the AmsterdamUMCdb, eICU, HiRID, MIMIC-III and MIMIC-IV databases.

[<img src="plot/kdeplot.png" width="600"/>](plot/kdeplot.png)

Before you begin
---
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
drugname;drugcount
drugname;count
Drukzak;0.8887301999480655
NaCl 0,45%/Glucose 2,5%;0.7943391326928071
Paracetamol;0.78230762572492
Expand Down
Binary file modified auxillary_files/medication_mapping_files/drugnames.parquet
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
drugname;drugcount
drugname;count
ACETAMINOPHEN 325 MG PO TABS;0.10559646319059639
ACETAMINOPHEN;0.08462652905769719
ONDANSETRON 2 MG/1 ML 2ML SDV INJ;0.07290188639792093
Expand Down
Loading

0 comments on commit fe67a50

Please sign in to comment.