In [1]:
import numpy as np
import pandas as pd
import datetime
import copy
import time
import os
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import operator

from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
from datetime import timedelta

# Register tqdm's pandas integration (adds the progress_* variants such as
# `progress_apply`). Note: `tqdm` here is the tqdm.notebook class, because
# that import comes after (and shadows) the tqdm.auto import above.
tqdm.pandas()

In [3]:
# Directory that holds the MIMIC CSV files.
# Set the MIMIC_DATA_DIR environment variable to point to your MIMIC
# directory, or edit the fallback path below.
# os.path.join(..., '') guarantees a trailing separator, because the loading
# cell builds the file path from this string.
dataDirStr = os.path.join(
    os.environ.get('MIMIC_DATA_DIR', '/Users/gmessier/data/mimic-1.4/'),
    '',
)

In [4]:
# Load the microbiology events table. The path is built with os.path.join so
# it resolves whether or not dataDirStr ends with a path separator.
microbiologyevents_df = pd.read_csv(os.path.join(dataDirStr, "MICROBIOLOGYEVENTS.csv"))
# Lower-case the column names; later cells access columns by lower-case
# attribute names (e.g. `.subject_id`).
microbiologyevents_df.columns = microbiologyevents_df.columns.str.lower()
microbiologyevents_df

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,spec_itemid,spec_type_desc,org_itemid,org_name,isolate_num,ab_itemid,ab_name,dilution_text,dilution_comparison,dilution_value,interpretation
0,744,96,170324,2156-04-13 00:00:00,2156-04-13 14:18:00,70021.0,BRONCHOALVEOLAR LAVAGE,80026.0,PSEUDOMONAS AERUGINOSA,1.0,,,,,,
1,745,96,170324,2156-04-20 00:00:00,2156-04-20 13:10:00,70062.0,SPUTUM,,,,,,,,,
2,746,96,170324,2156-04-20 00:00:00,2156-04-20 16:00:00,70012.0,BLOOD CULTURE,,,,,,,,,
3,747,96,170324,2156-04-20 00:00:00,,70012.0,BLOOD CULTURE,,,,,,,,,
4,748,96,170324,2156-04-20 00:00:00,,70079.0,URINE,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631721,630931,99783,126090,2125-11-05 00:00:00,2125-11-05 13:15:00,70076.0,TISSUE,80066.0,ASPERGILLUS FUMIGATUS,2.0,,,,,,
631722,630932,99783,126090,2125-11-06 00:00:00,2125-11-06 10:24:00,70076.0,TISSUE,80066.0,ASPERGILLUS FUMIGATUS,1.0,,,,,,
631723,630933,99783,126090,2125-11-06 00:00:00,2125-11-06 10:24:00,70076.0,TISSUE,80066.0,ASPERGILLUS FUMIGATUS,2.0,,,,,,
631724,630934,99783,126090,2125-11-07 00:00:00,2125-11-07 12:40:00,70012.0,BLOOD CULTURE,,,,,,,,,


`MICROBIOLOGYEVENTS.csv` describes microbiology information, such as the cultures acquired and their associated sensitivities.

In [5]:
# Report the number of distinct subject_id values in the microbiology table.
print(f"There are {microbiologyevents_df['subject_id'].nunique()} patients.")

There are 39184 patients.


`spec_itemid` and `spec_type_desc` identify the specimen that is tested for bacterial growth. The itemids for the specimens are defined in the `D_LABITEMS` table.

`spec_type_desc` is a human-readable description of the `spec_itemid`.

In [6]:
# Five most frequent specimen item IDs: raw counts next to their share in
# percent. value_counts() skips missing values by default, so the percentages
# are relative to rows with a non-null spec_itemid.
c = microbiologyevents_df["spec_itemid"].value_counts().nlargest(5)
p = (
    (microbiologyevents_df["spec_itemid"].value_counts(normalize=True) * 100)
    .round(2)
    .nlargest(5)
)
pd.concat({"counts": c, "%": p}, axis=1)

Unnamed: 0,counts,%
70012.0,179930,28.49
70079.0,137558,21.78
70062.0,99887,15.81
70070.0,33623,5.32
70091.0,32280,5.11


In [7]:
# Five most frequent specimen descriptions: raw counts next to their share in
# percent. value_counts() already sorts in descending order, so the first five
# entries are the most common ones.
c = microbiologyevents_df["spec_type_desc"].value_counts().iloc[:5]
p = (
    (microbiologyevents_df["spec_type_desc"].value_counts(normalize=True) * 100)
    .round(2)
    .iloc[:5]
)
pd.concat({"counts": c, "%": p}, axis=1)

Unnamed: 0,counts,%
BLOOD CULTURE,179930,28.48
URINE,140671,22.27
SPUTUM,99887,15.81
SWAB,37895,6.0
MRSA SCREEN,32280,5.11


`org_itemid` and `org_name` refer to the organism, if any, which grew when tested. If NULL, then no organism grew.

In [8]:
# Five most frequent organism item IDs: raw counts next to their share in
# percent. value_counts() skips missing values by default, so rows with a null
# org_itemid are excluded from both columns.
c = microbiologyevents_df["org_itemid"].value_counts().nlargest(5)
p = (
    (microbiologyevents_df["org_itemid"].value_counts(normalize=True) * 100)
    .round(2)
    .nlargest(5)
)
pd.concat({"counts": c, "%": p}, axis=1)

Unnamed: 0,counts,%
80023.0,63947,19.56
80002.0,60133,18.4
80155.0,32777,10.03
80004.0,30628,9.37
80026.0,28926,8.85


In [9]:
# Five most frequent organism names: raw counts next to their share in
# percent. value_counts() already sorts in descending order, so the first five
# entries are the most common ones.
c = microbiologyevents_df["org_name"].value_counts().iloc[:5]
p = (
    (microbiologyevents_df["org_name"].value_counts(normalize=True) * 100)
    .round(2)
    .iloc[:5]
)
pd.concat({"counts": c, "%": p}, axis=1)

Unnamed: 0,counts,%
STAPH AUREUS COAG +,63947,19.5
ESCHERICHIA COLI,60133,18.33
"STAPHYLOCOCCUS, COAGULASE NEGATIVE",32777,9.99
KLEBSIELLA PNEUMONIAE,30628,9.34
PSEUDOMONAS AERUGINOSA,28926,8.82


`interpretation` is the result of the test: "S" is sensitive, "R" is resistant, "I" is intermediate, and "P" is pending.

In [10]:
# Full distribution of interpretation codes: raw counts next to their share in
# percent. value_counts() skips missing values by default, so rows with a null
# interpretation are excluded.
c = microbiologyevents_df["interpretation"].value_counts()
p = (microbiologyevents_df["interpretation"].value_counts(normalize=True) * 100).round(2)
pd.concat({"counts": c, "%": p}, axis=1)

Unnamed: 0,counts,%
S,188635,68.39
R,77169,27.98
I,10023,3.63
P,7,0.0
