In [2]:
import numpy as np
import pandas as pd
import datetime
import copy
import time
import os
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import operator

from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
from datetime import timedelta

# `tqdm` here is the tqdm.notebook class: the later `from tqdm.notebook import tqdm`
# rebinds the name first imported from tqdm.auto.
# Hook tqdm into pandas so progress-bar variants of apply (e.g. `progress_apply`)
# are available on pandas objects.
tqdm.pandas()

In [3]:
# Location of the MIMIC CSV files.
# Set the MIMIC_DATA_DIR environment variable to point at your own copy of the
# data; when it is unset (or empty) the original default path below is used.
# os.path.join(..., '') guarantees a trailing path separator, because the loading
# cells build file names by plain string concatenation (dataDirStr + "<FILE>.csv").
dataDirStr = os.path.join(
    os.environ.get('MIMIC_DATA_DIR') or '/Users/gmessier/data/mimic-1.4/',
    '',
)

In [4]:
# Load the caregiver table and lower-case every column name in one step, so the
# columns can be referenced as e.g. caregivers_df.cgid in the cells that follow.
caregivers_df = pd.read_csv(dataDirStr + "CAREGIVERS.csv").rename(columns=str.lower)
caregivers_df

Unnamed: 0,row_id,cgid,label,description
0,2228,16174,RO,Read Only
1,2229,16175,RO,Read Only
2,2230,16176,Res,Resident/Fellow/PA/NP
3,2231,16177,RO,Read Only
4,2232,16178,RT,Respiratory
...,...,...,...,...
7562,6300,20303,MD,
7563,6301,20304,RN,RN
7564,6302,20305,MDs,
7565,6303,20306,RPH,Pharmacist


The `CAREGIVERS` table (loaded above from `CAREGIVERS.csv`) defines the role of each caregiver, for example a medical doctor (MD) or a registered nurse (RN).

Each caregiver is given a unique identifier `cgid`, with `label` and `description` defining the type of caregiver.

`cgid` is a unique identifier for each distinct caregiver present in the database. `cgid` is sourced from two tables in the raw data: the CareVue and Metavision ICU databases. 

In [5]:
# Report how many distinct cgid values the caregiver table contains.
n_caregivers = caregivers_df["cgid"].nunique()
print(f"There are {n_caregivers} unique caregivers")

There are 7567 unique caregivers


`label` defines the type of caregiver (e.g. RN, MD, PharmD). Note that `label` is a free-text field and therefore contains many typographical errors and spelling variants of the same concept (e.g. MD, MDs, M.D.).



In [6]:
# Five most common caregiver labels: raw counts alongside their percentage share.
# value_counts() leaves out missing labels by default, so each percentage is
# relative to the rows that have a label, not to the whole table.
c = caregivers_df['label'].value_counts().head(5)
p = (caregivers_df['label'].value_counts(normalize=True) * 100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
RO,1658,22.05
MD,1380,18.36
Res,1238,16.47
RN,1185,15.76
MDs,226,3.01


`description` provides additional information regarding the caregiver alongside `label`.



In [7]:
# Five most common caregiver descriptions: raw counts alongside their percentage share.
# value_counts() leaves out missing descriptions by default, so each percentage is
# relative to the rows that have a description, not to the whole table.
c = caregivers_df['description'].value_counts().head(5)
p = (caregivers_df['description'].value_counts(normalize=True) * 100).round(2).head(5)
pd.concat([c, p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
Read Only,1954,37.9
Resident/Fellow/PA/NP,1546,29.99
RN,600,11.64
Attending,189,3.67
Rehabilitation,173,3.36
