In [1]:
import numpy as np
import pandas as pd
import datetime
import copy
import time
import os
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import operator

from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
from datetime import timedelta

tqdm.pandas()

In [2]:
# Edit to point to your MIMIC directory.
dataDirStr = '/Users/gmessier/data/mimic-1.4/'

In [3]:
transfers_df = pd.read_csv(dataDirStr + "TRANSFERS.csv")
transfers_df.columns = transfers_df.columns.str.lower()
transfers_df

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,eventtype,prev_careunit,curr_careunit,prev_wardid,curr_wardid,intime,outtime,los
0,657,111,192123,254245.0,carevue,transfer,CCU,MICU,7.0,23.0,2142-04-29 15:27:11,2142-05-04 20:38:33,125.19
1,658,111,192123,,carevue,transfer,MICU,,23.0,45.0,2142-05-04 20:38:33,2142-05-05 11:46:32,15.13
2,659,111,192123,,carevue,discharge,,,45.0,,2142-05-05 11:46:32,,
3,660,111,155897,249202.0,metavision,admit,,MICU,,52.0,2144-07-01 04:13:59,2144-07-01 05:19:39,1.09
4,661,111,155897,,metavision,transfer,MICU,,52.0,32.0,2144-07-01 05:19:39,2144-07-01 06:28:29,1.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...
261892,259671,98385,195599,,metavision,transfer,,,36.0,49.0,2108-10-06 11:27:11,2108-10-06 13:05:57,1.65
261893,259672,98385,195599,292167.0,metavision,transfer,,SICU,49.0,33.0,2108-10-06 13:05:57,2108-10-11 17:00:31,123.91
261894,259673,98385,195599,,metavision,discharge,SICU,,33.0,,2108-10-11 17:00:31,,
261895,259674,98389,155368,,metavision,admit,,,,29.0,2153-10-14 22:12:58,2153-10-14 22:21:06,0.14


`TRANSFERS` table describes the physical locations for patients throughout each `icustay_id`. Care units are defined based off the `wardid` being associated with an ICU cost center.

In [4]:
print(f"There has been a total of {transfers_df.subject_id.nunique()} transfers")

There has been a total of 46520 transfers


`dbsource` contains the ICU database which the data was sourced from. Patients from 2001-2008 are CareVue, and 2008-2012 are Metavision.

In [5]:
c = transfers_df.dbsource.value_counts()
p = transfers_df.dbsource.value_counts(normalize=True).mul(100).round(2)
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
carevue,161797,61.84
metavision,99306,37.95
both,553,0.21


`eventtype` describes what transfer event occurred: `admit` for an admission, `transfer` for an intra-hospital transfer and `discharge` for a discharge from the hospital.

In [6]:
c = transfers_df.eventtype.value_counts()
p = transfers_df.eventtype.value_counts(normalize=True).mul(100).round(2)
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
transfer,144045,55.01
discharge,58919,22.5
admit,58909,22.5


`prev_careunit` and `curr_careunit` contain the care unit in which the patient previously and currently reside respectively. The care unit is defined based upon the ward: if the ward is an ICU cost center, then the care unit defines the type of ICU. The `intime` and `outtime` of the transfer correspond to the `curr_careunit`. 

In [7]:
c = transfers_df.prev_careunit.value_counts()[:5]
p = transfers_df.prev_careunit.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
MICU,26398,27.5
NICU,18631,19.41
CSRU,13662,14.23
SICU,11321,11.79
CCU,9534,9.93


In [8]:
c = transfers_df.curr_careunit.value_counts()[:5]
p = transfers_df.curr_careunit.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
MICU,26400,27.5
NICU,18629,19.4
CSRU,13663,14.23
SICU,11323,11.79
CCU,9542,9.94


`CCU`	Coronary care unit

`CSRU`	Cardiac surgery recovery unit

`MICU`	Medical intensive care unit

`NICU`	Neonatal intensive care unit

`NWARD`	Neonatal ward

`SICU`	Surgical intensive care unit

`TSICU`	Trauma/surgical intensive care unit


`prev_wardid` and `curr_wardid` contain the previous and current ward in which the patient stayed. Note that the grouping of physical locations in the hospital database is referred to as a ward. Though in practice ICUs are not referred to as wards, the hospital database technically tracks ICUs as “wards with an ICU cost center”. As a result, each ICU is associated with a WARDID, but not every WARDID is an ICU.

In [9]:
c = transfers_df.prev_wardid.value_counts().nlargest(5)
p = transfers_df.prev_wardid.value_counts(normalize=True).mul(100).round(2).nlargest(5)
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
56.0,17344,8.55
55.0,12073,5.95
14.0,10471,5.16
52.0,10413,5.13
2.0,9343,4.6


In [10]:
c = transfers_df.curr_wardid.value_counts().nlargest(5)
p = transfers_df.curr_wardid.value_counts(normalize=True).mul(100).round(2).nlargest(5)
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
56.0,17336,8.54
55.0,12072,5.95
14.0,10469,5.16
52.0,10412,5.13
2.0,9345,4.6


`intime` provides the date and time the patient was transferred into the current care unit from the previous care unit. 

`outtime` provides the date and time the patient was transferred out of the current care unit.

`los` is the length of stay for the patient in hours for the given ward stay, which may be within or outside of the ICU.

In [11]:
transfers_df.los.describe()

count    202921.000000
mean         71.129606
std         133.267475
min           0.000000
25%           8.640000
50%          36.190000
75%          81.910000
max       20879.990000
Name: los, dtype: float64

In [12]:
print(f"Average length of stay: {transfers_df.los.mean()} hours")

Average length of stay: 71.12960639854921 hours
