In [1]:
import numpy as np
import pandas as pd
import datetime
import copy
import time
import os
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import operator

from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
from datetime import timedelta

tqdm.pandas()

In [3]:
# Edit to point to your MIMIC directory.
dataDirStr = '/Users/gmessier/data/mimic-1.4/'

In [4]:
cptevents_df = pd.read_csv(dataDirStr + "CPTEVENTS.csv")
cptevents_df.columns = cptevents_df.columns.str.lower()
cptevents_df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,row_id,subject_id,hadm_id,costcenter,chartdate,cpt_cd,cpt_number,cpt_suffix,ticket_id_seq,sectionheader,subsectionheader,description
0,317,11743,129545,ICU,,99232,99232.0,,6.0,Evaluation and management,Hospital inpatient services,
1,318,11743,129545,ICU,,99232,99232.0,,7.0,Evaluation and management,Hospital inpatient services,
2,319,11743,129545,ICU,,99232,99232.0,,8.0,Evaluation and management,Hospital inpatient services,
3,320,11743,129545,ICU,,99232,99232.0,,9.0,Evaluation and management,Hospital inpatient services,
4,321,6185,183725,ICU,,99223,99223.0,,1.0,Evaluation and management,Hospital inpatient services,
...,...,...,...,...,...,...,...,...,...,...,...,...
573141,573142,78876,163404,Resp,2105-09-01 00:00:00,94003,94003.0,,,Medicine,Pulmonary,VENT MGMT;SUBSQ DAYS(INVASIVE)
573142,573143,78879,136071,Resp,2150-08-29 00:00:00,94003,94003.0,,,Medicine,Pulmonary,VENT MGMT;SUBSQ DAYS(INVASIVE)
573143,573144,78879,136071,Resp,2150-08-28 00:00:00,94002,94002.0,,,Medicine,Pulmonary,"VENT MGMT, 1ST DAY (INVASIVE)"
573144,573145,78892,175171,Resp,2125-06-11 00:00:00,94003,94003.0,,,Medicine,Pulmonary,VENT MGMT;SUBSQ DAYS(INVASIVE)


`CPTEVENTS` contains a list of which current procedural terminology (CPT) codes were billed for each patient. This is useful to determine which and what procedures have been performed. 

Refer to `D_CPT` table for exact definitions of `cpt_cd` and `cpt_number`.

In [5]:
print(f"There are {cptevents_df.subject_id.nunique()} patients who had a CPT code billed")

There are 34005 patients who had a CPT code billed


`costcenter` is categorical data, where the cost center which billed for the corresponding CPT code. There are two possible entries for `costcenter`. "ICU" corresponds to procedures billed for by the "ICU" and Resp corresponds to mecahnical or non invasive ventilation and were billed by the respiratory therapist.

In [6]:
c = cptevents_df.costcenter.value_counts()
p = cptevents_df.costcenter.value_counts(normalize=True).mul(100).round(2)
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
ICU,471601,82.28
Resp,101545,17.72


`cpt_cd` is the original CPT code. `cpt_number` is a numeric version of `cpt_cd` column, as not all `cpt_cd` is numeric. The `cpt_suffix` contains the text suffix when it is not numeric. The CPT codes are defined in the `d_cpt` table.

In [7]:
c = cptevents_df.cpt_cd.value_counts()[:5]
p = cptevents_df.cpt_cd.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
99232,63066,11.0
99291,60576,10.57
99233,48390,8.44
94003,46018,8.03
99232,45368,7.92


`ticket_id_seq` is the order of the `cpt_cd`.

`sectionheader` and `subsectionheader` are the categories for the given CPT code. These categories are defined in the dictionary file `D_CPT.parquet`.

In [8]:
c = cptevents_df.sectionheader.value_counts()[:5]
p = cptevents_df.sectionheader.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
Evaluation and management,404388,70.56
Medicine,114194,19.92
Surgery,50807,8.86
Radiology,2974,0.52
Anesthesia,687,0.12


In [9]:
c = cptevents_df.subsectionheader.value_counts()[:5]
p = cptevents_df.subsectionheader.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
Hospital inpatient services,268296,46.81
Critical care services,106469,18.58
Pulmonary,101563,17.72
Consultations,25925,4.52
Cardiovascular system,21485,3.75


`description` is the information about the meaning of the CPT code when the cost center is "Resp".

In [10]:
c = cptevents_df.description.value_counts()[:5]
p = cptevents_df.description.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
VENT MGMT;SUBSQ DAYS(INVASIVE),86712,85.39
"VENT MGMT, 1ST DAY (INVASIVE)",12552,12.36
VENT MGMT;SUBSQ DAYS(NIV),1272,1.25
"VENT MGMT,1ST DAY (NIV)",1009,0.99
