In [1]:
import numpy as np
import pandas as pd
import datetime
import copy
import time
import os
import re
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import operator

from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
from datetime import timedelta

tqdm.pandas()

In [2]:
# Edit to point to your MIMIC directory.
dataDirStr = '/Users/gmessier/data/mimic-1.4/'

In [3]:
prescriptions_df = pd.read_csv(dataDirStr + "PRESCRIPTIONS.csv")
prescriptions_df.columns = prescriptions_df.columns.str.lower()
prescriptions_df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,startdate,enddate,drug_type,drug,drug_name_poe,drug_name_generic,formulary_drug_cd,gsn,ndc,prod_strength,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,route
0,2214776,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Tacrolimus,Tacrolimus,Tacrolimus,TACR1,021796,4.690617e+08,1mg Capsule,2,mg,2,CAP,PO
1,2214775,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Warfarin,Warfarin,Warfarin,WARF5,006562,5.601728e+07,5mg Tablet,5,mg,1,TAB,PO
2,2215524,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Heparin Sodium,,,HEPAPREMIX,006522,3.380550e+08,"25,000 unit Premix Bag",25000,UNIT,1,BAG,IV
3,2216265,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,BASE,D5W,,,HEPBASE,,0.000000e+00,HEPARIN BASE,250,ml,250,ml,IV
4,2214773,6,107064,,2175-06-11 00:00:00,2175-06-12 00:00:00,MAIN,Furosemide,Furosemide,Furosemide,FURO20,008208,5.482972e+07,20mg Tablet,20,mg,1,TAB,PO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4156445,3996662,98887,121032,238144.0,2144-09-06 00:00:00,2144-09-06 00:00:00,MAIN,PredniSONE,PredniSONE,PredniSONE,PRED20,006751,5.400182e+07,20 mg Tablet,40,mg,2,TAB,PO/NG
4156446,3996070,98887,121032,238144.0,2144-09-06 00:00:00,2144-09-06 00:00:00,MAIN,Ipratropium Bromide Neb,Ipratropium Bromide Neb,Ipratropium Bromide Neb,IPRA2H,021700,4.879801e+08,2.5mL Vial,1,NEB,1,VIAL,IH
4156447,3996063,98887,121032,238144.0,2144-09-06 00:00:00,2144-09-06 00:00:00,MAIN,HYDROmorphone (Dilaudid),HYDROmorphone (Dilaudid),HYDROmorphone,HYDR20/100NS,048078,6.155302e+10,20 mg / 100 mL Premix Bag,0.12,mg,0.01,BAG,IVPCA
4156448,3996062,98887,121032,238144.0,2144-09-06 00:00:00,2144-09-06 00:00:00,MAIN,Docusate Sodium,Docusate Sodium,Docusate Sodium,DOCU100,003009,9.042245e+08,100mg Capsule,100,mg,1,CAP,PO


`PRESCRIPTIONS` contains medication related order entries.

In [4]:
print(f"There are {prescriptions_df.subject_id.nunique()} records of a prescription being ordered.")

There are 39363 records of a prescription being ordered.


`startdate` and `enddate` specify the date period for which the prescription is valid

`drugtype` is categorical data, which provides the type of drug prescribed.

In [5]:
c = prescriptions_df.drug_type.value_counts()
p = prescriptions_df.drug_type.value_counts(normalize=True).mul(100).round(2)
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
MAIN,3216882,77.39
BASE,925089,22.26
ADDITIVE,14479,0.35


`drug`, `drug_name_poe`, `drug_name_generic` are all categorical data, which contain various representations of the drug prescribed to the patient.

In [6]:
c = prescriptions_df.drug.value_counts()[:5]
p = prescriptions_df.drug.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
Potassium Chloride,192993,4.64
Insulin,143465,3.45
D5W,142241,3.42
Furosemide,133122,3.2
0.9% Sodium Chloride,130147,3.13


In [7]:
c = prescriptions_df.drug_name_poe.value_counts()[:5]
p = prescriptions_df.drug_name_poe.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
Insulin,143462,5.76
Furosemide,123311,4.95
Potassium Chloride,99247,3.98
Sodium Chloride 0.9% Flush,83392,3.35
Acetaminophen,78748,3.16


In [8]:
c = prescriptions_df.drug_name_generic.value_counts()[:5]
p = prescriptions_df.drug_name_generic.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
Furosemide,123216,4.94
Potassium Chloride,86443,3.47
Sodium Chloride 0.9% Flush,83395,3.34
Metoprolol,73787,2.96
Insulin - Sliding Scale,68441,2.74


`formulary_drug_cd`, `gsn`, `ndc` is categorical data, which provide a representation of the drug in various coding systems. `gsn` is Generic Sequence Number, `ndc` is National Drug Code.

In [9]:
c = prescriptions_df.formulary_drug_cd.value_counts()[:5]
p = prescriptions_df.formulary_drug_cd.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
FURO40I,87763,2.11
NACLFLUSH,83395,2.01
INSULIN,81343,1.96
D5W250,66932,1.61
NS1000,60032,1.44


In [10]:
c = prescriptions_df.gsn.value_counts()[:5]
p = prescriptions_df.gsn.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
1210,242031,6.63
1972,150187,4.12
8205,104839,2.87
1723,66695,1.83
16546,52849,1.45


`route` is categorical data. `route` is the method used to prescribe the patient the drug.

In [14]:
c = prescriptions_df.route.value_counts()[:5]
p = prescriptions_df.route.value_counts(normalize=True).mul(100).round(2)[:5]
pd.concat([c,p], axis=1, keys=['counts', '%'])

Unnamed: 0,counts,%
IV,2028438,48.82
PO,993068,23.9
IV DRIP,320727,7.72
PO/NG,247132,5.95
SC,212253,5.11
