## This notebook can be run in several configurations:  
- If interested in comparing the capturing or adjudicating regex patterns, uncomment or comment out lines in the code cell defining the `text_match_all_risk_factors` function.  
- Counting how many notes were text-matched is different from counting how many notes were labeled "yes". Go to code cell #10 to choose which of the two is counted.  
- The code cells creating the plots will depend on the above choices.  

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from pathlib import Path

In [None]:
# Remove pandas' display limits so long note text and wide frames are shown in full
for _display_option in ('display.max_colwidth', 'display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

In [None]:
# Custom imports
import sys
sys.path.append("../")
import src.plots as plots

In [None]:
# this function will have the regex patterns to match on text
def text_match_all_risk_factors(attn_notes):
    '''
    Adds boolean columns (flags) for specific risk factors
    and for cardiogenic language. These flags represent whether
    the note text contained the regular expression pattern.

    Sepsis and shock are handled differently: they are read from the
    `seg_sepsis` and `seg_shock` columns, and a row is flagged unless
    its snippet is "Invalid" or contains one of the exclusion phrases.

    Inputs:
    - attn_notes: pandas dataframe, attending physician notes data.
      Must have the columns `notes_text`, `seg_sepsis` and `seg_shock`.

    Outputs:
    - attn_notes: the same pandas dataframe (modified in place), with
      one `<risk_factor>_matched` flag column added per risk factor.
      Missing note text or a missing snippet gives False, never NaN.
    '''

    def matches(pattern):
        # na=False keeps every flag strictly boolean: a missing note is "not matched"
        # instead of NaN, which would break a later .astype(int).
        return attn_notes.notes_text.str.contains(pattern, na=False)

    def passes_exclusions(segment, exclusion_phrases):
        # A non-string (missing) snippet or the "Invalid" sentinel is never a match.
        if not isinstance(segment, str) or segment == "Invalid":
            return False
        # If any of the exclusion phrases is found in the text snippet, make False. Otherwise, make True.
        return not any(phrase in segment for phrase in exclusion_phrases)

    # patterns to search for in the text
    # (raw strings, so the regex backslashes are not read as Python string escapes)
    pneumonia_pattern = r'(?<!\w)(?:PCPpneumonia|pneumonia|Pneumonia|PNEUMONIA|pneumoniae|pneunonia|pneunoniae|pnuemonia|bronchopneumonia|parapneumonic|PNA|CAP|VAP|HAP|HCAP|hcap|infection|abx|PCP)(?!\w)'
    aspiration_pattern = r'(?i)(?<!\w)(?<!possibility\sof\s)(?<!\(\?)(?<!no\s{4}e\/o\s)(?<!unclear\sif\sthis\sis\s)(?<!cannot\srule\sout\s)(?<!risk\sfor\s)(?<!risk\sof\s)(?<!\?\s)(?<!cover\sfor\s)(?<!no\switnessed\s)(?:aspiration|aspirating)(?!\svs)(?!\svs.)(?!\?)(?!\ss\/p\sR\smainstem\sintubation)(?!\sprecautions)(?!\sand\sdrainage)(?!\w)'
    inhalation_pattern = r'(?i)(?<!\w)(?:inhaled\sswimming\spool\swater|inhalation\sinjury)(?!\w)'
    pulm_contusion_pattern = r'(?i)(?<!\w)(?:pulmonary|pulmoanry)\s+(?:contusion|contusions)(?!\w)'
    vasculitis_pattern = r'(?i)(?<!\w)(?<!\?\s)(?<!less\slikely\s)(?:pulmonary\svasculitis|vasculitis)(?!\slabs)(?!\sworkup)(?!\sand\scarcinomatosis\sis\sless\slikely)(?!\shighly\sunlikely)(?!\sless\slikely)(?!\w)'
    drowning_pattern = r'(?i)(?<!\w)(?:drowned|drowning)(?!\w)'

    sepsis_exclusion = ['r/o sepsis', 'no longer in', 'sepsis or cardiogenic shock', 'cardiogenic vs septic', 'potential for septic shock', 'cardiac vs septic', 'searching for evidence of',
                        'shock mixed cardiogenic/vasodilatory', 'sepsis vs cardiogemnic','shock-septic vs cargdiogenic', 'shock-septic vs cardiogenic', 'severe sepsis resolved',
                        'now off low dose vasopressor', 'shock-septic vs hypovolemic resolved', 'septic schock-resolved', 'shock-septic vs hypovolemic', 'septic shock off pressors',
                        'cannot rule out septic/vasodilatory shock', 'previously vasoactive support for sepsis', 'billiary sepsis', 'septic shock secondary to esbl bacteremia',
                        'c/b septic joints', 'septic shock due to pseudomonas bacteremia', 'no evidence of hemorrhage or sepsis', 'no evidence of ongoing hemorrhage or sepsis',
                        'admitted with septic shock about months ago', 'history of aspergillus pneumonia/sepsis', 'mssa bactermia septic shock resolved', 'hypotension/sepsis vs hypercoaguable',
                        'septic shock ards copd exacerbation hcap bacteremia', 'septic shock found to have klebsiella bacteremia', 'suspected sepsis', 'septic emboli syndrome',
                        'takotsubo possible', 'takatsubo with possible', "septic shock suspect recurrent takatsubo's",'without active hemorrhage or sepsis', 'w/u for sepsis underway',
                        'also concern for sepsis', 'cytopenias likely due to sepsis', 'sedation sepsis', 'septic shock with picture', 'no signs of sepsis at this time', 'potentially sepsis',
                        'does not have septic shock', 'sepsis unlikely', 'no evidence of sepsis', 'does not have sig signs/sxs infection or sepsis', 'no source of sepsis', 'h/o urosepsis']
    # sepsis_pattern = r'(?i)(?<!\w)(?:sepsis|urosepsis|septiuc|septic|ssepsis|sseptic|spetic)(?!\w)'

    shock_exclusion = ['is no longer in septic shock', 'chest compressions or shocks', 'shock now resolved', 'potential for septic', 'septic shock off pressors', 'septic shock with picture',
                       'no longer in shock', 'septic shock-resolved', 'septic shock due to pseudomonas bacteremia', 'shock has resolved', 'septic shock source uncertain', 'not in shock',
                       'weaned off pressors', 'septic shock due to e-coli bactermia resolved', 'most likely distributive liver failure vs sepsis', 'shock--improving', 'terminated by shock',
                       'icd interrogation reported two shocks', 'unlikely to be cardiogenic shock', 'cpr/shocks', 'shocks before rosc', 'underwent cardiopulmonary resuscitation and',
                       'now off low dose vasopressor requirement', 'schock-resolved', 'vs hypovolemic resolved', 'cannot rule out septic/vasodilatory', 'obstructive shock due to pe and septic',
                       'no operative intervention recommended except in shock situation']
    # shock_pattern = r'(?i)(?<!\w)(?:shock|shocks|schock)(?!\w)'

    overdose_pattern = r'(?i)(?<!\w)(?:overdose|drug\soverdose)(?!\w)'
    trauma_pattern = r"(?i)(?<!\w)(?<!OGT\s)(?<!hx\sof\s)(?<!no\sreported\sfalls\sor\s)(?<!to\sprevent\s)(?<!thoracic\sand\s)(?<!per\s)(?<!spoke\swith\s)(?<!pmh\sremote\s)(?:trauma|traumatic|barotrauma|barotraumatic)(?!\sbay)(?!\/critical\scare\sservice)(?!'s\sserivice\sblessing)(?!\w)"
    pancreatitis_pattern = r'(?i)(?<!\w)pancreatitis(?!\w)'
    burn_pattern = r'(?i)(?<!\w)(?:burn|burns)(?!\w)'
    trali_pattern = r'(?<!\w)(?:TRALI|transfusion(?:-|\s)related\sacute\slung\sinjury|transfusion(?:-|\s)associated\sacute\slung\sinjury)(?!\w)'
    ards_pattern = r'(?i)(?<!\w)(?:ards|acute\srespiratory\sdistress\ssyndrome|acute\slung\sinjury|ali|ardsnet|acute\shypoxemic\srespiratory\sfailure)(?!\w)'
    pregnant_pattern = r'(?i)(?<!\w)(?:IUP|G\dP\d)(?!\w)'
    chf_pattern = r'(?i)(?<!\w)(?<!h\/o\s)(?:congestive\sheart\sfailure|chf|diastolic\sHF|systolic\sHF|heart\sfailure|diastolic\sdysfunction|LV\sdysfunction|low\scardiac\soutput\ssyndrome|low\scardiac\soutput\ssyndrom|low\scardiac\souput\ssyndrome|low\sCO\sstate)(?!\swith\spreserved\sef)(?!\swas\sanother\spossible\sexplan)(?!\w)'
    cardiogenic_pattern = r'(?i)(?<!\w)(?<!no\se\/o\sobstructive\sor\s)(?<!versus\s)(?<!rule\sout\s)(?<!ruled\sout\s)(?<!less\slikley\s)(?<!w\/o\sevidence\ssuggestive\sof\s\s)(?<!non\s)(?<!less\slikely\s)(?<!not\slikely\s)(?<!unlikely\sto\sbe\s)(?<!no\sclear\sevidence\sof\sacute\s)(?<!non-)(?<!than\s)(?<!no\sevidence\sof\s)(?:cardiogenic|cardigenic|cardiogemic|cardiac\spulmonary\sedema|cardiac\sand\sseptic\sshock|Shock.{1,15}suspect.{1,15}RV\sfailure)(?!\s\(not\slikely\sgiven\sECHO\sresults\))(?!\sshock\sunlikely)(?!\svs\.\sseptic)(?!\scomponent\salthough\sSvO2\snormal)(?!\w)'
    non_cardiogenic_pattern = r'(?i)(?<!\w)(?<!cardiogenic\sor\s)(?<!cardiogenic\svs\s)(?<!and\/or\s)(?:non(?:-|\s)cardiogenic|noncardiogenic|non(?:-|\s)cardigenic|noncardigenic)(?!\svs\scardiogenic)(?!\w)'
    palliative_pattern = r'(?i)(?<!\w)(?:palliative\scare|comfort\scare|withdraw\scare|comfort\salone|withdraw\ssupport\sin\sfavor\sof\spalliation)(?!\w)'
    cardiac_arrest_pattern = r'(?i)(?<!\w)(?:arrest|cardiorespiratory arrest)(?!\w)'
    dementia_pattern = r'(?i)(?<!\w)dementia(?!\w)'
    stroke_pattern = r'(?i)(?<!\w)(?:stroke|strokes|cerebellar\shemorrhage|intracerebral\shemorrhage|BG\shemorrhage|cva|cerebrovascular\saccident|cefrebellar\sinfarcts\/basilar\sstenosis)(?!\w)'
    # Double-quoted so that "dt's" is a literal alternative; the former 'dt''s' was two adjacent
    # string literals that Python silently concatenated into a duplicate "dts".
    alcohol_pattern = r"(?i)(?<!\w)(?:alcohol\swithdrawal|dts|dt's|dt|alcohol\sdependence|alcohol\sabuse|etoh\sabuse|etoh\swithdrawal|etoh\swithdrawl|etoh\sw\/drawal|delirium\stremens)(?!\w)"
    seizure_pattern = r'(?i)(?<!\w)(?<!no\se\/o\ssubclinical\s)(?:seizure|seizures)(?!\w)'
    ami_pattern = r'(?i)(?<!\w)(?:ami|acute\smyocardial\sischemia|acute\smyocardial\sinfarction|myocardial\sinfarction|nstemi|non-st\selevation\smi|stemi|st\selevation\smi|acute\smi)(?!\w)'

    # Adding the flag/boolean columns.
    # NOTE: the order in which the columns are added matters; downstream cells pair the
    # "*_matched" columns with display labels by position.
    attn_notes['pneumonia_matched'] = matches(pneumonia_pattern)
    attn_notes['aspiration_matched'] = matches(aspiration_pattern)
    attn_notes['inhalation_matched'] = matches(inhalation_pattern)
    attn_notes['pulmonary_contusion_matched'] = matches(pulm_contusion_pattern)
    attn_notes['vasculitis_matched'] = matches(vasculitis_pattern)
    attn_notes['drowning_matched'] = matches(drowning_pattern)

    # Sepsis and shock are a little special: they are adjudicated on the snippet columns
    attn_notes['sepsis_matched'] = [passes_exclusions(seg_sepsis, sepsis_exclusion)
                                    for seg_sepsis in attn_notes['seg_sepsis']]
    # attn_notes['sepsis_matched'] = matches(sepsis_pattern)

    attn_notes['shock_matched'] = [passes_exclusions(seg_shock, shock_exclusion)
                                   for seg_shock in attn_notes['seg_shock']]
    # attn_notes['shock_matched'] = matches(shock_pattern)

    attn_notes['overdose_matched'] = matches(overdose_pattern)
    attn_notes['trauma_matched'] = matches(trauma_pattern)
    attn_notes['pancreatitis_matched'] = matches(pancreatitis_pattern)
    attn_notes['burn_matched'] = matches(burn_pattern)
    attn_notes['trali_matched'] = matches(trali_pattern)
    attn_notes['ards_matched'] = matches(ards_pattern)
    attn_notes['pregnant_matched'] = matches(pregnant_pattern)
    attn_notes['chf_matched'] = matches(chf_pattern)
    attn_notes['cardiogenic_matched'] = matches(cardiogenic_pattern)
    attn_notes['non_cardiogenic_matched'] = matches(non_cardiogenic_pattern)
    attn_notes['palliative_matched'] = matches(palliative_pattern)
    attn_notes['cardiac_arrest_matched'] = matches(cardiac_arrest_pattern)
    attn_notes['dementia_matched'] = matches(dementia_pattern)
    attn_notes['stroke_matched'] = matches(stroke_pattern)
    attn_notes['alcohol_matched'] = matches(alcohol_pattern)
    attn_notes['seizure_matched'] = matches(seizure_pattern)
    attn_notes['ami_matched'] = matches(ami_pattern)

    return attn_notes

In [None]:
# set plotting params
# The order of these calls matters: reset first, then apply the project settings on top.
import matplotlib as mpl
# Start from Matplotlib's built-in defaults so earlier styling in this session does not leak in
mpl.rcParams.update(mpl.rcParamsDefault)
plt.style.reload_library()
# Project helper from src/plots (not shown here); its result is passed to rcParams.update below,
# so it is presumably a mapping of rcParams keys to values. TODO confirm
rcparams = plots.stdrcparams1()
mpl.rcParams.update(rcparams)

In [None]:
# Data locations, all relative to the current working directory
basedir = Path("../..")
analysis_location = basedir.joinpath('Analysis_data')
cohort = 'hospital_a_2013'
path = analysis_location.joinpath(cohort)
mimic3_path = analysis_location.joinpath("MIMIC_III", "labeled_subset")

# Figures
figure_path = basedir.joinpath("Figures")

In [None]:
# Cohort notes (risk factor file with annotations); timestamps are parsed as time deltas
notes = pd.read_csv(path.joinpath("attending_notes_annotated.csv"))
notes['notes_timestamp'] = notes['notes_timestamp'].pipe(pd.to_timedelta)

# MIMIC III notes; timestamps are parsed as datetimes
mimic_iii = pd.read_csv(mimic3_path.joinpath("attending_notes.csv"))
mimic_iii['notes_timestamp'] = mimic_iii['notes_timestamp'].pipe(pd.to_datetime)

### Alright, text-match the notes!

In [None]:
# Adds one "<risk_factor>_matched" flag column per risk factor to `notes`.
# The function reads the `notes_text`, `seg_sepsis` and `seg_shock` columns.
notes = text_match_all_risk_factors(notes)

In [None]:
# Split the label columns by name, keeping DataFrame column order:
# names containing "sw" are the annotator's labels, names containing "matched" are the regex flags
annotated_sw = [column for column in notes.columns if "sw" in column]
annotated_regex = [column for column in notes.columns if "matched" in column]

In [None]:
# Column-wise totals, in the same order as `annotated_sw` / `annotated_regex`.
# For 0/1 or boolean columns the sum is the number of notes flagged.
# NOTE(review): this appears to be the "code cell #10" the notebook header points to
# for changing what is counted. Confirm if cells are added or removed above.
counts = list(notes[annotated_sw].sum())
counts_regex = list(notes[annotated_regex].sum())

In [None]:
# Display names, matched by position to the entries of `annotated_sw` / `annotated_regex`
annotations_nice = ['Pneumonia', 'Aspiration', 'Inhalation', 'Pulmonary contusion',
                    'Vasculitis', 'Drowning', 'Sepsis', 'Shock', 'Overdose', 'Trauma',
                    'Pancreatitis', 'Burn', 'TRALI', 'ARDS', 'Pregnant', 'Congestive Heart Failure',
                    'Cardiogenic', 'Noncardiogenic', 'Palliative', 'Cardiac arrest',
                    'Dementia', 'Stroke', 'Alcohol', 'Seizure', 'Acute myocardial infarction']

# One record per (risk factor, method): annotator labels first, then regex flags
labeled_rows = [
    {'risk_factor': annotations_nice[i], 'counts': counts[i], 'method': "Labeled 'yes'"}
    for i, _ in enumerate(annotated_sw)
]
regex_rows = [
    {'risk_factor': annotations_nice[i], 'counts': counts_regex[i], 'method': 'Regex-matched'}
    for i, _ in enumerate(annotated_regex)
]
agg = labeled_rows + regex_rows

# Long-format table for the bar plot, largest counts first
for_plot = pd.DataFrame(agg).sort_values('counts', ascending=False)

In [None]:
# Horizontal grouped bar chart: one pair of bars (labeled vs regex-matched) per risk factor.
# `for_plot` is already sorted by count, which sets the top-to-bottom order of the bars.
fig, ax = plt.subplots()
sns.barplot(x='counts', y='risk_factor', hue='method',
            hue_order=["Labeled 'yes'", 'Regex-matched'],
            data=for_plot, errorbar=None, ax=ax)

ax.set_xlabel('Count of notes')
# The risk-factor names on the y ticks are self-explanatory, so the axis label is left blank
ax.set_ylabel('')
ax.grid(linestyle=':', axis='x')
ax.legend(loc='lower right', title=None, frameon=False)

plt.tight_layout()
# plt.savefig(figure_path / 'risk_factor_count.png')
plt.show()

In [None]:
# Alternative selection (twelve risk factors); swap it in for the larger comparison:
# of_interest = [
#     'Pneumonia', 'Aspiration', 'Inhalation', 'Pulmonary contusion',
#     'Sepsis', 'Shock', 'Trauma', 'Pancreatitis', 'Congestive Heart Failure',
#     'Cardiogenic', 'Alcohol', 'Acute myocardial infarction'
#     ]

# Panel titles for the confusion matrices, in plotting order
of_interest = ['Sepsis', 'Shock']

In [None]:
# Alternative selection (twelve risk factors); swap it in for the larger comparison:
# _stems_of_interest = (
#     'pneumonia', 'aspiration', 'inhalation', 'pulmonary_contusion',
#     'sepsis', 'shock', 'trauma', 'pancreatitis', 'chf',
#     'cardiogenic', 'alcohol', 'ami'
#     )

# Column stems of the risk factors being compared, in plotting order
_stems_of_interest = ('sepsis', 'shock')

# Annotator label columns and regex flag columns, paired by position
of_interest_sw = [f'{stem}_sw' for stem in _stems_of_interest]
of_interest_regex = [f'{stem}_matched' for stem in _stems_of_interest]

In [None]:
# Cell captions for a 2x2 confusion matrix (each ends with a newline so a count can follow).
# NOTE(review): this layout (TN, FP / FN, TP) matches scikit-learn's raw confusion_matrix
# ordering (rows = true class, columns = predicted class). The heatmap cells in this notebook
# plot `confusion_matrix(...).transpose()[::-1, ::-1]` instead, and the sepsis cell further
# down rebinds `strings` with the layout for that flipped matrix. No visible cell reads this
# version; confirm before reusing it.
strings = np.asarray([['True negatives\n', 'False positives\n'],
                      ['False negatives\n', 'True positives\n']])

In [None]:
# i = 0

# fig2, ax2 = plt.subplots(4, 3, figsize=plots.stdfigsize(68, n_rows=4, n_cols=3, layout="double"))

# for theirs, mine in zip(of_interest_sw, of_interest_regex):
#     # fillna(0) is to compare regex against positively-annotated records by SW
#     # Another option is to use notnull() to compare regex against whether or not SW annotated (make 0 if her column is NaN, 1 otherwise)
    
#     y_true = notes[theirs].fillna(0).astype(int)
#     y_pred = notes[mine].astype(int)
#     cf = confusion_matrix(y_true, y_pred).transpose()[::-1, ::-1]
#     sns.heatmap(cf, fmt='d', annot=True, cmap='Blues', cbar=False, ax=ax2[i//3,i%3])
    
#     ax2[i//3,i%3].set_title(of_interest[i], fontweight='bold')
    
#     if i % 3 == 0:
#         ax2[i//3,i%3].set_ylabel("Regex-captured")
#         ax2[i//3,i%3].set_yticklabels(['Yes', 'No'], rotation=0)
#     else:
#         ax2[i//3,i%3].set_yticklabels([])
#         ax2[i//3,i%3].set_yticks([])
        
        
#     if i // 3 == 3:
#         ax2[i//3,i%3].set_xlabel("Labeled as 'yes'")
#         ax2[i//3,i%3].set_xticklabels(['Yes', 'No'])
#     else:
#         ax2[i//3,i%3].set_xticklabels([])
#         ax2[i//3,i%3].set_xticks([])
        
#     i += 1

# plt.tight_layout()
# # plt.savefig(figure_path/ 'SIfig5.png')
# plt.show()

In [None]:
fig2, ax2 = plt.subplots(1, 2, figsize=plots.stdfigsize(75, n_cols=2, layout="double"))

# One panel per risk factor: annotator label column vs. regex flag column
for i, (theirs, mine) in enumerate(zip(of_interest_sw, of_interest_regex)):
    panel = ax2[i % 2]

    # fillna(0) is to compare regex against positively-annotated records by SW
    # Another option is to use notnull() to compare regex against whether or not SW annotated (make 0 if her column is NaN, 1 otherwise)
    y_true = notes[theirs].fillna(0).astype(int)
    y_pred = notes[mine].astype(int)

    # Transpose and flip so that rows are the regex result and columns the label, "yes" first
    cf = confusion_matrix(y_true, y_pred).T[::-1, ::-1]
    sns.heatmap(cf, fmt='d', annot=True, cmap='Blues', cbar=False, ax=panel)

    panel.set_title(of_interest[i], fontweight='bold')

    # Only the left-hand panel carries the y axis label and tick labels
    if i % 2:
        panel.set_yticklabels([])
        panel.set_yticks([])
    else:
        panel.set_ylabel("Regex-captured")
        panel.set_yticklabels(['Yes', 'No'], rotation=0)

    panel.set_xlabel("Labeled as 'yes'")
    panel.set_xticklabels(['Yes', 'No'])

plt.tight_layout()
# plt.savefig(figure_path/ 'SIfig6.png')
plt.show()

### Repeating the process for MIMIC-III notes

In [None]:
# Adds the same "<risk_factor>_matched" flag columns to the MIMIC-III notes.
# The function reads the `notes_text`, `seg_sepsis` and `seg_shock` columns.
mimic_iii = text_match_all_risk_factors(mimic_iii)

In [None]:
# Split the label columns by name, keeping DataFrame column order:
# names containing "matched" are the regex flags, names containing "curt" are Curt's labels
annotated_regex = [column for column in mimic_iii.columns if "matched" in column]
annotated_curt = [column for column in mimic_iii.columns if "curt" in column]

In [None]:
# Keep only the regex columns whose risk-factor name (the column name without "_matched")
# appears inside at least one of Curt's column names. The list is filtered in place.
annotated_regex[:] = [
    regex_column
    for regex_column in annotated_regex
    if any(regex_column.replace("_matched", "") in curt_column
           for curt_column in annotated_curt)
]

In [None]:
# Regex columns left out of the MIMIC-III comparison.
_not_compared = {
    "ards_matched",
    "pneumonia_matched",
    "shock_matched",
    "trauma_matched",
    "cardiogenic_matched",
}

# Sort alphabetically and drop the excluded columns in one pass.
# Unlike chained list.remove() calls, this does not raise ValueError when the cell is
# re-run or when a column is already absent, so the cell is safe to execute repeatedly.
annotated_regex = sorted(
    column for column in annotated_regex if column not in _not_compared
)

In [None]:
# Curt's label columns for the compared risk factors, in alphabetical order so that they
# pair up by position with the sorted regex columns when the two lists are zipped
annotated_curt = [
    f'curt_{risk_factor}_(1=yes)'
    for risk_factor in (
        'aspiration',
        'burns',
        'pancreatitis',
        'pulmonary_contusion',
        'sepsis',
        'vasculitis',
    )
]

In [None]:
# Panel titles derived from the regex column names: drop "_matched", turn underscores into
# spaces, capitalize the first letter. This rebinds `annotations_nice` for the MIMIC-III plots.
annotations_nice = []
for regex_column in annotated_regex:
    readable = regex_column.replace("_matched", "").replace("_", " ")
    annotations_nice.append(readable.capitalize())

In [None]:
fig3, ax3 = plt.subplots(3, 2, figsize=plots.stdfigsize(44, n_rows=3, n_cols=2, layout="double"))

# One panel per risk factor, filled row by row in a 3x2 grid
for i, (theirs, mine) in enumerate(zip(annotated_curt, annotated_regex)):
    row, col = divmod(i, 2)
    panel = ax3[row, col]

    y_true = mimic_iii[theirs].astype(int)
    y_pred = mimic_iii[mine].astype(int)

    # Transpose and flip so that rows are the regex result and columns the ground truth, "yes" first
    cf = confusion_matrix(y_true, y_pred)
    cf_mod = cf.T[::-1, ::-1]

    sns.heatmap(cf_mod, fmt='d', annot=True, cmap='Blues', cbar=False, ax=panel)

    panel.set_title(annotations_nice[i], fontweight='bold')

    # y labelling only on the left column
    if col == 0:
        panel.set_yticklabels(['Yes', 'No'], rotation=0)
        panel.set_ylabel("Regex-adjudicated")
    else:
        panel.set_yticklabels([None, None])
        panel.set_yticks([])

    # x labelling only on the bottom row
    if row == 2:
        panel.set_xticklabels(['Yes', 'No'])
        panel.set_xlabel("Ground truth")
    else:
        panel.set_xticklabels([None, None])
        panel.set_xticks([])

plt.tight_layout()
# plt.savefig(figure_path / 'fig6.png')
# plt.savefig(figure_path / 'fig6.pdf')
plt.show()

In [None]:
fig4, ax4 = plt.subplots(1, 1, figsize=plots.stdfigsize(0, layout="single"))

y_true = mimic_iii['curt_sepsis_(1=yes)'].astype(int)
y_pred = mimic_iii['sepsis_matched'].astype(int)

# Transpose and flip so that rows are the regex result and columns the ground truth, "yes" first
cf = confusion_matrix(y_true, y_pred).T[::-1, ::-1]

# Captions laid out to match the flipped matrix above
strings = np.asarray([['True positives\n', 'False positives\n'],
                      ['False negatives\n', 'True negatives\n']])

# Each cell shows its caption followed by its count
cell_texts = [f"{string} {value:.0f}"
              for string, value in zip(strings.flatten(), cf.flatten())]
labels = np.asarray(cell_texts).reshape(2, 2)

sns.heatmap(cf, fmt='', annot=labels, cmap='Blues', cbar=False, ax=ax4)

ax4.set_title("Sepsis", fontweight='bold')
ax4.set_ylabel("Regex-adjudicated")
ax4.set_xlabel("Ground truth")
# The captions inside the cells replace the tick marks and tick labels
ax4.tick_params(axis='both', bottom=False, left=False,
                labelbottom=False, labelleft=False)

plt.tight_layout()
# plt.savefig(figure_path / 'fig6_sepsis.png')
plt.show()

In [None]:
# Regex false positives for trauma: rows where the regex flag is set but the
# ground-truth column is falsy (0). Shows the note text next to both flags.
mimic_iii.loc[~mimic_iii['curt_trauma_(1=yes)'].astype(bool) & mimic_iii['trauma_matched'], ['notes_text', 'curt_trauma_(1=yes)', 'trauma_matched']]

In [None]:
# Print the full text of the note whose index label is 743 (label-based lookup via .loc)
print(mimic_iii.loc[743, 'notes_text'])