In [67]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pulp import *
import os, glob

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/Users/conorcorbin/.config/gcloud/application_default_credentials.json' 
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101' 
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery




### First We'll find misses on the level of the organism instead of the encounter
This requires us to use code from the label generation notebook, but we won't collapse to the encounter level at the end.  

In [68]:
# Define Fill in And Processing Functions
def fill_in_cefazolin(x):
    
    # If not missing just return what it is. 
    if x.Cefazolin == 'Susceptible' or x.Cefazolin == 'Resistant':
        return x.Cefazolin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Cefazolin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # If anything resistant to Cefepime or Ceftriaxone and Cefazolin not listed, assume resistant to Cefazolin
    if x.Cefepime == "Resistant" or x.Ceftriaxone == 'Resistant':
        return "Resistant"
    
    # For anything STREPTOCOCCUS related except STREPTOCOCCUS PNEUMONIAE  - refer to penicillin if not listed 
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism) and x.organism != 'STREPTOCOCCUS PNEUMONIAE':
        if x.Penicillin is not None:
            return x.Penicillin
    
    # Check For MRSA in STAPH AUREUS if not listed
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_ceftriaxone(x):
    
    # If not missing just return what it is. 
    if x.Ceftriaxone == 'Susceptible' or x.Ceftriaxone == 'Resistant':
        return x.Ceftriaxone
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Ceftriaxone' and `Always Resistant` == 1")['bug'].values)
    
    # If anything resistant to Cefepime, assume resistant to Ceftriaxone
    if x.Cefepime == "Resistant":
        return "Resistant"
    
    # If susceptible to Cefazolin, assume susceptible to Ceftriaxone
    if x.Cefazolin == "Susceptible": 
        return "Susceptible"
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"

    # For anything STREPTOCOCCUS related - refer to penicillin if not listed
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism):
        if x.Penicillin is not None:
            return x.Penicillin
        
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_cefepime(x):
    
    # If not missing just return what it is. 
    if x.Cefepime == 'Susceptible' or x.Cefepime == 'Resistant':
        return x.Cefepime
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Cefepime' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # For anything STREPTOCOCCUS related - refer to penicillin 
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism):
        if x.Penicillin is not None:
            return x.Penicillin
        
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # If susceptible to Cefazolin or Ceftriaxone assume susceptible to Cefepime
    if x.Cefazolin == "Susceptible" or x.Ceftriaxone == "Susceptible":
        return "Susceptible"
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_zosyn(x):
     
    # If not missing just return what it is. 
    if x.Zosyn == 'Susceptible' or x.Zosyn == 'Resistant':
        return x.Zosyn
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules
                 .query("antibiotic == 'Piperacillin/Tazobactam' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # For anything STREPTOCOCCUS related - refer to penicillin 
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism):
        if x.Penicillin is not None:
            return x.Penicillin
        
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
        
    # If susceptible to ampicillin, then zosyn susceptible - useless rule because of the following rule
    if x.Ampicillin == "Susceptible":
        return x.Ampicillin
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"


def fill_in_vancomycin(x):
    # If not missing just return what it is. 
    if x.Vancomycin == 'Susceptible' or x.Vancomycin == 'Resistant':
        return x.Vancomycin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Vancomycin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Susceptible"

def fill_in_meropenem(x):
    # If not missing just return what it is. 
    if x.Meropenem == 'Susceptible' or x.Meropenem == 'Resistant':
        return x.Meropenem
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Meropenem' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
        
    # If susceptible to ampicillin, then meropenem susceptible
    if x.Ampicillin == "Susceptible": # not the case with Enteroccocus, but this should be in alwasys resistant list above. 
        return x.Ampicillin # also - useless rule because of rule below
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_ciprofloxacin(x):
    # If not missing just return what it is. 
    if x.Ciprofloxacin == 'Susceptible' or x.Ciprofloxacin == 'Resistant':
        return x.Ciprofloxacin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Ciprofloxacin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # If Resistant to Levofloxacin, resistant to Cipro
    if x.Levofloxacin == "Resistant":
        return "Resistant"
    
    # Check for MRSA - assume same as oxacillin if not listed
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_ampicillin(x):
    
    # If not missing just return what it is. 
    if x.Ampicillin == 'Susceptible' or x.Ampicillin == 'Resistant':
        return x.Ampicillin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Ampicillin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
        
    # Check for MRSA - assume same as oxacillin if not listed
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"


def combine_labels(arr):
    """
    Sometimes organisms have multiple suscept labels. When they do, if any of them are not one of
    Susceptible, Positive, or Susceptible - Dose Dependent then we say organism is resistant to 
    said antibiotic
    """
    for a in arr:
        if a not in ['Susceptible', 'Positive', 'Susceptible - Dose Dependent']:
            return 'Resistant'
    return 'Susceptible'

def combine_antibiotic_syns(x):
    """
    Antibiotics often are given different names in the AST tables, this function combines
    synnonyms so that we don't have to refer to multiple names in downstream analysis. 
    """
    if x == 'Aztreonam.':
        return 'Aztreonam'
    elif x == 'Cefazolin..':
        return 'Cefazolin'
    elif x == 'Ceftazidime.':
        return 'Ceftazidime'
    elif x in ('Ceftriaxone (Meningeal)', 'Ceftriaxone (Non-Meningeal)', 'Ceftriaxone.'):
        return 'Ceftriaxone'
    elif x in ('Ciprofloxacin.'):
        return 'Ciprofloxacin'
    elif x == 'Gentamicin 500 mcg/ml.':
        return 'Gentamicin'
    elif x in ('Oxacillin Screen', 'Oxacillin.'):
        return 'Oxacillin'
    elif x in ('PENICILLIN G (MENINGEAL)','PENICILLIN G (NON-MENINGEAL)', 'PENICILLIN V (ORAL)', 'Penicillin..'):
        return 'Penicillin'
    elif x == 'Trimethoprim/Sulfamethoxazole.':
        return 'Trimethoprim/Sulfamethoxazole'
    else:
        return x

In [69]:
import pdb
# Query positive culture data
query = """
SELECT DISTINCT 
    cohort.pat_enc_csn_id_coded, cults.order_proc_id_coded, cults.description,
    cs.sens_organism_sid, cs.line, cs.organism,
    cs.antibiotic, cs.suscept, cs.sensitivity_value, cs.sens_ref_range 
FROM 
    `mining-clinical-decisions.abx.culture_orders_within_24_hrs` cults
INNER JOIN 
    `mining-clinical-decisions.abx.cohort_not_infected_rules` cohort
USING 
    (pat_enc_csn_id_coded)
INNER JOIN 
    `shc_core.culture_sensitivity` cs
USING 
    (order_proc_id_coded)
WHERE 
    organism not LIKE "%CANDIDA%"
AND 
    organism not in ('COAG NEGATIVE STAPHYLOCOCCUS', 'HAEMOPHILUS INFLUENZAE', 'HAEMOPHILUS PARAINFLUENZAE')
ORDER BY 
    cohort.pat_enc_csn_id_coded, cults.order_proc_id_coded, cs.line
"""

query_job = client.query(query)
df=query_job.to_dataframe()
df.head()

Unnamed: 0,pat_enc_csn_id_coded,order_proc_id_coded,description,sens_organism_sid,line,organism,antibiotic,suscept,sensitivity_value,sens_ref_range
0,131003775398,357750415,URINE CULTURE,ZZ00,1,KLEBSIELLA OXYTOCA,Ampicillin,Resistant,>=32,
1,131003775398,357750415,URINE CULTURE,ZZ00,2,KLEBSIELLA OXYTOCA,Cefazolin,Susceptible,8,
2,131003775398,357750415,URINE CULTURE,ZZ00,3,KLEBSIELLA OXYTOCA,Piperacillin/Tazobactam,Susceptible,<=4,
3,131003775398,357750415,URINE CULTURE,ZZ00,4,KLEBSIELLA OXYTOCA,Gentamicin,Susceptible,<=1,
4,131003775398,357750415,URINE CULTURE,ZZ00,5,KLEBSIELLA OXYTOCA,Ciprofloxacin,Susceptible,<=0.25,


In [71]:
filter_cols = ['example', 'pat_enc_csn_id_coded', 'organism', 'description', 'order_proc_id_coded', 'sens_organism_sid',
               'Cefepime', 'Ceftriaxone', 'Cefazolin', 'Vancomycin', 'Meropenem', 'Zosyn',
               'Ampicillin', 'Penicillin', 'Oxacillin', 'Ciprofloxacin', 'Levofloxacin']

df_wide = (df
    .assign(example=lambda x: x.pat_enc_csn_id_coded.map(str) + ', ' \
            + x.order_proc_id_coded.map(str) + ', ' + x.sens_organism_sid + ', ' + x.organism + ', ' + x.description,
            antibiotic=lambda x: [combine_antibiotic_syns(a) for a in x.antibiotic])
    .groupby(['example', 'antibiotic']) 
    .agg({'suscept' : lambda x: combine_labels(x)})          
    .reset_index()
    .pivot(index='example', columns='antibiotic', values='suscept')
    .reset_index()
    .assign(organism = lambda x: [a.split(', ')[3] for a in x.example],
            order_proc_id_coded = lambda x: [a.split(', ')[1] for a in x.example],
            sens_organism_sid = lambda x: [a.split(', ')[2] for a in x.example],
            pat_enc_csn_id_coded = lambda x: [a.split(', ')[0] for a in x.example],
            description = lambda x: [a.split(', ')[4] for a in x.example])
    .rename(columns={'Piperacillin/Tazobactam' : 'Zosyn'})
)
df_wide = df_wide[filter_cols]
df_wide.head()


antibiotic,example,pat_enc_csn_id_coded,organism,description,order_proc_id_coded,sens_organism_sid,Cefepime,Ceftriaxone,Cefazolin,Vancomycin,Meropenem,Zosyn,Ampicillin,Penicillin,Oxacillin,Ciprofloxacin,Levofloxacin
0,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYT...",131003775398,KLEBSIELLA OXYTOCA,URINE CULTURE,357750415,ZZ00,,,Susceptible,,,Susceptible,Resistant,,,Susceptible,Susceptible
1,"131003775398, 357750417, ZZ00, KLEBSIELLA OXYT...",131003775398,KLEBSIELLA OXYTOCA,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLES),357750417,ZZ00,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Resistant,,,Susceptible,Susceptible
2,"131003775398, 357750417, ZZ01, PROTEUS MIRABIL...",131003775398,PROTEUS MIRABILIS,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLES),357750417,ZZ01,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Susceptible,,,Susceptible,Susceptible
3,"131004006894, 358490468, ZZ00, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,FLUID CULTURE AND GRAM STAIN,358490468,ZZ00,,Susceptible,,,,,,,,,
4,"131004006894, 358490468, ZZ01, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,FLUID CULTURE AND GRAM STAIN,358490468,ZZ01,,Susceptible,,,,,,Susceptible,,,


In [72]:
df_wide = (df_wide
    .assign(Ampicillin=lambda x: df_wide.apply(lambda x: fill_in_ampicillin(x), axis=1),
            Cefepime=lambda x: df_wide.apply(lambda x: fill_in_cefepime(x), axis=1),
            Ceftriaxone=lambda x: df_wide.apply(lambda x: fill_in_ceftriaxone(x), axis=1),
            Cefazolin=lambda x: df_wide.apply(lambda x: fill_in_cefazolin(x), axis=1),
            Ciprofloxacin=lambda x: df_wide.apply(lambda x: fill_in_ciprofloxacin(x), axis=1),
            Zosyn=lambda x: df_wide.apply(lambda x: fill_in_zosyn(x), axis=1),
            Vancomycin=lambda x: df_wide.apply(lambda x: fill_in_vancomycin(x), axis=1),
            Meropenem=lambda x: df_wide.apply(lambda x: fill_in_meropenem(x), axis=1),
            Vancomycin_Ceftriaxone=lambda x: ["Susceptible" if a == "Susceptible" 
                                              or b == "Susceptible" else "Resistant"
                                              for a, b in zip(x.Vancomycin, x.Ceftriaxone)],
            Vancomycin_Cefepime=lambda x: ["Susceptible" if a == "Susceptible" 
                                           or b == "Susceptible" else "Resistant"
                                           for a, b in zip(x.Vancomycin, x.Cefepime)],
            Vancomycin_Zosyn=lambda x: ["Susceptible" if a == "Susceptible" 
                                        or b == "Susceptible" else "Resistant"
                                        for a, b in zip(x.Vancomycin, x.Zosyn)],
            Vancomycin_Meropenem=lambda x: ["Susceptible" if a == "Susceptible" 
                                            or b == "Susceptible" else "Resistant"
                                            for a, b in zip(x.Vancomycin, x.Meropenem)])
    .reset_index()
)
df_wide.head()


antibiotic,index,example,pat_enc_csn_id_coded,organism,description,order_proc_id_coded,sens_organism_sid,Cefepime,Ceftriaxone,Cefazolin,...,Zosyn,Ampicillin,Penicillin,Oxacillin,Ciprofloxacin,Levofloxacin,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Vancomycin_Zosyn,Vancomycin_Meropenem
0,0,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYT...",131003775398,KLEBSIELLA OXYTOCA,URINE CULTURE,357750415,ZZ00,Susceptible,Susceptible,Susceptible,...,Susceptible,Resistant,,,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
1,1,"131003775398, 357750417, ZZ00, KLEBSIELLA OXYT...",131003775398,KLEBSIELLA OXYTOCA,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLES),357750417,ZZ00,Susceptible,Susceptible,Susceptible,...,Susceptible,Resistant,,,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
2,2,"131003775398, 357750417, ZZ01, PROTEUS MIRABIL...",131003775398,PROTEUS MIRABILIS,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLES),357750417,ZZ01,Susceptible,Susceptible,Susceptible,...,Susceptible,Susceptible,,,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
3,3,"131004006894, 358490468, ZZ00, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,FLUID CULTURE AND GRAM STAIN,358490468,ZZ00,,Susceptible,,...,,Susceptible,,,Resistant,,Susceptible,Susceptible,Susceptible,Susceptible
4,4,"131004006894, 358490468, ZZ01, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,FLUID CULTURE AND GRAM STAIN,358490468,ZZ01,Susceptible,Susceptible,Susceptible,...,Susceptible,Susceptible,Susceptible,,Resistant,,Susceptible,Susceptible,Susceptible,Susceptible


In [79]:
# Fill in any NA's with Susceptible - nancy watz rule
abx_map = {'Ceftriaxone' : "CEFTRIAXONE",
           'Vancomycin_Zosyn' : "PIPERACILLIN-TAZOBACTAM VANCOMYCIN",
           'Zosyn' : "PIPERACILLIN-TAZOBACTAM",
           'Vancomycin_Ceftriaxone' : "CEFTRIAXONE VANCOMYCIN",
           'Vancomycin_Cefepime' : "CEFEPIME VANCOMYCIN",
           'Cefepime' : "CEFEPIME",
           'Vancomycin' :  "VANCOMYCIN",
           'Meropenem' : "MEROPENEM",
           'Vancomycin_Meropenem' : "MEROPENEM VANCOMYCIN",
           'Cefazolin' : "CEFAZOLIN",
           'Ciprofloxacin' : "CIPROFLOXACIN",
           'Ampicillin' : 'AMPICILLIN'
          }
df_wide[list(abx_map.keys())] = df_wide[list(abx_map.keys())].fillna('Susceptible')
df_ast_labels_by_bug = df_wide[['pat_enc_csn_id_coded', 'organism', 'order_proc_id_coded', 'sens_organism_sid'] + list(abx_map.keys())]
df_ast_labels_by_bug.head()


antibiotic,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Ceftriaxone,Vancomycin_Zosyn,Zosyn,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Cefepime,Vancomycin,Meropenem,Vancomycin_Meropenem,Cefazolin,Ciprofloxacin,Ampicillin
0,131003775398,KLEBSIELLA OXYTOCA,357750415,ZZ00,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Resistant
1,131003775398,KLEBSIELLA OXYTOCA,357750417,ZZ00,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Resistant
2,131003775398,PROTEUS MIRABILIS,357750417,ZZ01,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
3,131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ00,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible
4,131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ01,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible


In [15]:
abx_map = {'Ceftriaxone' : "CEFTRIAXONE",
           'Vancomycin_Zosyn' : "PIPERACILLIN-TAZOBACTAM VANCOMYCIN",
           'Zosyn' : "PIPERACILLIN-TAZOBACTAM",
           'Vancomycin_Ceftriaxone' : "CEFTRIAXONE VANCOMYCIN",
           'Vancomycin_Cefepime' : "CEFEPIME VANCOMYCIN",
           'Cefepime' : "CEFEPIME",
           'Vancomycin' :  "VANCOMYCIN",
           'Meropenem' : "MEROPENEM",
           'Vancomycin_Meropenem' : "MEROPENEM VANCOMYCIN",
           'Cefazolin' : "CEFAZOLIN",
           'Ciprofloxacin' : "CIPROFLOXACIN",
           'Ampicillin' : 'AMPICILLIN'
          }
abx_map_inverse = {abx_map[key] : key for key in abx_map}
abx_map_inverse['CEFTRIAXONE PIPERACILLIN-TAZOBACTAM VANCOMYCIN'] = 'Vancomycin_Zosyn'
# abx_map_inverse['LEVOFLOXACIN PIPERACILLIN-TAZOBACTAM VANCOMYCIN'] = 'Vancomycin_Zosyn'
abx_map_inverse['AZITHROMYCIN PIPERACILLIN-TAZOBACTAM VANCOMYCIN'] = 'Vancomycin_Zosyn'
# abx_map_inverse['MEROPENEM PIPERACILLIN-TAZOBACTAM VANCOMYCIN'] = 'Vancomycin_Meropenem'
abx_map_inverse['AZITHROMYCIN CEFTRIAXONE'] = 'Ceftriaxone'

query = """
SELECT
    om.anon_id, om.pat_enc_csn_id_coded, om.order_med_id_coded, l.index_time, om.med_description,
    l.Ampicillin, l.Ciprofloxacin, l.Cefazolin, l.Ceftriaxone, l.Cefepime, l.Zosyn, l.Vancomycin,
    l.Meropenem, l.Vancomycin_Meropenem, l.Vancomycin_Zosyn, l.Vancomycin_Cefepime, l.Vancomycin_Ceftriaxone
FROM
    `mining-clinical-decisions.abx.abx_orders_given_and_stopped` om
INNER JOIN 
    `mining-clinical-decisions.abx.final_ast_labels` l
USING
    (pat_enc_csn_id_coded)
WHERE
    om.was_given = 1
ORDER BY 
    om.anon_id, om.pat_enc_csn_id_coded, om.order_time
"""
query_job = client.query(query)
df_abx = query_job.result().to_dataframe()
df_abx.head()

# Lambda that aggregates Antibiotic orders after we've grouped by CSN
concat_abx = lambda x : ' '.join(np.unique(sorted([a for a in x])))

# Aggregate abx orders
df_drugs = (df_abx
    .assign(med_description=lambda x: [a.split(' ')[0] for a in x.med_description]) # Only Take first word (abx)
    .assign(med_description=lambda x: [(a.replace('PIPERACILLIN-TAZOBACTAM-DEXTRS','PIPERACILLIN-TAZOBACTAM')
                                        .replace('VANCOMYCIN-WATER', 'VANCOMYCIN'))
                                    for a in x.med_description])
    .assign(year=lambda x: x.index_time.dt.year) # get year of each CSN - used to filter later on
    .groupby('pat_enc_csn_id_coded')
    .agg({'med_description' : concat_abx,
        'year' : 'first',
        'Ampicillin' : 'first',
        'Ciprofloxacin' : 'first',
        'Cefazolin' : 'first',
        'Ceftriaxone' : 'first',
        'Cefepime' : 'first',
        'Zosyn' : 'first',
        'Vancomycin' : 'first',
        'Meropenem' : 'first',
        'Vancomycin_Ceftriaxone' : 'first',
        'Vancomycin_Cefepime' : 'first',
        'Vancomycin_Zosyn' : 'first',
        'Vancomycin_Meropenem' : 'first'})
    .reset_index()
    # Only look at test set data and CSNs where allowed antibiotic selection was administered
    .query("year == 2019 and med_description in @abx_map_inverse", engine='python') 
    .assign(med_description=lambda x: [abx_map_inverse[a] for a in x.med_description])
)

In [80]:
### Solve linear programming formulation so we can stratify by misses of optimized procedure as well
import sys
sys.path.insert(0, "../decision_alg")
from DecisionAnalysis import AbxDecisionMaker, load_predictions
from integer_programming import get_clinician_prescribing_patterns

abx_settings = {"Ceftriaxone" : 404,
    "Vancomycin_Zosyn" :  149,
    "Zosyn" : 102,
    "Vancomycin_Ceftriaxone" : 31,
    "Vancomycin_Cefepime" : 23,
    "Cefepime" : 14,
    "Vancomycin" : 13,
    "Vancomycin_Meropenem" : 9,
    "Meropenem" : 9,
    "Cefazolin" : 8,
    "Ciprofloxacin" : 8,
    "Ampicillin" : 0,
    }

df_predictions = load_predictions()
df_drugs = get_clinician_prescribing_patterns()
opt = AbxDecisionMaker(df_predictions, df_drugs, abx_settings)

In [81]:
opt.solve_and_assign()

In [82]:
# Map med_description to df_ast_labels_by_bug
df_ast_labels_by_bug = (df_ast_labels_by_bug
    .assign(pat_enc_csn_id_coded = lambda x: x.pat_enc_csn_id_coded.astype(int))
    .merge(opt.df[['pat_enc_csn_id_coded', 'med_description', 'IP_med_description']],
           how='inner',
           on='pat_enc_csn_id_coded')
)
df_ast_labels_by_bug.head()


Unnamed: 0,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Ceftriaxone,Vancomycin_Zosyn,Zosyn,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Cefepime,Vancomycin,Meropenem,Vancomycin_Meropenem,Cefazolin,Ciprofloxacin,Ampicillin,med_description,IP_med_description
0,131260812263,CITROBACTER FREUNDII COMPLEX,588888988,1,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Resistant,Susceptible,Resistant,Ceftriaxone,Ceftriaxone
1,131260812263,PROTEUS MIRABILIS,588888988,2,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Ceftriaxone,Ceftriaxone
2,131260883970,ENTEROCOCCUS SPECIES,589085205,1,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Resistant,Susceptible,Resistant,Susceptible,Susceptible,Vancomycin_Zosyn,Ceftriaxone
3,131261001599,ESCHERICHIA COLI,589354826,1,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Zosyn,Vancomycin_Zosyn
4,131261014293,STAPHYLOCOCCUS AUREUS,589441192,1,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Ceftriaxone,Ceftriaxone


In [83]:
# Abx labels to binary
to_binary = lambda x: 1 if x == "Susceptible" else 0
for key in abx_map.keys():
    df_ast_labels_by_bug[key] = [1 if x == "Susceptible" else 0 for x in df_ast_labels_by_bug[key].values]
df_ast_labels_by_bug.head()

Unnamed: 0,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Ceftriaxone,Vancomycin_Zosyn,Zosyn,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Cefepime,Vancomycin,Meropenem,Vancomycin_Meropenem,Cefazolin,Ciprofloxacin,Ampicillin,med_description,IP_med_description
0,131260812263,CITROBACTER FREUNDII COMPLEX,588888988,1,1,1,1,1,1,1,0,1,1,0,1,0,Ceftriaxone,Ceftriaxone
1,131260812263,PROTEUS MIRABILIS,588888988,2,1,1,1,1,1,1,0,1,1,1,1,1,Ceftriaxone,Ceftriaxone
2,131260883970,ENTEROCOCCUS SPECIES,589085205,1,0,1,1,1,1,0,1,0,1,0,1,1,Vancomycin_Zosyn,Ceftriaxone
3,131261001599,ESCHERICHIA COLI,589354826,1,1,1,1,1,1,1,0,1,1,1,1,0,Zosyn,Vancomycin_Zosyn
4,131261014293,STAPHYLOCOCCUS AUREUS,589441192,1,1,1,1,1,1,1,1,1,1,1,1,1,Ceftriaxone,Ceftriaxone


In [86]:
def compute_was_covered(x, decision_column='med_description'):
    """
    Given med description, find appropriate label column and return whether patient was covered during CSN
    Returns "Not in abx options" if abx regimen isn't in our set of 12 options - useful for filtering later
    """
    if decision_column == 'med_description':
        med_description = x.med_description
    elif decision_column == 'random_med_description':
        med_description = x.random_med_description
    elif decision_column == 'IP_med_description':
        med_description = x.IP_med_description

    return x[med_description]

df_ast_labels_by_bug = (df_ast_labels_by_bug
    .assign(dr_covered=df_ast_labels_by_bug.apply(lambda x: compute_was_covered(x), axis=1))
    .assign(alg_covered=df_ast_labels_by_bug.apply(lambda x: compute_was_covered(x, 'IP_med_description'), axis=1))
)

In [88]:
### Query culture sensitivity table for culture types
query = """
SELECT DISTINCT
    description, order_proc_id_coded 
FROM 
    `mining-clinical-decisions.abx.culture_orders_within_24_hrs` cult_orders
"""
query_job = client.query(query)
df_culture_types = query_job.result().to_dataframe()
df_culture_types['order_proc_id_coded'] = df_culture_types['order_proc_id_coded'].astype(int)

In [89]:
df_ast_labels_by_bug.head()

Unnamed: 0,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Ceftriaxone,Vancomycin_Zosyn,Zosyn,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Cefepime,Vancomycin,Meropenem,Vancomycin_Meropenem,Cefazolin,Ciprofloxacin,Ampicillin,med_description,IP_med_description,dr_covered,alg_covered
0,131260812263,CITROBACTER FREUNDII COMPLEX,588888988,1,1,1,1,1,1,1,0,1,1,0,1,0,Ceftriaxone,Ceftriaxone,1,1
1,131260812263,PROTEUS MIRABILIS,588888988,2,1,1,1,1,1,1,0,1,1,1,1,1,Ceftriaxone,Ceftriaxone,1,1
2,131260883970,ENTEROCOCCUS SPECIES,589085205,1,0,1,1,1,1,0,1,0,1,0,1,1,Vancomycin_Zosyn,Ceftriaxone,1,0
3,131261001599,ESCHERICHIA COLI,589354826,1,1,1,1,1,1,1,0,1,1,1,1,0,Zosyn,Vancomycin_Zosyn,1,1
4,131261014293,STAPHYLOCOCCUS AUREUS,589441192,1,1,1,1,1,1,1,1,1,1,1,1,1,Ceftriaxone,Ceftriaxone,1,1


In [90]:
df_ast_labels_by_bug['order_proc_id_coded'] = df_ast_labels_by_bug['order_proc_id_coded'].astype(int)
df_ast_labels_by_bug = (df_ast_labels_by_bug
                        .merge(df_culture_types, on='order_proc_id_coded', how='left')
                       )
df_ast_labels_by_bug.head()

Unnamed: 0,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Ceftriaxone,Vancomycin_Zosyn,Zosyn,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Cefepime,...,Meropenem,Vancomycin_Meropenem,Cefazolin,Ciprofloxacin,Ampicillin,med_description,IP_med_description,dr_covered,alg_covered,description
0,131260812263,CITROBACTER FREUNDII COMPLEX,588888988,1,1,1,1,1,1,1,...,1,1,0,1,0,Ceftriaxone,Ceftriaxone,1,1,URINE CULTURE
1,131260812263,PROTEUS MIRABILIS,588888988,2,1,1,1,1,1,1,...,1,1,1,1,1,Ceftriaxone,Ceftriaxone,1,1,URINE CULTURE
2,131260883970,ENTEROCOCCUS SPECIES,589085205,1,0,1,1,1,1,0,...,0,1,0,1,1,Vancomycin_Zosyn,Ceftriaxone,1,0,URINE CULTURE
3,131261001599,ESCHERICHIA COLI,589354826,1,1,1,1,1,1,1,...,1,1,1,1,0,Zosyn,Vancomycin_Zosyn,1,1,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLE)
4,131261014293,STAPHYLOCOCCUS AUREUS,589441192,1,1,1,1,1,1,1,...,1,1,1,1,1,Ceftriaxone,Ceftriaxone,1,1,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLE)


#### Get most common doctor misses

In [91]:
import pdb
        
def pipe_func(df):
    df = df.assign(organism_and_culture = df.apply(lambda x: x['description'] + ' ' + x['organism'], axis=1))
    return df

def format_description(x):
    if "URINE" in x:
        return 'Urine Culture'
    elif "BLOOD" in x:
        return "Blood Culture"
    else:
        return "Other Fluid Culture"
df_frequent_misses = (df_ast_labels_by_bug
.query("dr_covered == 0", engine='python')
.assign(description = lambda x: [format_description(a) for a in x.description])
.assign(organism = lambda x: [a.capitalize() for a in x.organism])
.groupby(['description', 'organism'])
.agg(num_misses=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values(['description', 'num_misses'], ascending=False)
)
df_frequent_misses

Unnamed: 0,description,organism,num_misses
21,Urine Culture,Enterococcus species,38
22,Urine Culture,Escherichia coli,23
19,Urine Culture,Enterococcus faecalis,12
30,Urine Culture,Pseudomonas aeruginosa,9
18,Urine Culture,Enterobacter cloacae complex,7
25,Urine Culture,Klebsiella pneumoniae,6
20,Urine Culture,Enterococcus faecium,3
17,Urine Culture,Citrobacter freundii complex,2
31,Urine Culture,Staph aureus {mrsa},2
23,Urine Culture,Klebsiella aerogenes,1


#### Get most common alg misses

In [92]:
df_frequent_misses_alg = (df_ast_labels_by_bug
.query("alg_covered == 0", engine='python')
.assign(description = lambda x: [format_description(a) for a in x.description])
.assign(organism = lambda x: [a.capitalize() for a in x.organism])
.groupby(['description', 'organism'])
.agg(num_misses=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values(['description', 'num_misses'], ascending=False)
)
df_frequent_misses_alg

Unnamed: 0,description,organism,num_misses
24,Urine Culture,Enterococcus species,24
25,Urine Culture,Escherichia coli,23
22,Urine Culture,Enterococcus faecalis,8
32,Urine Culture,Pseudomonas aeruginosa,6
21,Urine Culture,Enterobacter cloacae complex,5
28,Urine Culture,Klebsiella pneumoniae,4
19,Urine Culture,Citrobacter freundii complex,2
23,Urine Culture,Enterococcus faecium,2
20,Urine Culture,Citrobacter species,1
26,Urine Culture,Klebsiella aerogenes,1


In [105]:
map_high_level_organism = {
    'Escherichia coli' : 'Lactose Fermenting GNRs',
    'Enterococcus species' : 'Enterococcus species',
    'Enterococcus faecalis' : 'Enterococcus species',
    'Enterobacter cloacae complex' : "Lactose Fermenting GNRs",
    'Klebsiella pneumoniae' : "Lactose Fermenting GNRs",
    'Morganella morganii' : "Non Lactose Fermenting GNRs",
    'Enterococcus faecium' : 'Enterococcus species',
    'Citrobacter freundii complex' : "Lactose Fermenting GNRs",
    'Mucoid pseudomonas aeruginosa' : 'Non Lactose Fermenting GNRs',
    'Proteus mirabilis' : 'Non Lactose Fermenting GNRs',
    'Proteus vulgaris group' : 'Non Lactose Fermenting GNRs',
    'Zzzcitrobacter amalonaticus' : 'Non Lactose Fermenting GNRs', 
    'Elizabethkingia species' : 'Non Lactose Fermenting GNRs',
    'Serratia marcescens' : 'Non Lactose Fermenting GNRs',
    'Stenotrophomonas maltophilia' : 'Non Lactose Fermenting GNRs',
    'Non fermenting gram negative rods' : 'Non Lactose Fermenting GNRs',
    'Streptococcus parasanguinis' : 'Streptococcus species',
    'Streptococcus mitis group' : 'Streptococcus species',
    'Viridans group streptococci not s. pneumoniae' : 'Streptococcus species',
    'Pseudomonas aeruginosa' : 'Non Lactose Fermenting GNRs',
    'Klebsiella aerogenes' : 'Lactose Fermenting GNRs',
    'Klebsiella oxytoca' : 'Lactose Fermenting GNRs',
    'Staph aureus {mrsa}' : 'MRSA',
    'Streptococcus species' : 'Streptococcus species',
    'Enterococcus gallinarum' : 'Enterococcus species',
    'Streptococcus sanguinis' : 'Streptococcus species',
    'Streptococcus salivarius group' : 'Streptococcus species',
    'Citrobacter species' : 'Lactose Fermenting GNRs',
}

### Group doctor misses into high level organisms

In [93]:
df_frequent_misses_high_level = (df_ast_labels_by_bug
.query("dr_covered == 0", engine='python')
.assign(description = lambda x: [format_description(a) for a in x.description])
.assign(organism = lambda x: [a.capitalize() for a in x.organism])
.assign(organism=lambda x: [map_high_level_organism[a] for a in x.organism])
.groupby(['description', 'organism'])
.agg(num_misses=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values(['num_misses'], ascending=False)
.rename(columns={'description' : 'Culture Type',
                 'organism' : 'Infection Type',
                 'num_misses' : 'Number of Misses'})
)
df_frequent_misses_high_level.to_csv('doctor_misses_by_infection_type.csv', index=None)
df_frequent_misses_high_level

Unnamed: 0,Culture Type,Infection Type,Number of Misses
6,Urine Culture,Enterococcus species,52
7,Urine Culture,Lactose Fermenting GNRs,39
9,Urine Culture,Non Lactose Fermenting GNRs,14
1,Blood Culture,Lactose Fermenting GNRs,8
2,Blood Culture,Non Lactose Fermenting GNRs,5
5,Other Fluid Culture,Non Lactose Fermenting GNRs,4
0,Blood Culture,Enterococcus species,3
3,Blood Culture,Streptococcus species,3
8,Urine Culture,MRSA,2
4,Other Fluid Culture,Enterococcus species,1


### Group Alg misses into high level organisms

In [106]:
df_frequent_misses_alg_high_level = (df_ast_labels_by_bug
.query("alg_covered == 0", engine='python')
.assign(description = lambda x: [format_description(a) for a in x.description])
.assign(organism = lambda x: [a.capitalize() for a in x.organism])
.assign(organism=lambda x: [map_high_level_organism[a] for a in x.organism])
.groupby(['description', 'organism'])
.agg(num_misses=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values(['num_misses'], ascending=False)
.rename(columns={'description' : 'Culture Type',
                 'organism' : 'Infection Type',
                 'num_misses' : 'Number of Misses'})
)
df_frequent_misses_alg_high_level.to_csv('alg_misses_by_infection_type.csv', index=None)
df_frequent_misses_alg_high_level

Unnamed: 0,Culture Type,Infection Type,Number of Misses
9,Urine Culture,Lactose Fermenting GNRs,36
8,Urine Culture,Enterococcus species,34
3,Blood Culture,Non Lactose Fermenting GNRs,9
11,Urine Culture,Non Lactose Fermenting GNRs,9
0,Blood Culture,Enterococcus species,5
1,Blood Culture,Lactose Fermenting GNRs,5
2,Blood Culture,MRSA,4
4,Blood Culture,Streptococcus species,3
7,Other Fluid Culture,Non Lactose Fermenting GNRs,3
5,Other Fluid Culture,Enterococcus species,2


### Dr misses stratified by what antibiotic was delivered

In [108]:
df_frequent_misses_dr = (df_ast_labels_by_bug
.query("dr_covered == 0", engine='python')
.assign(description = lambda x: [format_description(a) for a in x.description])
.assign(organism = lambda x: [a.capitalize() for a in x.organism])
.assign(organism=lambda x: [map_high_level_organism[a] for a in x.organism])
.groupby(['description', 'organism', 'med_description'])
.agg(num_misses=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values(['num_misses'], ascending=False)
.rename(columns={'description' : 'Culture Type',
                 'organism' : 'Infection Type',
                 'med_description' : 'Antibiotics Administered',
                 'num_misses' : 'Number of Misses'})
)
df_frequent_misses_dr.to_csv('doctor_misses_by_infection_type_and_abx.csv', index=None)
df_frequent_misses_dr

Unnamed: 0,Culture Type,Infection Type,Antibiotics Administered,Number of Misses
18,Urine Culture,Enterococcus species,Ceftriaxone,47
20,Urine Culture,Lactose Fermenting GNRs,Ceftriaxone,29
26,Urine Culture,Non Lactose Fermenting GNRs,Ceftriaxone,10
2,Blood Culture,Lactose Fermenting GNRs,Ceftriaxone,5
23,Urine Culture,Lactose Fermenting GNRs,Zosyn,4
21,Urine Culture,Lactose Fermenting GNRs,Vancomycin,3
22,Urine Culture,Lactose Fermenting GNRs,Vancomycin_Zosyn,3
15,Other Fluid Culture,Non Lactose Fermenting GNRs,Zosyn,2
11,Blood Culture,Streptococcus species,Zosyn,2
1,Blood Culture,Enterococcus species,Ceftriaxone,2


### Alg misses by what antibiotic was given

In [110]:
df_frequent_misses_alg = (df_ast_labels_by_bug
.query("alg_covered == 0", engine='python')
.assign(description = lambda x: [format_description(a) for a in x.description])
.assign(organism = lambda x: [a.capitalize() for a in x.organism])
.assign(organism=lambda x: [map_high_level_organism[a] for a in x.organism])
.groupby(['description', 'organism', 'IP_med_description'])
.agg(num_misses=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values(['num_misses'], ascending=False)
.rename(columns={'description' : 'Culture Type',
                 'organism' : 'Infection Type',
                 'IP_med_description' : 'Antibiotics Administered',
                 'num_misses' : 'Number of Misses'})
)
df_frequent_misses_alg.to_csv('alg_misses_by_infection_type_and_abx.csv', index=None)
df_frequent_misses_alg

Unnamed: 0,Culture Type,Infection Type,Antibiotics Administered,Number of Misses
17,Urine Culture,Enterococcus species,Ceftriaxone,29
20,Urine Culture,Lactose Fermenting GNRs,Ceftriaxone,22
27,Urine Culture,Non Lactose Fermenting GNRs,Ceftriaxone,7
0,Blood Culture,Enterococcus species,Ceftriaxone,5
4,Blood Culture,Non Lactose Fermenting GNRs,Ceftriaxone,5
1,Blood Culture,Lactose Fermenting GNRs,Ceftriaxone,5
24,Urine Culture,Lactose Fermenting GNRs,Vancomycin_Zosyn,5
25,Urine Culture,Lactose Fermenting GNRs,Zosyn,4
18,Urine Culture,Enterococcus species,Meropenem,3
2,Blood Culture,MRSA,Ceftriaxone,3


## IV AND IM Antibiotics Only

In [7]:
query = """
SELECT
    om.anon_id, om.pat_enc_csn_id_coded, om.order_med_id_coded, l.index_time, om.med_description,
    l.Ampicillin, l.Ciprofloxacin, l.Cefazolin, l.Ceftriaxone, l.Cefepime, l.Zosyn, l.Vancomycin,
    l.Meropenem, l.Vancomycin_Meropenem, l.Vancomycin_Zosyn, l.Vancomycin_Cefepime, l.Vancomycin_Ceftriaxone
FROM
    `mining-clinical-decisions.abx.abx_orders_given_and_stopped` om
INNER JOIN 
    `mining-clinical-decisions.abx.final_ast_labels` l
USING
    (pat_enc_csn_id_coded)
WHERE
    om.was_given = 1
ORDER BY 
    om.anon_id, om.pat_enc_csn_id_coded, om.order_time
"""
query_job = client.query(query)
df_abx = query_job.result().to_dataframe()
df_abx.head()

# Lambda that aggregate Antibiotic orders after we've grouped by CSN
concat_abx = lambda x : ' '.join(np.unique(sorted([a for a in x])))

# Aggregate abx orders
df_drugs = (df_abx
    .assign(med_description=lambda x: [a.split(' ')[0] for a in x.med_description]) # Only Take first word (abx)
    .assign(med_description=lambda x: [(a.replace('PIPERACILLIN-TAZOBACTAM-DEXTRS','PIPERACILLIN-TAZOBACTAM')
                                        .replace('VANCOMYCIN-WATER', 'VANCOMYCIN'))
                                    for a in x.med_description])
    .assign(year=lambda x: x.index_time.dt.year) # get year of each CSN - used to filter later on
    .groupby('pat_enc_csn_id_coded')
    .agg({'med_description' : concat_abx,
        'year' : 'first',
        'Ampicillin' : 'first',
        'Ciprofloxacin' : 'first',
        'Cefazolin' : 'first',
        'Ceftriaxone' : 'first',
        'Cefepime' : 'first',
        'Zosyn' : 'first',
        'Vancomycin' : 'first',
        'Meropenem' : 'first',
        'Vancomycin_Ceftriaxone' : 'first',
        'Vancomycin_Cefepime' : 'first',
        'Vancomycin_Zosyn' : 'first',
        'Vancomycin_Meropenem' : 'first'})
    .reset_index()
    # Only look at test set data and CSNs where allowed antibiotic selection was administered
    .query("year == 2019", engine='python') # and med_description in @abx_map_inverse", engine='python') 
    #.assign(med_description=lambda x: [abx_map_inverse[a] for a in x.med_description])
)

In [8]:
df_drugs.head()

Unnamed: 0,pat_enc_csn_id_coded,med_description,year,Ampicillin,Ciprofloxacin,Cefazolin,Ceftriaxone,Cefepime,Zosyn,Vancomycin,Meropenem,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Vancomycin_Zosyn,Vancomycin_Meropenem
6847,131260812263,CEFTRIAXONE,2019,0,1,0,1,1,1,0,1,1,1,1,1
6855,131260883970,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM VANCOMYCIN,2019,1,1,0,0,0,1,1,0,1,1,1,1
6858,131261001599,PIPERACILLIN-TAZOBACTAM,2019,0,1,1,1,1,1,0,1,1,1,1,1
6859,131261001696,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM,2019,0,1,0,0,1,1,0,1,0,1,1,1
6861,131261014293,CEFTRIAXONE,2019,1,1,1,1,1,1,1,1,1,1,1,1


In [9]:
len(df_drugs)

1296

In [14]:
unique_regimens = (df_drugs
.groupby('med_description')
.agg(num_ed_visits=('pat_enc_csn_id_coded', 'nunique'))
.sort_values('num_ed_visits', ascending=False)
.reset_index()
)
unique_regimens.head(20)

Unnamed: 0,med_description,num_ed_visits
0,CEFTRIAXONE,367
1,PIPERACILLIN-TAZOBACTAM VANCOMYCIN,113
2,PIPERACILLIN-TAZOBACTAM,102
3,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM,41
4,AZITHROMYCIN CEFTRIAXONE,37
5,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM VANCOMYCIN,32
6,CEFTRIAXONE VANCOMYCIN,31
7,ERTAPENEM,23
8,CEFEPIME VANCOMYCIN,23
9,CEFTRIAXONE METRONIDAZOLE,21


In [15]:
unique_regimens.to_csv('unique_regimens.csv', index=None)

## Now Include Oral Abx that were listed as administered

In [16]:
query = """
SELECT
    om.anon_id, om.pat_enc_csn_id_coded, om.order_med_id_coded, l.index_time, om.med_description,
    l.Ampicillin, l.Ciprofloxacin, l.Cefazolin, l.Ceftriaxone, l.Cefepime, l.Zosyn, l.Vancomycin,
    l.Meropenem, l.Vancomycin_Meropenem, l.Vancomycin_Zosyn, l.Vancomycin_Cefepime, l.Vancomycin_Ceftriaxone
FROM
    `mining-clinical-decisions.abx.abx_orders_given_and_stopped_include_oral` om
INNER JOIN 
    `mining-clinical-decisions.abx.final_ast_labels` l
USING
    (pat_enc_csn_id_coded)
WHERE
    om.was_given = 1
ORDER BY 
    om.anon_id, om.pat_enc_csn_id_coded, om.order_time
"""
query_job = client.query(query)
df_abx = query_job.result().to_dataframe()
df_abx.head()

# Lambda that aggregate Antibiotic orders after we've grouped by CSN
concat_abx = lambda x : ' '.join(np.unique(sorted([a for a in x])))

# Aggregate abx orders
df_drugs = (df_abx
    .assign(med_description=lambda x: [a.split(' ')[0] for a in x.med_description]) # Only Take first word (abx)
    .assign(med_description=lambda x: [(a.replace('PIPERACILLIN-TAZOBACTAM-DEXTRS','PIPERACILLIN-TAZOBACTAM')
                                        .replace('VANCOMYCIN-WATER', 'VANCOMYCIN'))
                                    for a in x.med_description])
    .assign(year=lambda x: x.index_time.dt.year) # get year of each CSN - used to filter later on
    .groupby('pat_enc_csn_id_coded')
    .agg({'med_description' : concat_abx,
        'year' : 'first',
        'Ampicillin' : 'first',
        'Ciprofloxacin' : 'first',
        'Cefazolin' : 'first',
        'Ceftriaxone' : 'first',
        'Cefepime' : 'first',
        'Zosyn' : 'first',
        'Vancomycin' : 'first',
        'Meropenem' : 'first',
        'Vancomycin_Ceftriaxone' : 'first',
        'Vancomycin_Cefepime' : 'first',
        'Vancomycin_Zosyn' : 'first',
        'Vancomycin_Meropenem' : 'first'})
    .reset_index()
    # Only look at test set data and CSNs where allowed antibiotic selection was administered
    .query("year == 2019", engine='python') # and med_description in @abx_map_inverse", engine='python') 
    #.assign(med_description=lambda x: [abx_map_inverse[a] for a in x.med_description])
)

In [17]:
df_drugs.head()

Unnamed: 0,pat_enc_csn_id_coded,med_description,year,Ampicillin,Ciprofloxacin,Cefazolin,Ceftriaxone,Cefepime,Zosyn,Vancomycin,Meropenem,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Vancomycin_Zosyn,Vancomycin_Meropenem
6885,131260812263,CEFTRIAXONE,2019,0,1,0,1,1,1,0,1,1,1,1,1
6893,131260883970,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM VANCOMYCIN,2019,1,1,0,0,0,1,1,0,1,1,1,1
6896,131261001599,PIPERACILLIN-TAZOBACTAM,2019,0,1,1,1,1,1,0,1,1,1,1,1
6897,131261001696,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM,2019,0,1,0,0,1,1,0,1,0,1,1,1
6899,131261014293,CEFTRIAXONE,2019,1,1,1,1,1,1,1,1,1,1,1,1


In [18]:
len(df_drugs)

1304

In [19]:
unique_regimens = (df_drugs
.groupby('med_description')
.agg(num_ed_visits=('pat_enc_csn_id_coded', 'nunique'))
.sort_values('num_ed_visits', ascending=False)
.reset_index()
)
unique_regimens.head(20)

Unnamed: 0,med_description,num_ed_visits
0,CEFTRIAXONE,318
1,PIPERACILLIN-TAZOBACTAM VANCOMYCIN,106
2,PIPERACILLIN-TAZOBACTAM,91
3,AZITHROMYCIN CEFTRIAXONE,49
4,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM,31
5,CEFTRIAXONE PIPERACILLIN-TAZOBACTAM VANCOMYCIN,30
6,CEFTRIAXONE VANCOMYCIN,29
7,CEFTRIAXONE METRONIDAZOLE,21
8,CEFEPIME METRONIDAZOLE VANCOMYCIN,21
9,ERTAPENEM,21


In [20]:
len(unique_regimens)

306

In [21]:
unique_regimens.to_csv('unique_regimens_include_oral.csv', index=None)

### Inspect Misses

In [19]:
query="""
SELECT DISTINCT
  a.anon_id, a.pat_enc_csn_id_coded, a.index_time, cs.organism, cs.description
FROM  
  `mining-clinical-decisions.abx.final_ast_labels` a
INNER JOIN
  `mining-clinical-decisions.abx.culture_orders_within_24_hrs` cult_orders
USING
  (pat_enc_csn_id_coded)
INNER JOIN 
  `som-nero-phi-jonc101.shc_core.culture_sensitivity` cs
USING
  (order_proc_id_coded)
WHERE 
    EXTRACT(YEAR FROM a.index_time) = 2019
AND 
    organism NOT LIKE "%COAG NEGATIVE STAPHYLOCOCCUS%"
AND 
    organism NOT LIKE "%CANDIDA%"


""" 
query_job = client.query(query)
df_bugs = query_job.result().to_dataframe()

In [20]:
df_bugs.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,index_time,organism,description
0,JC2a201a1,131272667143,2019-08-11 00:46:00+00:00,ACINETOBACTER SPECIES,CSF CULTURE AND GRAM STAIN
1,JCec25cb,131272440956,2019-07-17 04:04:00+00:00,ESCHERICHIA COLI,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLE)
2,JCdedda0,131273672987,2019-07-13 00:24:00+00:00,STAPHYLOCOCCUS AUREUS,BLOOD CULTURE (2 AEROBIC BOTTLES)
3,JCe1fac8,131269207468,2019-05-23 22:44:00+00:00,ENTEROCOCCUS FAECIUM,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLE)
4,JCd6758a,131278824768,2019-11-20 07:01:00+00:00,STAPHYLOCOCCUS AUREUS,BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLE)


In [21]:
(df_bugs
.groupby('description')
.agg(num_ids=('pat_enc_csn_id_coded', 'nunique'))
)

Unnamed: 0_level_0,num_ids
description,Unnamed: 1_level_1
"BLOOD CULT - FIRST SET, VIA PHLEBOTOMY",20
BLOOD CULT CENTRAL LINE CATHETER BY NURSE,19
BLOOD CULTURE (2 AEROBIC BOTTLES),133
BLOOD CULTURE (AEROBIC & ANAEROBIC BOTTLE),356
CSF CULTURE AND GRAM STAIN,1
FLUID CULTURE AND GRAM STAIN,77
URINE CULTURE,940


### Bugs that are most frequent misses

In [49]:
import pdb
### Get organisms corresponding to misses
concat_bugs = lambda x : ' '.join(np.unique(sorted([a.capitalize() for a in x])))
def group_cultures(x):
    if x.split()[0] in ("BLOOD", "URINE"): 
        return x.split()[0].capitalize()
    else:
        return 'Other fluid' 
        
def pipe_func(df):
    df = df.assign(organism_and_culture = df.apply(lambda x: x['description'] + ' ' + x['organism'], axis=1))
    return df

df_bugs_collapsed = (df_bugs
.assign(description = lambda x: [group_cultures(a) for a in x.description]) 
.pipe(pipe_func)
.groupby('pat_enc_csn_id_coded')
.agg(organism=('organism_and_culture', concat_bugs))
.reset_index()
)


df_bugs_collapsed = (df_bugs_collapsed
.merge(df_drugs, how='inner', on='pat_enc_csn_id_coded')
.query("dr_covered == 0", engine='python')
.groupby('organism')
.agg(num_csns=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values('num_csns', ascending=False)
)
df_bugs_collapsed

Unnamed: 0,organism,num_csns
39,Urine enterococcus species,20
44,Urine escherichia coli,14
40,Urine enterococcus species Urine escherichia coli,9
34,Urine enterococcus faecalis,7
55,Urine pseudomonas aeruginosa,4
32,Urine enterobacter cloacae complex,4
49,Urine klebsiella pneumoniae,4
11,Blood escherichia coli Urine escherichia coli,3
36,Urine enterococcus faecalis Urine escherichia ...,2
38,Urine enterococcus faecium,2


### Bug & Drug Combos of misses

In [50]:
### Get organisms corresponding to misses
df_bugs_collapsed = (df_bugs
.assign(description = lambda x: [group_cultures(a) for a in x.description]) 
.pipe(pipe_func)
.groupby('pat_enc_csn_id_coded')
.agg(organism=('organism_and_culture', concat_bugs))
.reset_index()
)

df_misses_with_drugs = (df_bugs_collapsed
.merge(df_drugs, how='inner', on='pat_enc_csn_id_coded')
.query("dr_covered == 0", engine='python')
.groupby(['organism', 'med_description'])
.agg(num_csns=('pat_enc_csn_id_coded', 'nunique'))
.reset_index()
.sort_values('num_csns', ascending=False)
)

df_misses_with_drugs

Unnamed: 0,organism,med_description,num_csns
42,Urine enterococcus species,Ceftriaxone,19
47,Urine escherichia coli,Ceftriaxone,13
43,Urine enterococcus species Urine escherichia coli,Ceftriaxone,9
35,Urine enterococcus faecalis,Ceftriaxone,7
33,Urine enterobacter cloacae complex,Ceftriaxone,4
...,...,...,...
1,Blood elizabethkingia species Blood non fermen...,Vancomycin_Cefepime,1
34,Urine enterobacter cloacae complex Urine esche...,Zosyn,1
8,Blood enterococcus faecalis Urine klebsiella p...,Cefepime,1
36,Urine enterococcus faecalis Urine enterococcus...,Ceftriaxone,1


In [22]:
df_misses_with_drugs.to_csv("top_misses_with_what_was_given.csv", index=None)