In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os 

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/Users/conorcorbin/.config/gcloud/application_default_credentials.json' 
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101' 
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()



In [44]:
### Resistant bugs dataframe - manually filled in
df_bugs = pd.read_csv('resistant_bugs.csv')
df_bugs.query("antibiotic == 'Meropenem' and `Always Resistant` == 1")['bug'].values

array(['ENTEROCOCCUS SPECIES', 'ENTEROCOCCUS FAECALIS',
       'STAPH AUREUS {MRSA}', 'ENTEROCOCCUS FAECIUM',
       'STENOTROPHOMONAS MALTOPHILIA',
       'ENTEROCOCCUS FAECIUM - VANCO RESISTANT',
       'ENTEROCOCCUS CASSELIFLAVUS', 'ENTEROCOCCUS AVIUM',
       'ENTEROCOCCUS GALLINARUM', 'ENTEROCOCCUS RAFFINOSUS',
       'ENTEROCOCCUS DURANS', 'ENTEROCOCCUS DURANS/HIRAE'], dtype=object)

In [51]:
# Define Fill in And Processing Functions
def fill_in_cefazolin(x):
    
    # If not missing just return what it is. 
    if x.Cefazolin == 'Susceptible' or x.Cefazolin == 'Resistant':
        return x.Cefazolin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Cefazolin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # If anything resistant to Cefepime or Ceftriaxone and Cefazolin not listed, assume resistant to Cefazolin
    if x.Cefepime == "Resistant" or x.Ceftriaxone == 'Resistant':
        return "Resistant"
    
    # For anything STREPTOCOCCUS related except STREPTOCOCCUS PNEUMONIAE  - refer to penicillin if not listed 
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism) and x.organism != 'STREPTOCOCCUS PNEUMONIAE':
        if x.Penicillin is not None:
            return x.Penicillin
    
    # Check For MRSA in STAPH AUREUS if not listed
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_ceftriaxone(x):
    
    # If not missing just return what it is. 
    if x.Ceftriaxone == 'Susceptible' or x.Ceftriaxone == 'Resistant':
        return x.Ceftriaxone
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Ceftriaxone' and `Always Resistant` == 1")['bug'].values)
    
    # If anything resistant to Cefepime, assume resistant to Ceftriaxone
    if x.Cefepime == "Resistant":
        return "Resistant"
    
    # If susceptible to Cefazolin, assume susceptible to Ceftriaxone
    if x.Cefazolin == "Susceptible": 
        return "Susceptible"
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"

    # For anything STREPTOCOCCUS related - refer to penicillin if not listed
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism):
        if x.Penicillin is not None:
            return x.Penicillin
        
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_cefepime(x):
    
    # If not missing just return what it is. 
    if x.Cefepime == 'Susceptible' or x.Cefepime == 'Resistant':
        return x.Cefepime
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Cefepime' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # For anything STREPTOCOCCUS related - refer to penicillin 
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism):
        if x.Penicillin is not None:
            return x.Penicillin
        
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # If susceptible to Cefazolin or Ceftriaxone assume susceptible to Cefepime
    if x.Cefazolin == "Susceptible" or x.Ceftriaxone == "Susceptible":
        return "Susceptible"
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_zosyn(x):
     
    # If not missing just return what it is. 
    if x.Zosyn == 'Susceptible' or x.Zosyn == 'Resistant':
        return x.Zosyn
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules
                 .query("antibiotic == 'Piperacillin/Tazobactam' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # For anything STREPTOCOCCUS related - refer to penicillin 
    if ('STREPTOCOCCUS' in x.organism or 'STREPTOCOCCI' in x.organism):
        if x.Penicillin is not None:
            return x.Penicillin
        
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
        
    # If susceptible to ampicillin, then zosyn susceptible - useless rule because of the following rule
    if x.Ampicillin == "Susceptible":
        return x.Ampicillin
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"


def fill_in_vancomycin(x):
    # If not missing just return what it is. 
    if x.Vancomycin == 'Susceptible' or x.Vancomycin == 'Resistant':
        return x.Vancomycin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Vancomycin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Susceptible"

def fill_in_meropenem(x):
    # If not missing just return what it is. 
    if x.Meropenem == 'Susceptible' or x.Meropenem == 'Resistant':
        return x.Meropenem
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Meropenem' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # Check For MRSA in STAPH AUREUS 
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
        
    # If susceptible to ampicillin, then meropenem susceptible
    if x.Ampicillin == "Susceptible": # not the case with Enteroccocus, but this should be in alwasys resistant list above. 
        return x.Ampicillin # also - useless rule because of rule below
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_ciprofloxacin(x):
    # If not missing just return what it is. 
    if x.Ciprofloxacin == 'Susceptible' or x.Ciprofloxacin == 'Resistant':
        return x.Ciprofloxacin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Ciprofloxacin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
    
    # If Resistant to Levofloxacin, resistant to Cipro
    if x.Levofloxacin == "Resistant":
        return "Resistant"
    
    # Check for MRSA - assume same as oxacillin if not listed
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"

def fill_in_ampicillin(x):
    
    # If not missing just return what it is. 
    if x.Ampicillin == 'Susceptible' or x.Ampicillin == 'Resistant':
        return x.Ampicillin
    
    # Read in bug rules
    df_bug_rules = pd.read_csv('resistant_bugs.csv')
    r_bugs = set(df_bug_rules.query("antibiotic == 'Ampicillin' and `Always Resistant` == 1")['bug'].values)
    
    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"
        
    # Check for MRSA - assume same as oxacillin if not listed
    if x.organism == 'STAPHYLOCOCCUS AUREUS':
        if x.Oxacillin is not None:
            return x.Oxacillin 
    
    # Otherwise apply Nancy Watz Rule (assume susc if not listed)
    return "Susceptible"


def combine_labels(arr):
    """
    Sometimes organisms have multiple suscept labels. When they do, if any of them are not one of
    Susceptible, Positive, or Susceptible - Dose Dependent then we say organism is resistant to 
    said antibiotic
    """
    for a in arr:
        if a not in ['Susceptible', 'Positive', 'Susceptible - Dose Dependent']:
            return 'Resistant'
    return 'Susceptible'

def combine_antibiotic_syns(x):
    """
    Antibiotics often are given different names in the AST tables, this function combines
    synnonyms so that we don't have to refer to multiple names in downstream analysis. 
    """
    if x == 'Aztreonam.':
        return 'Aztreonam'
    elif x == 'Cefazolin..':
        return 'Cefazolin'
    elif x == 'Ceftazidime.':
        return 'Ceftazidime'
    elif x in ('Ceftriaxone (Meningeal)', 'Ceftriaxone (Non-Meningeal)', 'Ceftriaxone.'):
        return 'Ceftriaxone'
    elif x in ('Ciprofloxacin.'):
        return 'Ciprofloxacin'
    elif x == 'Gentamicin 500 mcg/ml.':
        return 'Gentamicin'
    elif x in ('Oxacillin Screen', 'Oxacillin.'):
        return 'Oxacillin'
    elif x in ('PENICILLIN G (MENINGEAL)','PENICILLIN G (NON-MENINGEAL)', 'PENICILLIN V (ORAL)', 'Penicillin..'):
        return 'Penicillin'
    elif x == 'Trimethoprim/Sulfamethoxazole.':
        return 'Trimethoprim/Sulfamethoxazole'
    else:
        return x





### Query BQ Tables

In [33]:
import pdb
# Query positive culture data
query = """
SELECT DISTINCT 
    cohort.pat_enc_csn_id_coded, cults.order_proc_id_coded,
    cs.sens_organism_sid, cs.line, cs.organism,
    cs.antibiotic, cs.suscept, cs.sensitivity_value, cs.sens_ref_range 
FROM 
    `mining-clinical-decisions.abx.culture_orders_within_24_hrs` cults
INNER JOIN 
    `mining-clinical-decisions.abx.cohort_not_infected_rules` cohort
USING 
    (pat_enc_csn_id_coded)
INNER JOIN 
    `shc_core.culture_sensitivity` cs
USING 
    (order_proc_id_coded)
WHERE 
    organism not LIKE "%CANDIDA%"
AND 
    organism not in ('COAG NEGATIVE STAPHYLOCOCCUS', 'HAEMOPHILUS INFLUENZAE', 'HAEMOPHILUS PARAINFLUENZAE')
ORDER BY 
    cohort.pat_enc_csn_id_coded, cults.order_proc_id_coded, cs.line
"""

query_job = client.query(query)
df=query_job.to_dataframe()
df.head()

Unnamed: 0,pat_enc_csn_id_coded,order_proc_id_coded,sens_organism_sid,line,organism,antibiotic,suscept,sensitivity_value,sens_ref_range
0,131003775398,357750415,ZZ00,1,KLEBSIELLA OXYTOCA,Ampicillin,Resistant,>=32,
1,131003775398,357750415,ZZ00,2,KLEBSIELLA OXYTOCA,Cefazolin,Susceptible,8,
2,131003775398,357750415,ZZ00,3,KLEBSIELLA OXYTOCA,Piperacillin/Tazobactam,Susceptible,<=4,
3,131003775398,357750415,ZZ00,4,KLEBSIELLA OXYTOCA,Gentamicin,Susceptible,<=1,
4,131003775398,357750415,ZZ00,5,KLEBSIELLA OXYTOCA,Ciprofloxacin,Susceptible,<=0.25,


### Make row correspond to unique organism_sid and pivot

In [63]:
filter_cols = ['example', 'pat_enc_csn_id_coded', 'organism', 'order_proc_id_coded', 'sens_organism_sid',
               'Cefepime', 'Ceftriaxone', 'Cefazolin', 'Vancomycin', 'Meropenem', 'Zosyn',
               'Ampicillin', 'Penicillin', 'Oxacillin', 'Ciprofloxacin', 'Levofloxacin']

df_wide = (df
    .assign(example=lambda x: x.pat_enc_csn_id_coded.map(str) + ', ' \
            + x.order_proc_id_coded.map(str) + ', ' + x.sens_organism_sid + ', ' + x.organism,
            antibiotic=lambda x: [combine_antibiotic_syns(a) for a in x.antibiotic])
    .groupby(['example', 'antibiotic']) 
    .agg({'suscept' : lambda x: combine_labels(x)})          
    .reset_index()
    .pivot(index='example', columns='antibiotic', values='suscept')
    .reset_index()
    .assign(organism = lambda x: [a.split(', ')[3] for a in x.example],
            order_proc_id_coded = lambda x: [a.split(', ')[1] for a in x.example],
            sens_organism_sid = lambda x: [a.split(', ')[2] for a in x.example],
            pat_enc_csn_id_coded = lambda x: [a.split(', ')[0] for a in x.example])
    .rename(columns={'Piperacillin/Tazobactam' : 'Zosyn'})
)
df_wide = df_wide[filter_cols]
df_wide.head()


antibiotic,example,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Cefepime,Ceftriaxone,Cefazolin,Vancomycin,Meropenem,Zosyn,Ampicillin,Penicillin,Oxacillin,Ciprofloxacin,Levofloxacin
0,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",131003775398,KLEBSIELLA OXYTOCA,357750415,ZZ00,,,Susceptible,,,Susceptible,Resistant,,,Susceptible,Susceptible
1,"131003775398, 357750417, ZZ00, KLEBSIELLA OXYTOCA",131003775398,KLEBSIELLA OXYTOCA,357750417,ZZ00,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Resistant,,,Susceptible,Susceptible
2,"131003775398, 357750417, ZZ01, PROTEUS MIRABILIS",131003775398,PROTEUS MIRABILIS,357750417,ZZ01,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Susceptible,,,Susceptible,Susceptible
3,"131004006894, 358490468, ZZ00, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ00,,Susceptible,,,,,,,,,
4,"131004006894, 358490468, ZZ01, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ01,,Susceptible,,,,,,Susceptible,,,


## Fill in Missing AST Labels 
### Make Unit of Observation a CSN

In [64]:
agg_labels = lambda x: "Resistant" if any(x == "Resistant") else "Susceptible"
df_wide = (df_wide
    .assign(Ampicillin=lambda x: df_wide.apply(lambda x: fill_in_ampicillin(x), axis=1),
            Cefepime=lambda x: df_wide.apply(lambda x: fill_in_cefepime(x), axis=1),
            Ceftriaxone=lambda x: df_wide.apply(lambda x: fill_in_ceftriaxone(x), axis=1),
            Cefazolin=lambda x: df_wide.apply(lambda x: fill_in_cefazolin(x), axis=1),
            Ciprofloxacin=lambda x: df_wide.apply(lambda x: fill_in_ciprofloxacin(x), axis=1),
            Zosyn=lambda x: df_wide.apply(lambda x: fill_in_zosyn(x), axis=1),
            Vancomycin=lambda x: df_wide.apply(lambda x: fill_in_vancomycin(x), axis=1),
            Meropenem=lambda x: df_wide.apply(lambda x: fill_in_meropenem(x), axis=1),
            Vancomycin_Ceftriaxone=lambda x: ["Susceptible" if a == "Susceptible" 
                                              or b == "Susceptible" else "Resistant"
                                              for a, b in zip(x.Vancomycin, x.Ceftriaxone)],
            Vancomycin_Cefepime=lambda x: ["Susceptible" if a == "Susceptible" 
                                           or b == "Susceptible" else "Resistant"
                                           for a, b in zip(x.Vancomycin, x.Cefepime)],
            Vancomycin_Zosyn=lambda x: ["Susceptible" if a == "Susceptible" 
                                        or b == "Susceptible" else "Resistant"
                                        for a, b in zip(x.Vancomycin, x.Zosyn)],
            Vancomycin_Meropenem=lambda x: ["Susceptible" if a == "Susceptible" 
                                            or b == "Susceptible" else "Resistant"
                                            for a, b in zip(x.Vancomycin, x.Meropenem)])
            
    .groupby('pat_enc_csn_id_coded')
    .agg({'Ampicillin' : agg_labels,
          'Cefazolin' : agg_labels,
          'Ceftriaxone' : agg_labels,
          'Cefepime' : agg_labels,
          'Ciprofloxacin' : agg_labels,
          'Zosyn' : agg_labels,
          'Vancomycin' : agg_labels,
          'Meropenem' : agg_labels,
          'Vancomycin_Ceftriaxone' : agg_labels,
          'Vancomycin_Cefepime' : agg_labels,
          'Vancomycin_Zosyn' : agg_labels,
          'Vancomycin_Meropenem' : agg_labels})
    .reset_index()
)
df_wide.head()

Unnamed: 0,pat_enc_csn_id_coded,Ampicillin,Cefazolin,Ceftriaxone,Cefepime,Ciprofloxacin,Zosyn,Vancomycin,Meropenem,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Vancomycin_Zosyn,Vancomycin_Meropenem
0,131003775398,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
1,131004006894,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
2,131004013925,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Resistant,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Susceptible
3,131004041439,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Susceptible
4,131004896326,Resistant,Resistant,Resistant,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible


### Merge with cohort table to get index times

In [65]:
query = """
SELECT DISTINCT 
    anon_id, pat_enc_csn_id_coded, index_time
FROM 
    mining-clinical-decisions.abx.cohort_not_infected_rules
"""
query_job = client.query(query)
df_cohort = query_job.to_dataframe()

columns = ['anon_id', 'pat_enc_csn_id_coded', 'index_time',
           'Ampicillin', 'Ciprofloxacin',
           'Cefazolin', 'Ceftriaxone', 'Cefepime',
           'Zosyn', 'Vancomycin', 'Meropenem',
           'Vancomycin_Ceftriaxone', 'Vancomycin_Cefepime',
           'Vancomycin_Zosyn', 'Vancomycin_Meropenem']
df_wide = (df_wide
    .assign(pat_enc_csn_id_coded=lambda x: x.pat_enc_csn_id_coded.astype(int))
    .merge(df_cohort, on='pat_enc_csn_id_coded', how='left')
)[columns]
df_wide.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,index_time,Ampicillin,Ciprofloxacin,Cefazolin,Ceftriaxone,Cefepime,Zosyn,Vancomycin,Meropenem,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Vancomycin_Zosyn,Vancomycin_Meropenem
0,JCdb6ef6,131003775398,2009-08-13 04:54:00+00:00,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
1,JCcc7f55,131004006894,2009-08-29 09:36:00+00:00,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible,Susceptible
2,JCd028bd,131004013925,2009-08-11 19:49:00+00:00,Resistant,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Susceptible
3,JCe92425,131004041439,2009-08-11 22:29:00+00:00,Resistant,Susceptible,Susceptible,Susceptible,Susceptible,Resistant,Resistant,Susceptible,Susceptible,Susceptible,Resistant,Susceptible
4,JCda516b,131004896326,2009-07-31 03:49:00+00:00,Resistant,Susceptible,Resistant,Resistant,Resistant,Susceptible,Susceptible,Resistant,Susceptible,Susceptible,Susceptible,Susceptible


### Convert labels to binary values

In [66]:
# Transform Susceptible vs Resitant to 1 or 0
abx_cols = ['Ampicillin', 'Ciprofloxacin',
            'Cefazolin', 'Ceftriaxone', 'Cefepime',
            'Zosyn', 'Vancomycin', 'Meropenem',
            'Vancomycin_Ceftriaxone', 'Vancomycin_Cefepime',
            'Vancomycin_Zosyn', 'Vancomycin_Meropenem']
df_wide[abx_cols] = (df_wide[abx_cols]
    .transform(lambda x: x.replace('Susceptible', '1').replace("Resistant", '0'))
)


# Convert labels to integers
convert_dict = {col : int for col in abx_cols}
df_wide = df_wide.astype(convert_dict) 
df_wide.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,index_time,Ampicillin,Ciprofloxacin,Cefazolin,Ceftriaxone,Cefepime,Zosyn,Vancomycin,Meropenem,Vancomycin_Ceftriaxone,Vancomycin_Cefepime,Vancomycin_Zosyn,Vancomycin_Meropenem
0,JCdb6ef6,131003775398,2009-08-13 04:54:00+00:00,0,1,1,1,1,1,0,1,1,1,1,1
1,JCcc7f55,131004006894,2009-08-29 09:36:00+00:00,1,0,1,1,1,1,1,1,1,1,1,1
2,JCd028bd,131004013925,2009-08-11 19:49:00+00:00,0,0,1,1,1,0,0,1,1,1,0,1
3,JCe92425,131004041439,2009-08-11 22:29:00+00:00,0,1,1,1,1,0,0,1,1,1,0,1
4,JCda516b,131004896326,2009-07-31 03:49:00+00:00,0,1,0,0,0,1,1,0,1,1,1,1


### Sanity Checks

In [67]:
# Show Fraction of CSNs where each abx therapy is susceptible
df_wide[abx_cols].mean()

Ampicillin                0.441993
Ciprofloxacin             0.629544
Cefazolin                 0.590988
Ceftriaxone               0.683196
Cefepime                  0.774591
Zosyn                     0.903496
Vancomycin                0.243526
Meropenem                 0.809778
Vancomycin_Ceftriaxone    0.828475
Vancomycin_Cefepime       0.943677
Vancomycin_Zosyn          0.950180
Vancomycin_Meropenem      0.971548
dtype: float64

In [68]:
# Make Sure Combination therapies are never resistant if either of it's parts are susceptible
for combo, a, b in zip(df_wide.Vancomycin_Ceftriaxone.values, df_wide.Ceftriaxone.values, df_wide.Vancomycin.values):
    if combo == False:
        assert a or b == False
        
for combo, a, b in zip(df_wide.Vancomycin_Cefepime.values, df_wide.Cefepime.values, df_wide.Vancomycin.values):
    if combo == False:
        assert a or b == False
        
for combo, a, b in zip(df_wide.Vancomycin_Meropenem.values, df_wide.Meropenem.values, df_wide.Vancomycin.values):
    if combo == False:
        assert a or b == False

for combo, a, b in zip(df_wide.Vancomycin_Zosyn.values, df_wide.Zosyn.values, df_wide.Vancomycin.values):
    if combo == False:
        assert a or b == False

In [69]:
# Make sure Cefazolin is rarely susceptible if Ceftriaxone or Cefepime are Resistant
# This can happen if for instance cefazolin is listed as susceptible, ceftriaxone is not listed but 
# is filled in as resistant due to the strepp rule
count = 0
for a, b, c in zip(df_wide.Cefepime.values, df_wide.Ceftriaxone.values, df_wide.Cefazolin.values):
    if c:
        if not a and b:
            count += 1
print(count)
print(len(df_wide))


16
8611


In [70]:
# Make sure Ceftriaxone is rarely susceptible if Cefeime is Resistant
# This can happen if for instance Ceftriaxone is listed as susceptible, Cefepime is not listed but 
# is filled in as resistant due to one of the rules rule
count = 0
for a, b in zip(df_wide.Cefepime.values, df_wide.Ceftriaxone.values):
    if b:
        if not a:
            count += 1
print(count)
print(len(df_wide))


82
8611


### Upload Labels Table to BQ

In [72]:
# Upload table 
table_schema = [{'name' : 'anon_id', 'type' : 'STRING'},
                {'name' : 'pat_enc_csn_id_coded', 'type' : 'INTEGER'},
                {'name' : 'index_time', 'type' : 'TIMESTAMP'},
                {'name' : 'Ampicillin', 'type' : 'INTEGER'},
                {'name' : 'Cefazolin', 'type' : 'INTEGER'},
                {'name' : 'Ceftriaxone', 'type' : 'INTEGER'},
                {'name' : 'Cefepime', 'type' : 'INTEGER'},
                {'name' : 'Cefepime', 'type' : 'INTEGER'},
                {'name' : 'Zosyn', 'type' : 'INTEGER'},
                {'name' : 'Vancomycin', 'type' : 'INTEGER'},
                {'name' : 'Meropenem', 'type' : 'INTEGER'}]

DATASET_NAME = 'abx'
TABLE_NAME = 'final_ast_labels'
df_wide.to_gbq(destination_table='%s.%s' % (DATASET_NAME, TABLE_NAME),
               project_id='mining-clinical-decisions',
               table_schema=table_schema,
               if_exists='replace')

1it [00:07,  7.46s/it]
