In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os 

### THIS IS MEANT TO RUN ON NERO. To run elsewhere, export GOOGLE_APPLICATION_CREDENTIALS and
### GCLOUD_PROJECT before starting Jupyter: the Nero values below are only defaults and never
### overwrite settings that are already present in the environment.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '/home/ccorbin/.config/gcloud/application_default_credentials.json')
os.environ.setdefault('GCLOUD_PROJECT', 'som-nero-phi-jonc101')
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()



In [8]:
# Pull every susceptibility row (culture_sensitivity) for culture orders in the
# culture_orders_within_24_hrs table whose encounter is also in the cohort table.
# Coag-negative staph is excluded; note that in SQL `organism <> '...'` is not true for a NULL
# organism, so rows with no organism name are filtered out as well.
query = """
SELECT DISTINCT cohort.pat_enc_csn_id_coded, cults.order_proc_id_coded, cs.sens_organism_sid, cs.line, cs.organism, cs.antibiotic, cs.suscept, cs.sensitivity_value, cs.sens_ref_range 
FROM `mining-clinical-decisions.abx.culture_orders_within_24_hrs` cults
INNER JOIN `mining-clinical-decisions.abx.interm_cohort_with_no_inf_rules` cohort
USING (pat_enc_csn_id_coded)
INNER JOIN `shc_core.culture_sensitivity` cs
USING (order_proc_id_coded)
WHERE organism <> "COAG NEGATIVE STAPHYLOCOCCUS"
ORDER BY cohort.pat_enc_csn_id_coded, cults.order_proc_id_coded, cs.line
"""
# Run the query with the BigQuery client created above and load the full result into pandas.
query_job =client.query(query)
df=query_job.to_dataframe()

In [9]:
df.head()

Unnamed: 0,pat_enc_csn_id_coded,order_proc_id_coded,sens_organism_sid,line,organism,antibiotic,suscept,sensitivity_value,sens_ref_range
0,131003775398,357750415,ZZ00,1,KLEBSIELLA OXYTOCA,Ampicillin,Resistant,>=32,
1,131003775398,357750415,ZZ00,2,KLEBSIELLA OXYTOCA,Cefazolin,Susceptible,8,
2,131003775398,357750415,ZZ00,3,KLEBSIELLA OXYTOCA,Piperacillin/Tazobactam,Susceptible,<=4,
3,131003775398,357750415,ZZ00,4,KLEBSIELLA OXYTOCA,Gentamicin,Susceptible,<=1,
4,131003775398,357750415,ZZ00,5,KLEBSIELLA OXYTOCA,Ciprofloxacin,Susceptible,<=0.25,


In [10]:
# Distinct antibiotic / test names in alphabetical order; handy for spotting spelling
# variants of the same drug (e.g. 'Aztreonam' vs 'Aztreonam.').
df.sort_values('antibiotic').antibiotic.unique()

array(['5-Flucytosine', 'Amikacin', 'Amoxicillin/Clavulanic Acid',
       'Amphotericin B', 'Ampicillin', 'Ampicillin/Sulbactam',
       'Anidulafungin', 'Antibiotic', 'Aztreonam', 'Aztreonam.',
       'Beta Lactamase', 'BlaZ PCR', 'Carbapenemase', 'Caspofungin',
       'Cefazolin', 'Cefazolin..', 'Cefepime', 'Cefotaxime',
       'Cefotaxime/Clavulanic Acid', 'Cefotetan', 'Cefoxitin',
       'Cefpodoxime', 'Ceftaroline', 'Ceftazidime', 'Ceftazidime.',
       'Ceftazidime/Clavulanic Acid', 'Ceftazidime/avibactam',
       'Ceftolozane/Tazobactam', 'Ceftriaxone', 'Ceftriaxone (Meningeal)',
       'Ceftriaxone (Non-Meningeal)', 'Ceftriaxone.', 'Cefuroxime (IV)',
       'Cefuroxime (PO)', 'Cephalexin/Cephalothin', 'Chloramphenicol',
       'Ciprofloxacin', 'Ciprofloxacin.', 'Clarithromycin', 'Clindamycin',
       'Colistin', 'D-Test', 'DORIPENEM', 'Daptomycin', 'Doxycycline',
       'ESBL confirmation test', 'Ertapenem', 'Erythromycin',
       'Esbl Check', 'FOSFOMYCIN', 'Fluconazole', 'Gen

## Logic For Filling In AST
   * If the same organism name is listed under two separate `sens_organism_sid`s and a sensitivity is reported for only one of them, assume the other has the same sensitivity.

In [11]:
# One row per (encounter, culture order, organism); the sens_organism_sid column holds the
# number of distinct sens_organism_sid values reported for that organism on that order.
group_keys = ['pat_enc_csn_id_coded', 'order_proc_id_coded', 'organism']
sid_counts = df.groupby(group_keys).agg({'sens_organism_sid' : lambda sids: len(set(sids))})
df_unique_orgs_per_order = sid_counts.reset_index()

In [12]:
# How many (encounter, order, organism) groups have 1, 2, 3, ... distinct sens_organism_sid values.
df_unique_orgs_per_order.groupby('sens_organism_sid').count()

Unnamed: 0_level_0,pat_enc_csn_id_coded,order_proc_id_coded,organism
sens_organism_sid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,10591,10591,10591
2,997,997,997
3,28,28,28
4,12,12,12


In [13]:
# Every distinct susceptibility label present in the query result (including missing values).
df['suscept'].unique()

array(['Resistant', 'Susceptible', 'Negative', None, 'Intermediate',
       'Positive', 'Synergy', 'No Synergy', 'No Interpretation',
       'Not done', 'Susceptible - Dose Dependent', 'Non Susceptible',
       'Detected', 'Not Detected'], dtype=object)

In [14]:
def combine_labels(arr):
    """
    Collapse all suscept labels reported for one organism/antibiotic pair into a single label.

    Returns 'Susceptible' only when every label in `arr` is one of Susceptible, Positive or
    Susceptible - Dose Dependent (an empty `arr` therefore also yields 'Susceptible').
    Any other label, including a missing value, makes the result 'Resistant'.

    NOTE(review): 'Positive' is counted as susceptible for every test name - confirm this is
    intended for screening-type tests.
    """
    susceptible_like = ('Susceptible', 'Positive', 'Susceptible - Dose Dependent')
    if all(label in susceptible_like for label in arr):
        return 'Susceptible'
    return 'Resistant'

# Variant spellings / sub-categories of an antibiotic name -> the canonical name used downstream.
_ANTIBIOTIC_SYNONYMS = {
    'Aztreonam.': 'Aztreonam',
    'Cefazolin..': 'Cefazolin',
    'Ceftazidime.': 'Ceftazidime',
    'Ceftriaxone (Meningeal)': 'Ceftriaxone',
    'Ceftriaxone (Non-Meningeal)': 'Ceftriaxone',
    'Ceftriaxone.': 'Ceftriaxone',
    'Ciprofloxacin.': 'Ciprofloxacin',
    'Gentamicin 500 mcg/ml.': 'Gentamicin',
    'Oxacillin Screen': 'Oxacillin',
    'Oxacillin.': 'Oxacillin',
    'PENICILLIN G (MENINGEAL)': 'Penicillin',
    'PENICILLIN G (NON-MENINGEAL)': 'Penicillin',
    'PENICILLIN V (ORAL)': 'Penicillin',
    'Penicillin..': 'Penicillin',
    'Trimethoprim/Sulfamethoxazole.': 'Trimethoprim/Sulfamethoxazole',
}


def combine_antibiotic_syns(x):
    """
    Map a raw antibiotic name to its canonical name; names without a listed synonym
    (including missing values) are returned unchanged.

    Matching is by exact name. The previous `x in ('Ciprofloxacin.')` test was a substring
    check against a plain string, so any fragment of that text (even '') was renamed to
    'Ciprofloxacin' and a missing value raised TypeError.
    """
    return _ANTIBIOTIC_SYNONYMS.get(x, x)

# Identifier columns that are concatenated into the `example` key below.
key_cols = ['pat_enc_csn_id_coded', 'order_proc_id_coded', 'sens_organism_sid', 'organism']

# Long table with one row per (example, antibiotic) and a single combined suscept label.
# Missing identifiers are replaced with the 9999 placeholder as part of the chain: calling
# `df[key_cols].fillna(..., inplace=True)` only fills a temporary copy (hence the
# SettingWithCopyWarning) and leaves `df` untouched, so a missing identifier made the whole
# `example` key NaN and groupby silently dropped those rows.
df_combined = (df
.assign(**{col: df[col].fillna(9999) for col in key_cols})
# Every part is cast to str so the placeholder concatenates cleanly with the real values.
.assign(example = lambda x: x.pat_enc_csn_id_coded.map(str) + ', ' + x.order_proc_id_coded.map(str) + ', ' + x.sens_organism_sid.map(str) + ', ' + x.organism.map(str),
        antibiotic = lambda x: x.antibiotic.map(combine_antibiotic_syns))
.groupby(['example', 'antibiotic'])
.agg({'suscept' : combine_labels})
.reset_index()
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [15]:
df_combined.head()

Unnamed: 0,example,antibiotic,suscept
0,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",Ampicillin,Resistant
1,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",Cefazolin,Susceptible
2,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",Ciprofloxacin,Susceptible
3,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",ESBL confirmation test,Resistant
4,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",Gentamicin,Susceptible


In [16]:
filter_cols = ['example', 'pat_enc_csn_id_coded', 'organism', 'order_proc_id_coded', 'sens_organism_sid',
               'Cefepime', 'Ceftriaxone', 'Cefazolin', 'Vancomycin', 'Meropenem', 'Piperacillin/Tazobactam',
               'Ampicillin', 'Penicillin', 'Oxacillin']

# Wide table: one row per `example` key, one column per antibiotic.
df_pivoted = (df_combined
.pivot(index='example', columns='antibiotic', values='suscept')
.reset_index()
)

# `example` is "<csn>, <order id>, <organism sid>, <organism>". Split it once per row and cap
# the split at three separators so an organism name that itself contains ", " stays whole
# (an unbounded split would keep only the text before its first comma).
example_parts = pd.DataFrame(
    [key.split(', ', 3) for key in df_pivoted['example']],
    columns=['pat_enc_csn_id_coded', 'order_proc_id_coded', 'sens_organism_sid', 'organism'],
    index=df_pivoted.index,
)

# The recovered identifiers are strings, exactly as they appear inside `example`.
df_wide = (df_pivoted
.assign(organism = example_parts['organism'],
        order_proc_id_coded = example_parts['order_proc_id_coded'],
        sens_organism_sid = example_parts['sens_organism_sid'],
        pat_enc_csn_id_coded = example_parts['pat_enc_csn_id_coded'])
)[filter_cols]

In [17]:
df_wide.head(20)

antibiotic,example,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Cefepime,Ceftriaxone,Cefazolin,Vancomycin,Meropenem,Piperacillin/Tazobactam,Ampicillin,Penicillin,Oxacillin
0,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",131003775398,KLEBSIELLA OXYTOCA,357750415,ZZ00,,,Susceptible,,,Susceptible,Resistant,,
1,"131003775398, 357750417, ZZ00, KLEBSIELLA OXYTOCA",131003775398,KLEBSIELLA OXYTOCA,357750417,ZZ00,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Resistant,,
2,"131003775398, 357750417, ZZ01, PROTEUS MIRABILIS",131003775398,PROTEUS MIRABILIS,357750417,ZZ01,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Susceptible,,
3,"131004006894, 358490468, ZZ00, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ00,,Susceptible,,,,,,,
4,"131004006894, 358490468, ZZ01, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ01,,Susceptible,,,,,,Susceptible,
5,"131004006894, 358490468, ZZ02, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ02,,Susceptible,,,,,,,
6,"131004006894, 358490468, ZZ03, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ03,,Susceptible,,,,,,Susceptible,
7,"131004006894, 358490468, ZZ04, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ04,,Susceptible,,,,,,,
8,"131004006894, 358490468, ZZ05, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ05,,Susceptible,,,,,,Susceptible,
9,"131004013925, 358505019, ZZ00, ESCHERICHIA COLI",131004013925,ESCHERICHIA COLI,358505019,ZZ00,Susceptible,Susceptible,Susceptible,,,Resistant,Resistant,,


In [18]:
## When the same organism appears more than once in a patient encounter, copy a reported
## suscept into the rows where that antibiotic is missing (forward fill, then backward fill).
cols = ['Cefepime', 'Ceftriaxone', 'Cefazolin', 'Vancomycin', 'Meropenem', 'Piperacillin/Tazobactam', 'Ampicillin', 'Penicillin', 'Oxacillin']
same_bug_same_encounter = df_wide.groupby(['pat_enc_csn_id_coded', 'organism'])[cols]
df_wide[cols] = same_bug_same_encounter.transform(lambda results: results.ffill().bfill())

### Find bugs that are never listed as Susceptible or Resistant to different antibiotics

In [19]:
def _ever_listed(results):
    """Return 1 if any value in `results` is 'Susceptible' or 'Resistant', otherwise 0."""
    return 1 if any(results=="Susceptible") or any(results=='Resistant') else 0

# Per organism: number of distinct encounters plus, for each antibiotic, a 0/1 flag saying
# whether that organism was ever listed as Susceptible or Resistant to it.
listed_flag_cols = ['Cefazolin', 'Ceftriaxone', 'Cefepime', 'Vancomycin', 'Meropenem',
                    'Zosyn', 'Ampicillin', 'Penicillin', 'Oxacillin']
organism_agg = {'pat_enc_csn_id_coded' : 'nunique'}
organism_agg.update({abx : _ever_listed for abx in listed_flag_cols})

organism_summary = (df_wide
.rename(columns={'Piperacillin/Tazobactam' : "Zosyn"})
.groupby('organism')
.agg(organism_agg)
.reset_index()
)

# Keep the organisms never listed for Meropenem, most frequently seen first, and save them.
mero_never_listed = (organism_summary
.query("Meropenem == 0")
.sort_values('pat_enc_csn_id_coded', ascending=False)
)
mero_never_listed.to_csv('Mero_never_listed.csv', index=None)

In [20]:
# Number of rows whose organism name contains STAPHYLOCOCCUS AUREUS and that have a Cefazolin value.
is_staph_aureus = df_wide.organism.str.contains('STAPHYLOCOCCUS AUREUS')
has_cefazolin = df_wide.Cefazolin.notna()
len(df_wide[is_staph_aureus & has_cefazolin])

68

### Read bacteria AST rules

In [21]:
# Hand-labelled rules table: one row per organism, with `<antibiotic>_r` / `<antibiotic>_s`
# flag columns that are later queried with `== 1` by the fill-in functions.
df_bug_rules = pd.read_csv('Bacteria_AST_Rules.csv')
df_bug_rules.head()

Unnamed: 0,Organism,Num Isolated in Cohort,cefazolin_r,cefazolin_s,Cefazolin Refer To Penicillin,ceftriaxone_r,ceftriaxone_s,cefepime_r,cefepime_s,zosyn_r,zosyn_s,vancomycin_r,vancomycin_s,meropenem_r,meropenem_s
0,ABIOTROPHIA SPECIES (OR GRANULICATELLA SPECIES),1,,,,,,,,,,,,,
1,ACHROMOBACTER XYLOSOXIDANS,9,,,,,,,,,,,,,
2,ACINETOBACTER BAUMANNII,7,,,,,,,,,,1.0,,,
3,ACINETOBACTER BAUMANNII COMPLEX,7,,,,,,,,,,1.0,,,
4,ACINETOBACTER LWOFFI,9,,,,,,,,,,1.0,,,


In [None]:
import pdb
# Antibiotic column prefix in Bacteria_AST_Rules.csv -> (always-resistant organisms,
# always-susceptible organisms). Filled lazily so the CSV is parsed once per antibiotic rather
# than once per row; re-running this cell empties the cache, so edits to the CSV are picked up.
AST_RULE_SETS = {}


def _bug_rule_sets(abx):
    """Return (r_bugs, s_bugs): organisms hand labelled `<abx>_r == 1` and `<abx>_s == 1`."""
    if abx not in AST_RULE_SETS:
        rules = pd.read_csv('Bacteria_AST_Rules.csv')
        AST_RULE_SETS[abx] = (
            set(rules.loc[rules[abx + '_r'] == 1, 'Organism']),
            set(rules.loc[rules[abx + '_s'] == 1, 'Organism']),
        )
    return AST_RULE_SETS[abx]


def _is_listed(value):
    """True only for an actual result. A missing cell is NaN, which `is not None` never catches."""
    return value == 'Susceptible' or value == 'Resistant'


def _is_strep(organism):
    """True for any organism whose name mentions STREPTOCOCCUS or STREPTOCOCCI."""
    return 'STREPTOCOCCUS' in organism or 'STREPTOCOCCI' in organism


def _listed_or_rule(x, listed, abx):
    """Return the listed result if present, else the hand-labelled rule for x.organism, else None."""
    # If not missing just return what it is.
    if _is_listed(listed):
        return listed

    r_bugs, s_bugs = _bug_rule_sets(abx)

    # Uses hand labelled list of resistant bugs
    if x.organism in r_bugs:
        return "Resistant"

    # Uses hand labelled always susceptible bugs
    if x.organism in s_bugs:
        return "Susceptible"

    return None


def fill_in_cefazolin(x):
    """
    Infer the Cefazolin label for one df_wide row.

    x: row exposing organism, Cefazolin, Ceftriaxone, Cefepime, Penicillin and Oxacillin.
    Returns 'Susceptible', 'Resistant', or 'Not Filled In' when no rule applies.
    """
    label = _listed_or_rule(x, x.Cefazolin, 'cefazolin')
    if label is not None:
        return label

    # For anything STREPTOCOCCUS related except STREPTOCOCCUS PNEUMONIAE - refer to penicillin
    if _is_strep(x.organism) and x.organism != 'STREPTOCOCCUS PNEUMONIAE' and _is_listed(x.Penicillin):
        return x.Penicillin

    # Check For MRSA in STAPH AUREUS
    if x.organism == 'STAPHYLOCOCCUS AUREUS' and _is_listed(x.Oxacillin):
        return x.Oxacillin

    # If anything resistant to Cefepime or Ceftriaxone, assume resistant to Cefazolin
    if x.Cefepime == "Resistant" or x.Ceftriaxone == 'Resistant':
        return "Resistant"

    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Not Filled In"


def fill_in_ceftriaxone(x):
    """
    Infer the Ceftriaxone label for one df_wide row.

    x: row exposing organism, Ceftriaxone, Cefepime, Cefazolin, Penicillin and Oxacillin.
    Returns 'Susceptible', 'Resistant', or 'Not Filled In' when no rule applies.
    """
    label = _listed_or_rule(x, x.Ceftriaxone, 'ceftriaxone')
    if label is not None:
        return label

    # For anything STREPTOCOCCUS related - refer to penicillin
    if _is_strep(x.organism) and _is_listed(x.Penicillin):
        return x.Penicillin

    # Check For MRSA in STAPH AUREUS
    if x.organism == 'STAPHYLOCOCCUS AUREUS' and _is_listed(x.Oxacillin):
        return x.Oxacillin

    # If anything resistant to Cefepime, assume resistant to Ceftriaxone
    if x.Cefepime == "Resistant":
        return "Resistant"

    # If susceptible to Cefazolin, assume susceptible to Ceftriaxone
    # (this branch used to return "Resistant", contradicting the rule it implements)
    if x.Cefazolin == "Susceptible":
        return "Susceptible"

    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Not Filled In"


def fill_in_cefepime(x):
    """
    Infer the Cefepime label for one df_wide row.

    x: row exposing organism, Cefepime, Cefazolin, Ceftriaxone, Penicillin and Oxacillin.
    Returns 'Susceptible', 'Resistant', or 'Not Filled In' when no rule applies.
    """
    label = _listed_or_rule(x, x.Cefepime, 'cefepime')
    if label is not None:
        return label

    # For anything STREPTOCOCCUS related - refer to penicillin
    if _is_strep(x.organism) and _is_listed(x.Penicillin):
        return x.Penicillin

    # Check For MRSA in STAPH AUREUS
    if x.organism == 'STAPHYLOCOCCUS AUREUS' and _is_listed(x.Oxacillin):
        return x.Oxacillin

    # If susceptible to Cefazolin or Ceftriaxone assume susceptible to Cefepime
    if x.Cefazolin == "Susceptible" or x.Ceftriaxone == "Susceptible":
        return "Susceptible"

    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Not Filled In"


def fill_in_zosyn(x):
    """
    Infer the Zosyn (Piperacillin/Tazobactam) label for one row.

    x: row exposing organism, Zosyn, Ampicillin, Penicillin and Oxacillin; the
       'Piperacillin/Tazobactam' column must already be renamed to 'Zosyn'.
    Returns 'Susceptible', 'Resistant', or 'Not Filled In' when no rule applies.
    """
    label = _listed_or_rule(x, x.Zosyn, 'zosyn')
    if label is not None:
        return label

    # For anything STREPTOCOCCUS related - refer to penicillin
    if _is_strep(x.organism) and _is_listed(x.Penicillin):
        return x.Penicillin

    # Check For MRSA in STAPH AUREUS
    if x.organism == 'STAPHYLOCOCCUS AUREUS' and _is_listed(x.Oxacillin):
        return x.Oxacillin

    # If susceptible to ampicillin, then zosyn susceptible
    if x.Ampicillin == "Susceptible":
        return x.Ampicillin

    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Not Filled In"


def fill_in_vancomycin(x):
    """
    Infer the Vancomycin label for one df_wide row from the listed result or the rules table.

    x: row exposing organism and Vancomycin.
    Returns 'Susceptible', 'Resistant', or 'Not Filled In' when no rule applies.
    """
    # Want to make sure I catch all gram negative bugs in the always-resistant list
    label = _listed_or_rule(x, x.Vancomycin, 'vancomycin')
    if label is not None:
        return label

    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Not Filled In"


def fill_in_meropenem(x):
    """
    Infer the Meropenem label for one df_wide row.

    x: row exposing organism, Meropenem, Ampicillin and Oxacillin.
    Returns 'Susceptible', 'Resistant', or 'Not Filled In' when no rule applies.
    """
    # Want to make sure I catch all gram negative bugs in the always-resistant list
    label = _listed_or_rule(x, x.Meropenem, 'meropenem')
    if label is not None:
        return label

    # Check For MRSA in STAPH AUREUS
    if x.organism == 'STAPHYLOCOCCUS AUREUS' and _is_listed(x.Oxacillin):
        return x.Oxacillin

    # If susceptible to ampicillin, then meropenem susceptible
    # (not the case with Enterococcus, but that should be in the always resistant list above)
    if x.Ampicillin == "Susceptible":
        return x.Ampicillin

    # Will apply Nancy Watz Rule Later (assume susc if not listed)
    return "Not Filled In"

# The fill-in functions read the column as `Zosyn`, so they must be applied to the renamed
# frame: applying them to `df_wide` itself (which still has 'Piperacillin/Tazobactam') makes
# fill_in_zosyn fail with AttributeError on `x.Zosyn`.
df_wide_zosyn = df_wide.rename(columns={'Piperacillin/Tazobactam' : "Zosyn"})

# Every inferred column is computed from the same pre-fill frame, so no rule ever sees
# another rule's output (e.g. a "Not Filled In" placeholder).
df_wide_temp = (df_wide_zosyn
.assign(Cefazolin=df_wide_zosyn.apply(fill_in_cefazolin, axis=1),
        Ceftriaxone=df_wide_zosyn.apply(fill_in_ceftriaxone, axis=1),
        Cefepime=df_wide_zosyn.apply(fill_in_cefepime, axis=1),
        Zosyn=df_wide_zosyn.apply(fill_in_zosyn, axis=1),
        Vancomycin=df_wide_zosyn.apply(fill_in_vancomycin, axis=1),
        Meropenem=df_wide_zosyn.apply(fill_in_meropenem, axis=1))
)

In [None]:
df_wide_temp.head(20)

In [50]:
def fill_ceftriaxone(x):
    """
    Quick Ceftriaxone fill for one df_wide row.

    Returns 'Susceptible' when the row is Cefazolin susceptible, 'Resistant' for
    PSEUDOMONAS AERUGINOSA, and otherwise the row's existing Ceftriaxone value (which may
    be missing).
    """
    if x.Cefazolin == "Susceptible":
        return "Susceptible"
    # was `elif: ...`, which is a SyntaxError and kept the whole cell from running
    elif x.organism == "PSEUDOMONAS AERUGINOSA":
        return "Resistant"
    else:
        return x.Ceftriaxone

# Preview only (df_wide itself is not modified): fill Ceftriaxone with fill_ceftriaxone, then
# label Cefepime 'Susceptible' when either the filled Ceftriaxone or the existing Cefepime is
# 'Susceptible' and 'Resistant' in every other case, including when both are missing.
ceftriaxone_filled = df_wide.apply(fill_ceftriaxone, axis=1)
(df_wide
.assign(Ceftriaxone = ceftriaxone_filled)
.assign(Cefepime = lambda filled: ['Susceptible' if 'Susceptible' in (ceftriaxone, cefepime) else "Resistant"
                                   for ceftriaxone, cefepime in zip(filled.Ceftriaxone, filled.Cefepime)])
).head(12)

antibiotic,example,pat_enc_csn_id_coded,organism,order_proc_id_coded,sens_organism_sid,Cefepime,Ceftriaxone,Cefazolin,Vancomycin,Meropenem,Piperacillin/Tazobactam,Ampicillin,Penicillin,Oxacillin
0,"131003775398, 357750415, ZZ00, KLEBSIELLA OXYTOCA",131003775398,KLEBSIELLA OXYTOCA,357750415,ZZ00,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Resistant,,
1,"131003775398, 357750417, ZZ00, KLEBSIELLA OXYTOCA",131003775398,KLEBSIELLA OXYTOCA,357750417,ZZ00,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Resistant,,
2,"131003775398, 357750417, ZZ01, PROTEUS MIRABILIS",131003775398,PROTEUS MIRABILIS,357750417,ZZ01,Susceptible,Susceptible,Susceptible,,Susceptible,Susceptible,Susceptible,,
3,"131004006894, 358490468, ZZ00, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ00,Susceptible,Susceptible,,,,,,Susceptible,
4,"131004006894, 358490468, ZZ01, STREPTOCOCCUS A...",131004006894,STREPTOCOCCUS ANGINOSUS GROUP,358490468,ZZ01,Susceptible,Susceptible,,,,,,Susceptible,
5,"131004006894, 358490468, ZZ02, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ02,Susceptible,Susceptible,,,,,,Susceptible,
6,"131004006894, 358490468, ZZ03, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ03,Susceptible,Susceptible,,,,,,Susceptible,
7,"131004006894, 358490468, ZZ04, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ04,Susceptible,Susceptible,,,,,,Susceptible,
8,"131004006894, 358490468, ZZ05, STREPTOCOCCUS C...",131004006894,STREPTOCOCCUS CONSTELLATUS,358490468,ZZ05,Susceptible,Susceptible,,,,,,Susceptible,
9,"131004013925, 358505019, ZZ00, ESCHERICHIA COLI",131004013925,ESCHERICHIA COLI,358505019,ZZ00,Susceptible,Susceptible,Susceptible,,,Resistant,Resistant,,


In [32]:
# Number of df_wide rows with no value for each antibiotic column listed in `cols`.
df_wide[cols].isna().sum()

antibiotic
Cefepime                    7777
Ceftriaxone                 5227
Cefazolin                   5302
Vancomycin                  9658
Meropenem                   6089
Piperacillin/Tazobactam     5131
Ampicillin                  3548
Penicillin                  9745
Oxacillin                  11728
dtype: int64

In [55]:
# Distinct encounters per organism, most common organism first, written to organisms.csv.
(df_wide
.groupby('organism')
.agg({'pat_enc_csn_id_coded' : 'nunique'})
.sort_values('pat_enc_csn_id_coded', ascending=False)
.to_csv('organisms.csv')
)

### Fill In Rules

In [None]:
# If same CSN, order_proc_id-

In [74]:
# Number of rows whose (example, antibiotic, suscept) triple repeats an earlier row.
# NOTE(review): no visible cell adds an `example` column to `df` itself (it is only created
# inside the df_combined chain), so this presumably relied on leftover kernel state - confirm
# before re-running on a fresh kernel.
len(df[df[['example', 'antibiotic', 'suscept']].duplicated()])

84

In [29]:
# Pivoting the raw long table raises "ValueError: Index contains duplicate entries" because
# the same (example, antibiotic) pair occurs on more than one row; df_combined avoids this by
# aggregating each pair to a single label before it is pivoted.
# NOTE(review): like the duplicate count, this needs an `example` column on `df` that no
# visible cell creates - confirm, or drop this cell from the final notebook.
df.pivot(index='example',
         columns='antibiotic',
         values='suscept').head()

ValueError: Index contains duplicate entries, cannot reshape