In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("top_bugs.csv")
df.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,index_time,organism,department_name
0,JCd497d7,131239550685,2017-10-08 22:29:00 UTC,ESCHERICHIA COLI,PEDIATRIC EMERGENCY DEPARTMENT
1,JCd3b410,131246515333,2018-01-23 04:37:00 UTC,ESCHERICHIA COLI,PEDIATRIC EMERGENCY DEPARTMENT
2,JCd3b410,131246515333,2018-01-23 04:37:00 UTC,STREPTOCOCCUS ANGINOSUS GROUP,PEDIATRIC EMERGENCY DEPARTMENT
3,JCe1e328,131247746590,2018-02-19 06:21:00 UTC,ESCHERICHIA COLI,PEDIATRIC EMERGENCY DEPARTMENT
4,JCd01d34,131248224581,2018-02-12 06:10:00 UTC,STREPTOCOCCUS MITIS GROUP,PEDIATRIC EMERGENCY DEPARTMENT


In [3]:
### Filter out coag neg staph and rename MRSA to Staph Aureus
df = (df
    .query("organism != 'COAG NEGATIVE STAPHYLOCOCCUS'")
    .assign(organism = lambda x: [a if a != 'STAPH AUREUS {MRSA}' else 'STAPHYLOCOCCUS AUREUS'
                                 for a in x.organism])
)

In [9]:
### Get Top 10 bugs by department name where we count by number of encounters bug grew. 
df_stanford = (df
    .query('not department_name.str.contains("VCP")', engine='python')
    .groupby('organism')
    .agg({'pat_enc_csn_id_coded' : 'nunique'})
    .reset_index()
    .rename(columns={'organism' : 'Organism', 'pat_enc_csn_id_coded' : "Count"})
    .sort_values('Count', ascending=False)
    .head(10)
)


df_valley_care = (df
    .query('department_name.str.contains("VCP")', engine='python')
    .groupby('organism')
    .agg({'pat_enc_csn_id_coded' : 'nunique'})
    .reset_index()
    .rename(columns={'organism' : 'Organism', 'pat_enc_csn_id_coded' : "Count"})
    .sort_values('Count', ascending=False)
    .head(10)
)
    

In [10]:
df_stanford

Unnamed: 0,Organism,Count
79,ESCHERICHIA COLI,3474
89,KLEBSIELLA PNEUMONIAE,928
78,ENTEROCOCCUS SPECIES,884
133,STAPHYLOCOCCUS AUREUS,739
114,PSEUDOMONAS AERUGINOSA,368
109,PROTEUS MIRABILIS,341
73,ENTEROCOCCUS FAECALIS,214
88,KLEBSIELLA OXYTOCA,159
137,STREPTOCOCCUS AGALACTIAE (GROUP B),147
138,STREPTOCOCCUS AGALACTIAE {GROUP B},117


In [11]:
df_valley_care

Unnamed: 0,Organism,Count
14,ESCHERICHIA COLI,406
17,KLEBSIELLA PNEUMONIAE,108
23,PROTEUS MIRABILIS,95
35,STAPHYLOCOCCUS AUREUS,76
12,ENTEROCOCCUS FAECALIS,69
28,PSEUDOMONAS AERUGINOSA,50
19,MORGANELLA MORGANII,19
10,ENTEROBACTER CLOACAE COMPLEX,19
16,KLEBSIELLA OXYTOCA,16
39,STREPTOCOCCUS ANGINOSUS GROUP,12


In [12]:
df_valley_care.columns = pd.MultiIndex.from_product([['Valley Care ED'], df_valley_care.columns])
df_stanford.columns = pd.MultiIndex.from_product([['Stanford ED'], df_stanford.columns])

df_bugs = pd.concat([df_valley_care.reset_index(drop=True), df_stanford.reset_index(drop=True)], axis=1)

In [13]:
df_bugs

Unnamed: 0_level_0,Valley Care ED,Valley Care ED,Stanford ED,Stanford ED
Unnamed: 0_level_1,Organism,Count,Organism,Count
0,ESCHERICHIA COLI,406,ESCHERICHIA COLI,3474
1,KLEBSIELLA PNEUMONIAE,108,KLEBSIELLA PNEUMONIAE,928
2,PROTEUS MIRABILIS,95,ENTEROCOCCUS SPECIES,884
3,STAPHYLOCOCCUS AUREUS,76,STAPHYLOCOCCUS AUREUS,739
4,ENTEROCOCCUS FAECALIS,69,PSEUDOMONAS AERUGINOSA,368
5,PSEUDOMONAS AERUGINOSA,50,PROTEUS MIRABILIS,341
6,MORGANELLA MORGANII,19,ENTEROCOCCUS FAECALIS,214
7,ENTEROBACTER CLOACAE COMPLEX,19,KLEBSIELLA OXYTOCA,159
8,KLEBSIELLA OXYTOCA,16,STREPTOCOCCUS AGALACTIAE (GROUP B),147
9,STREPTOCOCCUS ANGINOSUS GROUP,12,STREPTOCOCCUS AGALACTIAE {GROUP B},117


In [14]:
df_bugs.to_html('table2_top_bugs.html', index=None)

In [15]:
df_bugs.to_csv('table2_top_bugs.csv', index=None)