In [1]:
import pandas as pd
from ebmdatalab import bq
import os
import matplotlib.pyplot as plt

# 2019/2021 WHO AWaRe list comparison

In [5]:
# Fetch the 2019 list from BigQuery; the result is cached in a local CSV.
qry = """
select * from `ebmdatalab.jonm.who_aware`
"""
df_2019 = bq.cached_read(qry, 'who_aware_2019.csv')

# Fetch the 2021 list the same way, with its own cache file.
qry = """
select * from `ebmdatalab.jonm.who_aware_2021`
"""
df_2021 = bq.cached_read(qry, 'who_aware_2021.csv')

Downloading: 100%|██████████| 180/180 [00:00<00:00, 936.16rows/s]
Downloading: 100%|██████████| 258/258 [00:00<00:00, 1957.20rows/s]


In [6]:
# Route of administration is read from an "(iv)" / "(oral)" marker in the
# antibiotic name (matched case-insensitively); names with no marker get None.
df_2019['route'] = [
    'IV' if "(iv)" in lowered else ("Oral" if "(oral)" in lowered else None)
    for lowered in (name.lower() for name in df_2019.antibiotic)
]
# Ingredients in a name are separated by '/' or '-', so the ingredient count
# is the number of separators plus one.
df_2019['ingredient_count'] = [
    name.count('-') + name.count('/') + 1 for name in df_2019.antibiotic
]
df_2019.describe(include='all')

Unnamed: 0,antibiotic,class,atc,category,listed_on_eml_2019,route,ingredient_count
count,180,180,180,180,180,8,180.0
unique,180,34,173,4,2,2,
top,Omadacycline,Penicillins,to be assigned,Watch,False,Oral,
freq,1,22,5,106,141,4,
mean,,,,,,,1.077778
std,,,,,,,0.268569
min,,,,,,,1.0
25%,,,,,,,1.0
50%,,,,,,,1.0
75%,,,,,,,1.0


In [7]:
# Route of administration is read from an "(iv)" / "(oral)" marker in the
# antibiotic name (matched case-insensitively); names with no marker get None.
df_2021['route'] = [
    'IV' if "(iv)" in lowered else ("Oral" if "(oral)" in lowered else None)
    for lowered in (name.lower() for name in df_2021.antibiotic)
]
# Ingredients in a name are separated by '/' or '-', so the ingredient count
# is the number of separators plus one.
df_2021['ingredient_count'] = [
    name.count('-') + name.count('/') + 1 for name in df_2021.antibiotic
]
df_2021.describe(include='all')

Unnamed: 0,antibiotic,class,atc,category,listed_on_eml_2019,route,ingredient_count
count,258,258,258,258,258,24,258.0
unique,258,38,255,4,2,2,
top,Cefalotin,Penicillins,to be assigned,Watch,False,Oral,
freq,1,34,2,140,217,12,
mean,,,,,,,1.069767
std,,,,,,,0.270064
min,,,,,,,1.0
25%,,,,,,,1.0
50%,,,,,,,1.0
75%,,,,,,,1.0


## Antibiotics in 2019 list but not 2021 list

In [8]:
# Rows of the 2019 list whose antibiotic name has no exact match in the 2021
# list. The comparison is an exact string match, so differences in case or
# punctuation alone also show up here.
df_2019.loc[~df_2019['antibiotic'].isin(df_2021['antibiotic'])]

Unnamed: 0,antibiotic,class,atc,category,listed_on_eml_2019,route,ingredient_count
21,Cefotiam hexetil,Second-generation cephalosporins,J01DC07,Watch,False,,1
66,Streptomycin,Aminoglycosides,J01GA01,Watch,False,,1
68,Kanamycin,Aminoglycosides,J01GB04,Watch,False,,1
69,Neomycin,Aminoglycosides,J01GB05,Watch,False,,1
98,Fusidic Acid,Steroid antibacterials,J01XC01,Watch,False,,1
101,Rifamycin,Rifamycins,J04AB03,Watch,False,,1
127,Amoxicillin/clavulanic Acid,Beta lactam - beta lactamase inhibitor,J01CR02,Access,True,,2
141,Sulfamethizole/trimethoprim,Trimethoprim - sulfonamide combinations,J01EB02,Access,False,,2
161,Ceftazidime-avibactam,Third-generation cephalosporins,J01DD52,Reserve,True,,2
163,Meropenem-vaborbactam,Carbapenems,J01DH52,Reserve,True,,2


* Cefotiam hexetil -> "Cefotiam"
* Streptomycin -> listed with separate Oral and IV routes
* Kanamycin -> listed with separate Oral and IV routes
* Neomycin -> listed with separate Oral and IV routes
* Fusidic Acid -> "Fusidic acid" NB lowercase "a"
* Rifamycin -> listed with separate Oral and IV routes
* Amoxicillin/clavulanic Acid -> "Amoxicillin/clavulanic acid" NB lowercase "a"
* Sulfamethizole/trimethoprim -> no longer listed in combo with TMP
* Ceftazidime-avibactam -> "Ceftazidime/avibactam"
* Meropenem-vaborbactam -> "Meropenem/vaborbactam"
* Ceftolozane-tazobactam -> "Ceftolozane/tazobactam"
* Dalfopristin-quinupristin -> "Dalfopristin/quinupristin"
* Colistin -> listed with separate Oral and IV routes
* Polymyxin B -> listed with separate Oral and IV routes


## Antibiotics in 2021 list but not 2019 list

In [9]:
# Raise the row display limit so the full difference table is rendered.
pd.options.display.max_rows = 500
# Rows of the 2021 list whose antibiotic name has no exact match in the 2019 list.
display(df_2021.loc[~df_2021['antibiotic'].isin(df_2019['antibiotic'])])

Unnamed: 0,antibiotic,class,atc,category,listed_on_eml_2019,route,ingredient_count
1,Troleandomycin,Macrolides,J01FA08,Watch,False,,1
2,Rokitamycin,Macrolides,J01FA12,Watch,False,,1
4,Fidaxomicin,Macrolides,A07AA12,Watch,False,,1
9,Flurithromycin,Macrolides,J01FA14,Watch,False,,1
11,Miocamycin,Macrolides,J01FA11,Watch,False,,1
15,Solithromycin,Macrolides,J01FA16,Watch,False,,1
16,Pipemidic acid,Quinolones,J01MB04,Watch,False,,1
17,Cinoxacin,Quinolones,J01MB06,Watch,False,,1
19,Nemonoxacin,Quinolones,J01MB08,Watch,False,,1
20,Oxolinic acid,Quinolones,J01MB05,Watch,False,,1


## Routes of administration

In [34]:
# Remove the bracketed route marker (e.g. "(IV)", "(oral)") from each name,
# count how many list entries share each resulting base name, then tabulate
# those counts (index = entries per base name, value = number of base names).
#
# The pattern is a raw string and regex=True is passed explicitly: from
# pandas 2.0 `Series.str.replace` treats the pattern as a literal string by
# default, which would leave every name unchanged and silently put all of
# them in the "1" bucket.
(
    df_2019
    .assign(ab_no_route=df_2019.antibiotic.str.replace(r'\(\w*\)', '', regex=True))
    .groupby('ab_no_route')
    .size()
    .reset_index()
    .groupby(0)
    .size()
)

0
1    172
2      4
dtype: int64

In [35]:
# Remove the bracketed route marker (e.g. "(IV)", "(oral)") from each name,
# count how many list entries share each resulting base name, then tabulate
# those counts (index = entries per base name, value = number of base names).
#
# The pattern is a raw string and regex=True is passed explicitly: from
# pandas 2.0 `Series.str.replace` treats the pattern as a literal string by
# default, which would leave every name unchanged and silently put all of
# them in the "1" bucket.
(
    df_2021
    .assign(ab_no_route=df_2021.antibiotic.str.replace(r'\(\w*\)', '', regex=True))
    .groupby('ab_no_route')
    .size()
    .reset_index()
    .groupby(0)
    .size()
)

0
1    234
2     12
dtype: int64

## Numbers of ingredients

In [37]:
# Number of 2019 list entries for each ingredient count.
df_2019.groupby(df_2019['ingredient_count']).size()

ingredient_count
1    166
2     14
dtype: int64

In [38]:
# Number of 2021 list entries for each ingredient count.
df_2021.groupby(df_2021['ingredient_count']).size()

ingredient_count
1    241
2     16
3      1
dtype: int64

## Matched VMPs

In [39]:
# VMPs matched to the 2019 list, read from BigQuery and cached as a local CSV.
qry = "select * from `ebmdatalab.jonm.who_aware_vmp`"
df_vmp_2019 = bq.cached_read(qry, 'who_aware_vmp_2019.csv')

# VMPs matched to the 2021 list, cached in a separate CSV.
qry = "select * from `ebmdatalab.jonm.who_aware_vmp_2021`"
df_vmp_2021 = bq.cached_read(qry, 'who_aware_vmp_2021.csv')

Downloading: 100%|██████████| 723/723 [00:00<00:00, 3037.57rows/s]
Downloading: 100%|██████████| 705/705 [00:00<00:00, 3662.79rows/s]


In [42]:
# Summarise every column of the 2019 matched-VMP table (numeric and non-numeric).
# NOTE: `vmp` and `route` hold numeric codes, so their mean/std/quantile rows in
# the output are not meaningful; read `count` for those columns only.
df_vmp_2019.describe(include='all')

Unnamed: 0,vmp,route,ing_nm_1,ing_nm_2,atc,category,class,listed_on_eml_2019
count,723.0,50.0,723,38,723,723,723,723
unique,,,85,8,90,4,31,2
top,,,Nitrofurantoin,clavulanic Acid,J01XE01,Access,Aminoglycosides,True
freq,,,86,12,86,337,126,430
mean,1.622628e+16,35035810.0,,,,,,
std,1.409722e+16,10383400.0,,,,,,
min,134561000.0,26643010.0,,,,,,
25%,1696806000000000.0,26643010.0,,,,,,
50%,1.293391e+16,26643010.0,,,,,,
75%,3.274976e+16,47625010.0,,,,,,


In [43]:
# Summarise every column of the 2021 matched-VMP table (numeric and non-numeric).
# NOTE: `vmp` and `route` hold numeric codes, so their mean/std/quantile rows in
# the output are not meaningful; read `count` for those columns only.
df_vmp_2021.describe(include='all')

Unnamed: 0,vmp,route,ing_nm_1,ing_nm_2,ing_nm_3,atc,category,class,listed_on_eml_2019
count,705.0,62.0,705,45,0.0,705,705,705,705
unique,,,92,9,0.0,97,4,32,2
top,,,Nitrofurantoin,clavulanic acid,,J01XE01,Access,Aminoglycosides,True
freq,,,86,12,,86,349,93,429
mean,1.679172e+16,33411390.0,,,,,,,
std,1.402797e+16,9888395.0,,,,,,,
min,134561000.0,26643010.0,,,,,,,
25%,4145011000000000.0,26643010.0,,,,,,,
50%,1.293531e+16,26643010.0,,,,,,,
75%,3.315891e+16,47625010.0,,,,,,,


## Matched VMPs in 2019 list but not 2021 list

In [44]:
# Matched VMPs from the 2019 table whose `vmp` code does not appear in the 2021 table.
df_vmp_2019.loc[~df_vmp_2019['vmp'].isin(df_vmp_2021['vmp'])]

Unnamed: 0,vmp,route,ing_nm_1,ing_nm_2,atc,category,class,listed_on_eml_2019
12,3550411000001106,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
28,330951005,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
43,3405211000001103,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
44,3788411000001100,,Polymyxin B,,J01XB02,Reserve,Polymyxins,True
82,3405411000001104,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
93,331851002,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
102,330352007,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
128,407831007,,Neomycin,,J01GB05,Watch,Aminoglycosides,False
129,3861711000001101,,Polymyxin B,,J01XB02,Reserve,Polymyxins,True
140,4009711000001102,,Polymyxin B,,J01XB02,Reserve,Polymyxins,True


## Matched VMPs in 2021 list but not 2019 list

In [45]:
# Matched VMPs from the 2021 table whose `vmp` code does not appear in the 2019 table.
df_vmp_2021.loc[~df_vmp_2021['vmp'].isin(df_vmp_2019['vmp'])]

Unnamed: 0,vmp,route,ing_nm_1,ing_nm_2,ing_nm_3,atc,category,class,listed_on_eml_2019
23,324554008,26643006.0,Tinidazole,,,P01AB02,Access,Imidazoles,False
40,32785511000001100,,Demeclocycline,,,J01AA01,Watch,Tetracyclines,False
79,15474111000001108,,Sulfapyridine,,,J01EB04,Access,Sulfonamides,False
106,703666002,,Fidaxomicin,,,A07AA12,Watch,Macrolides,False
139,13454811000001105,,Sulfadiazine,tetroxoprim,,J01EE06,Access,Trimethoprim - sulfonamide combinations,False
194,15374811000001105,,Sulfadiazine,tetroxoprim,,J01EE06,Access,Trimethoprim - sulfonamide combinations,False
205,371595007,,Demeclocycline,,,J01AA01,Watch,Tetracyclines,False
207,11713511000001103,,Demeclocycline,,,J01AA01,Watch,Tetracyclines,False
219,11713411000001102,,Demeclocycline,,,J01AA01,Watch,Tetracyclines,False
286,13454711000001102,,Sulfadiazine,tetroxoprim,,J01EE06,Access,Trimethoprim - sulfonamide combinations,False


## Changes in formatting

There were significant changes in the formatting of both the `antibiotic` and `class` fields in the 2021 dataset vs the 2019 dataset. The 2021 data were standardised to match the 2019 conventions and any inconsistent presentations resolved.
* Routes of administration represented by a '_IV' or '_oral' suffix in 2021 vs ' (IV)' or ' (oral)' in 2019
* Salts/alternative forms of base drugs represented as hyphenated pairs rather than two separate words
* Trailing spaces in `antibiotic` and `class` fields, e.g. both 'Penicillins' and 'Penicillins ' present in source data
* Changes to hyphenation in `class`, e.g. 'First-generation cephalosporins' -> 'First-generation-cephalosporins'. N.B.:
  * 'Fifth-generation cephalosporins' remained unchanged
  * 'Nitrofuran-derivatives' and 'Nitrofuran derivatives' both present in 2021 data
  * 'Phenol derivates' presented unhyphenated in 2021 data
* Combination classes format changed from '-' separation to '/' e.g. 'Beta lactam - beta lactamase inhibitor' -> 'Beta-lactam/beta-lactamase-inhibitor'
* Sub-classes with supplementary info changed from bracketed suffix to underscore suffix, e.g. 'Beta lactam - beta lactamase inhibitor (anti-pseudomonal)' -> 'Beta-lactam/beta-lactamase-inhibitor_anti-pseudomonal'

These formatting changes are in addition to the new classes introduced in the 2021 data and the removal of the 'Carboxypenicillins' class (drugs in that class are now classified as 'Penicillins' in 2021).