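This notebook builds a list of antimicrobial medicines: first from products with antibacterial BNF codes, then extended with dm+d products that contain the same ingredients.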

In [14]:
from ebmdatalab import bq
import os
import pandas as pd
import numpy as np

#mapping = pd.read_csv('../lib/ingredients.csv')
#cats = pd.read_csv('../lib/categories.csv', header = None, names=["category_code","category"])

#mapping.groupby('product_id')["category_id"].nunique()
#mapping['product_id'].nunique()

#mapping = mapping.join(cats, on='product_id', how="left")



## Antimicrobials in BNF

In [15]:
# SQL for the products whose BNF code identifies them as antibacterial.
# - `bnf_codes` CTE: presentation codes from hscic.bnf that start with '0501'
#   (presumably BNF section 5.1, antibacterial drugs -- TODO confirm).
# - First SELECT ("vmp"): rows from dmd.vmp with their ingredient names
#   (via dmd.vpi / dmd.ing) and route descriptions (via dmd.droute / dmd.route).
# - Second SELECT ("amp"): rows from dmd.amp; ingredients and routes are looked
#   up through the parent VMP id (amp.vmp).
# Ingredient joins are INNER and route joins are LEFT, so a product without a
# route is kept with a NULL route, and a product can appear on several rows
# (one per ingredient/route combination).
sql = '''
WITH bnf_codes AS (  
  SELECT presentation_code as bnf_code, chemical 
  FROM hscic.bnf 
  WHERE 
  (presentation_code LIKE '0501%')
) 
  
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, 
  route.descr as route
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing
LEFT JOIN dmd.droute on vmp.id = droute.vmp	
LEFT JOIN dmd.route on route.cd = droute.route

WHERE bnf_code IN (SELECT bnf_code FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, amp.id, bnf_code, amp.descr as nm, ing.nm as ingredient,
  route.descr as route
FROM dmd.amp
INNER JOIN dmd.vpi AS vpi ON amp.vmp=vpi.vmp
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing
LEFT JOIN dmd.droute on amp.vmp = droute.vmp	
LEFT JOIN dmd.route on route.cd = droute.route

WHERE bnf_code IN (SELECT bnf_code FROM bnf_codes)

ORDER BY type, nm  '''

# Run the BNF-based query; the result is presumably cached at `csv_path` by
# ebmdatalab's `cached_read` (confirm against the library docs).
antibac_meds = bq.cached_read(sql, csv_path=os.path.join('..','data','antibac_meds.csv'))

# Show every row and the full text of every column when frames are displayed.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Record that these rows were found via their BNF code.
antibac_meds["source"] = "bnf"

# Non-null counts of product ids and BNF codes. This must be the last
# expression of the cell, otherwise the notebook discards the result.
antibac_meds[["id", "bnf_code"]].count()

In [16]:
#antibac_meds.merge(mapping, left_on='id', right_on='product_id', how='left')


## Dm+d Additions
Some medicines are used only in hospitals and therefore don't have BNF codes (BNF codes are used for primary care prescribing).

We will manually select these from dmd using the ingredients found in the products we identified from BNF codes.

In [17]:
  
# set name of codelist for exporting file
codelist_name="antibac_meds"

# Distinct ingredient names of the products identified from BNF codes above.
names = antibac_meds["ingredient"].drop_duplicates()
print("No of antibiotic ingredients from bnf list: ", len(names))

# filter out ingredients identified as not being antimicrobial agents
not_antimicrobial = {"Citric acid", "Thalidomide", "Sodium bicarbonate", "Sodium citrate"}
names_filtered = [k for k in names if k not in not_antimicrobial]
print("No of antibiotic ingredients after filtering: ", len(names_filtered))

# Kept as a plain tuple for any later cell that refers to it.
names_tuple = tuple(names_filtered)

# An empty list would render as `IN ()`, which is not valid SQL; fail early
# with a clear message instead of sending a broken query.
if not names_filtered:
    raise ValueError("No ingredient names left to query: an empty IN () list is not valid SQL")


def _sql_string_literal(value):
    """Return `value` as a single-quoted SQL string literal.

    Backslashes and single quotes are backslash-escaped so that a name
    containing either character cannot terminate the literal early.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Build the IN (...) list explicitly rather than relying on Python's tuple
# repr: the repr needs a special case for one element (the old workaround
# stripped every comma, including commas inside a name) and leaves the quoting
# of names to Python rather than to SQL escaping rules.
names_sql = "(" + ", ".join(_sql_string_literal(k) for k in names_filtered) + ")"

# Same shape as the BNF-based query, but products are selected by ingredient
# name instead of BNF code, so products without a BNF code are included.
# dmd.ddd is LEFT JOINed to add the `ddd` and `ddd_uomcd` columns where present.
sql = f'''
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, 
    route.descr as route, ddd.ddd, ddd.ddd_uomcd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_sql}
LEFT JOIN dmd.droute on vmp.id = droute.vmp
LEFT JOIN dmd.route on route.cd = droute.route
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid

UNION ALL

SELECT "amp" AS type, amp.id, bnf_code, amp.descr as nm, ing.nm as ingredient, 
    route.descr as route, ddd.ddd, ddd.ddd_uomcd
FROM dmd.amp
INNER JOIN dmd.vpi AS vpi ON amp.vmp=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_sql}
LEFT JOIN dmd.droute on amp.vmp = droute.vmp
LEFT JOIN dmd.route on route.cd = droute.route
LEFT JOIN dmd.ddd on amp.vmp=ddd.vpid

ORDER BY type, nm  '''

dmd_antibac_meds = bq.cached_read(sql, csv_path=os.path.join('..','data',f'dmd_{codelist_name}.csv'))

# Non-null counts of ids and BNF codes; printed because a bare expression in
# the middle of a cell is not rendered by the notebook.
print(dmd_antibac_meds[["id", "bnf_code"]].count())

print("No of antibiotic ingredients in dmd list: ", dmd_antibac_meds["ingredient"].nunique())

No of antibiotic ingredients from bnf list:  122
No of antibiotic ingredients after filtering:  118
No of antibiotic ingredients in dmd list:  118


**Note: the query above should also have captured all the products we previously found by selecting on BNF codes, so we don't need to join the two tables.**

### Investigate routes of administration

In [18]:
# Count of non-null product ids for each route of administration, largest first.
route_counts = dmd_antibac_meds.groupby("route")["id"].count()
route_counts.sort_values(ascending=False)

route
Oral                                      3281
Intravenous                                797
Intramuscular                              318
Ocular                                     217
Cutaneous                                  210
Auricular                                   85
Route of administration not applicable      60
Inhalation                                  59
Vaginal                                     46
Intrapleural                                29
Gastroenteral                               29
Intraarticular                              27
Intrathecal                                 17
Rectal                                      12
Intracameral                                11
Intraperitoneal                              9
Intravitreal                                 8
Intralesional                                6
Nasal                                        5
Oromucosal                                   4
Gingival                                     4
Intrace

In [19]:
# categorise routes of administration

# Broad category -> the dm+d route descriptions it covers. Any route not
# listed here (including a missing route) falls into "Other".
route_groups = {
    "Oral": ["Oral"],
    "Injectable": ["Intravenous", "Subcutaneous", "Intramuscular"],
    "Topical": ["Cutaneous", "Auricular", "Vaginal", "Intralesional",
                "Nasal", "Oromucosal", "Gingival"],
}

detailed_route = dmd_antibac_meds["route"]
dmd_antibac_meds["Route"] = np.select(
    [detailed_route.isin(members) for members in route_groups.values()],
    list(route_groups),
    default="Other",
)

# Preview the first few distinct (category, detailed route) pairs.
route_pairs = dmd_antibac_meds[["Route", "route"]].drop_duplicates()
print(route_pairs.sort_values(by="Route").head())

# The detailed route is no longer needed once the broad category exists.
dmd_antibac_meds = dmd_antibac_meds.drop(columns="route")

           Route          route
3822  Injectable   Subcutaneous
19    Injectable  Intramuscular
20    Injectable    Intravenous
2           Oral           Oral
3994       Other  Gastroenteral


In [20]:
# check number of distinct medicines
# The aggregations are passed as a list, not a set: a set has no defined
# iteration order, so the order of the result rows/columns could change
# from one run to the next.
name_stats = ["nunique", "count"]

# Overall: distinct product names vs. total rows.
print(dmd_antibac_meds["nm"].agg(name_stats))

# The same figures split by product type ("vmp" / "amp").
dmd_antibac_meds.groupby("type")["nm"].agg(name_stats)

nunique    4351
count      5249
Name: nm, dtype: int64


Unnamed: 0_level_0,nunique,count
type,Unnamed: 1_level_1,Unnamed: 2_level_1
amp,3454,4204
vmp,897,1045


In [21]:
# check how many products have DDDs
# ddd_flag is 1 where the row has a non-null `ddd` value and 0 otherwise.
dmd_antibac_meds["ddd_flag"] = np.where(dmd_antibac_meds["ddd"].notnull(), 1, 0)

# "count" is the number of rows in the group and "sum" the number of those
# with a DDD. A list (not a set) keeps the column order stable between runs.
ddd_stats = ["count", "sum"]
print(dmd_antibac_meds.groupby(["type"])["ddd_flag"].agg(ddd_stats))
print(dmd_antibac_meds.groupby(["Route"])["ddd_flag"].agg(ddd_stats))


      count   sum
type             
amp    4204  3419
vmp    1045   731
            count   sum
Route                  
Injectable   1117   946
Oral         3281  3029
Other         491   165
Topical       360    10


In [23]:
# Write the final codelist (including the DataFrame index) to the data folder.
export_path = os.path.join('..', 'data', 'antibac_codelist.csv')
dmd_antibac_meds.to_csv(export_path)
