### DMD Ingredient->Product Lookup

Get a list of all VMPs (Virtual Medicinal Products) and AMPs (Actual Medicinal Products) from a given list of ingredients (VTMs = Virtual Therapeutic Moieties)

In [1]:
# set name of codelist for exporting file
codelist_name="rheumatology_meds"


# import or paste list of vtms (Virtual Therapeutic Moieties / Ingredients) by name
names = ['Adalimumab', 'Etanercept', 'Certolizumab', 'Infliximab', 'Golimumab','Rituximab',
        'Tocilizumab','Sarilumab','Tofacitinib','Baricitinib','Upadacitinib','Filgotinib',
        'Abatacept','Ipilimumab','Nivolumab','Pembrolizumab']


from ebmdatalab import bq
import os
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)



## Select products from DMD containing chosen ingredients

In [2]:
# convert list to tuple for use in SQL query
names_tuple = tuple(names)
if len(names_tuple)==1:
    # remove comma if only one item
    names_tuple = str(names_tuple).replace(",","")

sql = f'''
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_tuple}
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid

ORDER BY type, nm  '''

meds = bq.cached_read(sql, csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

meds.count()

type          54
id            54
bnf_code      48
nm            54
ingredient    54
ddd           38
dtype: int64

#### Add wildcards to all ingredients and append any results to csv output

In [6]:
#Create a tuple containing all ingredients with wildcards (%) to pull back ingredients 
# with prefix and suffix words

append_suff = '%\''
append_pref = '\'%'

wc_ings =  [append_pref + i + append_suff for i in names]


wc_ing_tup = tuple(wc_ings)

wc_ing_tup

("'%Adalimumab%'",
 "'%Etanercept%'",
 "'%Certolizumab%'",
 "'%Infliximab%'",
 "'%Golimumab%'",
 "'%Rituximab%'",
 "'%Tocilizumab%'",
 "'%Sarilumab%'",
 "'%Tofacitinib%'",
 "'%Baricitinib%'",
 "'%Upadacitinib%'",
 "'%Filgotinib%'",
 "'%Abatacept%'",
 "'%Ipilimumab%'",
 "'%Nivolumab%'",
 "'%Pembrolizumab%'")

In [7]:
# Create a variable for each of the missing ingredients making sure there are the matching number of 
# ingredients to test variables, ie/ 4 ingredients will require 0,1,2,3 to be called from the 

WC_ing_0 = str(wc_ing_tup[0])
WC_ing_1 = str(wc_ing_tup[1])
WC_ing_2 = str(wc_ing_tup[2])
WC_ing_3 = str(wc_ing_tup[3])
WC_ing_4 = str(wc_ing_tup[4])
WC_ing_5 = str(wc_ing_tup[5])
WC_ing_6 = str(wc_ing_tup[6])
WC_ing_7 = str(wc_ing_tup[7])
WC_ing_8 = str(wc_ing_tup[8])
WC_ing_9 = str(wc_ing_tup[9])
WC_ing_10 = str(wc_ing_tup[10])
WC_ing_11 = str(wc_ing_tup[11])
WC_ing_12 = str(wc_ing_tup[12])
WC_ing_13 = str(wc_ing_tup[13])
WC_ing_14 = str(wc_ing_tup[14])
WC_ing_15 = str(wc_ing_tup[15])

WC_missing_ing_sql = f'''
SELECT *
FROM dmd.ing
where ing.nm like {WC_ing_0}
    or ing.nm like {WC_ing_1}
    or ing.nm like {WC_ing_2}
    or ing.nm like {WC_ing_3}
    or ing.nm like {WC_ing_4}
    or ing.nm like {WC_ing_5}
    or ing.nm like {WC_ing_6}
    or ing.nm like {WC_ing_7}
    or ing.nm like {WC_ing_8}
    or ing.nm like {WC_ing_9}
    or ing.nm like {WC_ing_10}
    or ing.nm like {WC_ing_11}
    or ing.nm like {WC_ing_12}
    or ing.nm like {WC_ing_13}
    or ing.nm like {WC_ing_14}
    or ing.nm like {WC_ing_15}


ORDER BY nm  '''
WC_missing_ing_tbl = bq.cached_read(WC_missing_ing_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

WC_missing_ing_tbl

Unnamed: 0,id,isiddt,isidprev,invalid,nm
0,421777009,NaT,,False,Abatacept
1,407317001,2005-07-27,5015611000000000.0,False,Adalimumab
2,726518002,2017-10-06,3.430661e+16,False,Baricitinib
3,430306004,NaT,,False,Certolizumab pegol
4,387045004,2005-07-29,4173811000000000.0,False,Etanercept
5,39002911000001106,NaT,,False,Filgotinib
6,39039711000001109,NaT,,False,Filgotinib maleate
7,442435002,NaT,,False,Golimumab
8,386891004,2005-08-01,4428711000000000.0,False,Infliximab
9,697995005,2018-01-25,1.937301e+16,False,Ipilimumab


#### Create final list including wildcard ingredient names for ingredients with no exact matches

In [8]:

sql_incl_wc_match = f'''
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_tuple}
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid

UNION DISTINCT

SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing 
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid
WHERE ing.nm LIKE {WC_ing_0}
    OR ing.nm LIKE {WC_ing_1}
    OR ing.nm LIKE {WC_ing_2}
    OR ing.nm LIKE {WC_ing_3}

  '''

meds_incl_wc_match = bq.cached_read(sql_incl_wc_match
            , csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

meds_incl_wc_match            


Unnamed: 0,type,id,bnf_code,nm,ingredient,ddd
0,vmp,30033011000001108,0801050CAAAAAAA,Pembrolizumab 50mg powder for solution for infusion vials,Pembrolizumab,
1,vmp,11762011000001101,1001030V0AAAAAA,Abatacept 250mg powder for solution for infusion vials,Abatacept,27.0
2,vmp,36065611000001101,1001030D0AAAAAA,Etanercept 25mg powder and solvent for solution for injection vials,Etanercept,7.0
3,vmp,36065711000001105,1001030D0AAABAB,Etanercept 50mg powder and solvent for solution for injection vials,Etanercept,7.0
4,vmp,19946811000001101,1001030D0AAAFAF,Etanercept 10mg powder and solvent for solution for injection vials,Etanercept,7.0
5,vmp,34346011000001104,1001030ACAAAAAA,Baricitinib 4mg tablets,Baricitinib,4.0
6,vmp,34625211000001109,1001030ACAAABAB,Baricitinib 2mg tablets,Baricitinib,4.0
7,vmp,400687000,1001030T0AAAAAA,Infliximab 100mg powder for solution for infusion vials,Infliximab,3.75
8,vmp,35894411000001100,1001030S0AAAAAA,Adalimumab 40mg/0.8ml solution for injection pre-filled syringes,Adalimumab,2.9
9,vmp,11236911000001103,1001030S0AAABAB,Adalimumab 40mg/0.8ml solution for injection pre-filled disposable devices,Adalimumab,2.9


### Results

In [9]:
# How many products have DDDs using exact match
print(meds[["ddd", "id"]].count())

# How many products have DDDs using wildcard matching
print(meds_incl_wc_match[["ddd", "id"]].count())

ddd    38
id     54
dtype: int64
ddd    41
id     57
dtype: int64


In [None]:
#Exact and non exact (including wildcards) ingredient name matches
meds_incl_wc_match

In [None]:
#Exact ingredient name match only
meds