### DMD Ingredient->Product Lookup

Get a list of all VMPs (Virtual Medicinal Products) and AMPs (Actual Medicinal Products) from a given list of ingredients (VTMs = Virtual Therapeutic Moieties)

In [None]:
# set name of codelist for exporting file
codelist_name="rheumatology_meds"


# import or paste list of vtms (Virtual Therapeutic Moieties / Ingredients) by name
names = ['Adalimumab', 'Etanercept', 'Certolizumab', 'Infliximab', 'Golimumab','Rituximab',
        'Tocilizumab','Sarilumab','Tofacitinib','Baricitinib','Upadacitinib','Filgotinib',
        'Abatacept','Ipilimumab','Nivolumab','Pembrolizumab']


from ebmdatalab import bq
import os
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)



## Select products from DMD containing chosen ingredients

In [17]:
# convert list to tuple for use in SQL query
names_tuple = tuple(names)
if len(names_tuple)==1:
    # remove comma if only one item
    names_tuple = str(names_tuple).replace(",","")

sql = f'''
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_tuple}
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid

ORDER BY type, nm  '''

meds = bq.cached_read(sql, csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

meds.count()

type          54
id            54
bnf_code      48
nm            54
ingredient    54
ddd           38
dtype: int64

#### Check if any ingredients were not found

In [16]:
ings = list(meds.ingredient.drop_duplicates())
names.sort()
ings.sort()
print("Selected ingredients:", names)
print("Ingredients found:", ings)

missing_ings = [i for i in names if i not in ings]
print("Ingredients NOT found:", missing_ings)

Selected ingredients: ['Abatacept', 'Adalimumab', 'Baricitinib', 'Certolizumab', 'Etanercept', 'Filgotinib', 'Golimumab', 'Infliximab', 'Ipilimumab', 'Nivolumab', 'Pembrolizumab', 'Rituximab', 'Sarilumab', 'Tocilizumab', 'Tofacitinib', 'Upadacitinib']
Ingredients found: ['Abatacept', 'Adalimumab', 'Baricitinib', 'Etanercept', 'Golimumab', 'Infliximab', 'Ipilimumab', 'Nivolumab', 'Pembrolizumab', 'Rituximab', 'Sarilumab', 'Tocilizumab']
Ingredients NOT found: ['Certolizumab', 'Filgotinib', 'Tofacitinib', 'Upadacitinib']


#### Investigate ingredients not found using wildcards to find non-exact matches

In [None]:
#WORKING - search for missing ingredient name with wildcard, to look for names with suffix or prefix
# if present, add 'nm' to the updated ing list and rerun below 
# The above is a work around approach as don't currently have code which uses wildcard searching 

search_term = '"%Filgotinib%"'

missing_ing_sql = f'''
SELECT *
FROM dmd.ing
where ing.nm like {search_term}

ORDER BY nm  '''

single_ing_missing_tbl = bq.cached_read(missing_ing_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))
               

single_ing_missing_tbl

Unnamed: 0,id,isiddt,isidprev,invalid,nm
0,39002911000001106,NaT,,False,Filgotinib
1,39039711000001109,NaT,,False,Filgotinib maleate


In [47]:
#WORKING - look for id's of missing ing's found in the targetted wildcard search above in vpi table
#Result - some ingredients are present only with a prefix/suffix in the vpi table
#Action - add the full ing names not previously included to the revised code list below 

missing_ids = tuple(single_ing_missing_tbl.id)

vpi_sql = f'''
SELECT *
FROM dmd.vpi
where ing IN {missing_ids}

ORDER BY ing  '''

vpi_missing_tbl = bq.cached_read(vpi_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))
               

vpi_missing_tbl

Unnamed: 0,vmp,ing,basis_strnt,bs_subid,strnt_nmrtr_val,strnt_nmrtr_uom,strnt_dnmtr_val,strnt_dnmtr_uom
0,39046311000001104,39039711000001109,2,39002911000001106,200.0,258684004,,
1,39046211000001107,39039711000001109,2,39002911000001106,100.0,258684004,,


#### Add wildcards to missing ingredients and append any results to csv output

In [82]:
#Create a tuple containing all missing ingredients with wildcards (%) to pull back ingredients 
# with prefix and suffix words

append_suff = '%\''
append_pref = '\'%'

wc_missing_ings =  [append_pref + i + append_suff for i in missing_ings]


wc_missing_ing_tup = tuple(wc_missing_ings)

wc_missing_ing_tup

("'%Certolizumab%'", "'%Filgotinib%'", "'%Tofacitinib%'", "'%Upadacitinib%'")

In [85]:
# Create a variable for each of the missing ingredients making sure there are the matching number of 
# ingredients to test variables, ie/ 4 ingredients will require 0,1,2,3 to be called from the 

WC_ing_0 = str(wc_missing_ing_tup[0])
WC_ing_1 = str(wc_missing_ing_tup[1])
WC_ing_2 = str(wc_missing_ing_tup[2])
WC_ing_3 = str(wc_missing_ing_tup[3])


WC_missing_ing_sql = f'''
SELECT *
FROM dmd.ing
where ing.nm like {WC_ing_0}
    or ing.nm like {WC_ing_1}
    or ing.nm like {WC_ing_2}
    or ing.nm like {WC_ing_3}

ORDER BY nm  '''
WC_missing_ing_tbl = bq.cached_read(WC_missing_ing_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

WC_missing_ing_tbl

Unnamed: 0,id,isiddt,isidprev,invalid,nm
0,430306004,NaT,,False,Certolizumab pegol
1,39002911000001106,NaT,,False,Filgotinib
2,39039711000001109,NaT,,False,Filgotinib maleate
3,704313003,NaT,,False,Tofacitinib
4,704314009,NaT,,False,Tofacitinib citrate
5,37800211000001108,NaT,,False,Upadacitinib
6,37949711000001108,NaT,,False,Upadacitinib hemihydrate


#### Create final list including wildcard ingredient names for ingredients with no exact matches

In [None]:

sql_incl_wc_match = f'''
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_tuple}
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid

UNION DISTINCT

SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing 
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid
WHERE ing.nm LIKE {WC_ing_0}
    OR ing.nm LIKE {WC_ing_1}
    OR ing.nm LIKE {WC_ing_2}
    OR ing.nm LIKE {WC_ing_3}

  '''

meds_incl_wc_match = bq.cached_read(sql_incl_wc_match
            , csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

meds_incl_wc_match            


### Results

In [92]:
# How many products have DDDs using exact match
print(meds[["ddd", "id"]].count())

# How many products have DDDs using wildcard matching
print(meds_incl_wc_match[["ddd", "id"]].count())

ddd    38
id     54
dtype: int64
ddd    44
id     65
dtype: int64


In [94]:
meds_incl_wc_match

Unnamed: 0,type,id,bnf_code,nm,ingredient,ddd
0,vmp,38030511000001109,,Upadacitinib hemihydrate 15mg tablets,Upadacitinib hemihydrate,
1,vmp,38067611000001103,1001030AEAAAAAA,Upadacitinib 15mg modified-release tablets,Upadacitinib hemihydrate,
2,vmp,40087811000001102,1001030AEAAABAB,Upadacitinib 30mg modified-release tablets,Upadacitinib hemihydrate,
3,vmp,39046311000001104,1001030AGAAABAB,Filgotinib 200mg tablets,Filgotinib maleate,
4,vmp,39046211000001107,1001030AGAAAAAA,Filgotinib 100mg tablets,Filgotinib maleate,
5,vmp,39705411000001108,1001030ABAAABAB,Tofacitinib 5mg tablets,Tofacitinib citrate,10.0
6,vmp,35734311000001100,1001030ABAAACAC,Tofacitinib 10mg tablets,Tofacitinib citrate,10.0
7,vmp,38162311000001103,1001030ABAAADAD,Tofacitinib 11mg modified-release tablets,Tofacitinib citrate,10.0
8,vmp,17315811000001100,1001030Y0AAAAAA,Certolizumab pegol 200mg/1ml solution for injection pre-filled syringes,Certolizumab pegol,14.0
9,vmp,33523711000001105,1001030Y0AAABAB,Certolizumab pegol 200mg/1ml solution for injection pre-filled disposable devices,Certolizumab pegol,14.0


In [93]:
meds

Unnamed: 0,type,id,bnf_code,nm,ingredient,ddd
0,vmp,29767011000001106,1001030V0AAACAC,Abatacept 125mg/1ml solution for injection pre-filled disposable devices,Abatacept,27.0
1,vmp,21704711000001107,1001030V0AAABAB,Abatacept 125mg/1ml solution for injection pre-filled syringes,Abatacept,27.0
2,vmp,11762011000001101,1001030V0AAAAAA,Abatacept 250mg powder for solution for infusion vials,Abatacept,27.0
3,vmp,37223011000001109,1001030V0AAAEAE,Abatacept 50mg/0.4ml solution for injection pre-filled syringes,Abatacept,27.0
4,vmp,37223111000001105,1001030V0AAADAD,Abatacept 87.5mg/0.7ml solution for injection pre-filled syringes,Abatacept,27.0
5,vmp,35318811000001108,1001030S0AAAFAF,Adalimumab 20mg/0.2ml solution for injection pre-filled syringes,Adalimumab,2.9
6,vmp,36441711000001106,1001030S0AAAIAI,Adalimumab 20mg/0.4ml solution for injection pre-filled syringes,Adalimumab,2.9
7,vmp,32888111000001102,1001030S0AAADAD,Adalimumab 40mg/0.4ml solution for injection pre-filled disposable devices,Adalimumab,2.9
8,vmp,32888211000001108,1001030S0AAAEAE,Adalimumab 40mg/0.4ml solution for injection pre-filled syringes,Adalimumab,2.9
9,vmp,11236911000001103,1001030S0AAABAB,Adalimumab 40mg/0.8ml solution for injection pre-filled disposable devices,Adalimumab,2.9
