### DMD Ingredient->Product Lookup

Get a list of all VMPs (Virtual Medicinal Products) and AMPs (Actual Medicinal Products) from a given list of ingredients (VTMs = Virtual Therapeutic Moieties)

In [3]:
# set name of codelist for exporting file
codelist_name="rheumatology_meds"


# import or paste list of vtms (Virtual Therapeutic Moieties / Ingredients) by name
names = ['Adalimumab', 'Etanercept', 'Certolizumab', 'Infliximab', 'Golimumab','Rituximab',
        'Tocilizumab','Sarilumab','Tofacitinib','Baricitinib','Upadacitinib','Filgotinib',
        'Abatacept','Ipilimumab','Nivolumab','Pembrolizumab']


from ebmdatalab import bq
import os
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)



## Select products from DMD containing chosen ingredients

In [4]:
# convert list to tuple for use in SQL query
names_tuple = tuple(names)
if len(names_tuple)==1:
    # remove comma if only one item
    names_tuple = str(names_tuple).replace(",","")

sql = f'''
SELECT "vmp" AS type, vmp.id, bnf_code, vmp.nm, ing.nm AS ingredient, ddd.ddd
FROM dmd.vmp
INNER JOIN dmd.vpi AS vpi ON vmp.id=vpi.vmp 
INNER JOIN dmd.ing as ing ON ing.id = vpi.ing AND ing.nm IN {names_tuple}
LEFT JOIN dmd.ddd on vmp.id=ddd.vpid

ORDER BY type, nm  '''

meds = bq.cached_read(sql, csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))

meds.count()

type          54
id            54
bnf_code      48
nm            54
ingredient    54
ddd           38
dtype: int64

#### Check if any ingredients were not found

In [5]:
ings = list(meds.ingredient.drop_duplicates())
names.sort()
ings.sort()
print("Selected ingredients:", names)
print("Ingredients found:", ings)

missing_ings = [i for i in names if i not in ings]
print("Ingredients NOT found:", missing_ings)

Selected ingredients: ['Abatacept', 'Adalimumab', 'Baricitinib', 'Certolizumab', 'Etanercept', 'Filgotinib', 'Golimumab', 'Infliximab', 'Ipilimumab', 'Nivolumab', 'Pembrolizumab', 'Rituximab', 'Sarilumab', 'Tocilizumab', 'Tofacitinib', 'Upadacitinib']
Ingredients found: ['Abatacept', 'Adalimumab', 'Baricitinib', 'Etanercept', 'Golimumab', 'Infliximab', 'Ipilimumab', 'Nivolumab', 'Pembrolizumab', 'Rituximab', 'Sarilumab', 'Tocilizumab']
Ingredients NOT found: ['Certolizumab', 'Filgotinib', 'Tofacitinib', 'Upadacitinib']


In [17]:
# look for ingredients not found above in underlying tables
missing_names_tuple = tuple(missing_ings)
if len(missing_names_tuple)==1:
    # remove comma if only one item
    missing_names_tuple = str(missing_names_tuple).replace(",","")

ing_sql = f'''
SELECT *
FROM dmd.ing
where ing.nm IN {missing_names_tuple}

ORDER BY nm  '''

ing_missing_tbl = bq.cached_read(ing_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))
               

ing_missing_tbl


Unnamed: 0,id,isiddt,isidprev,invalid,nm
0,39002911000001106,NaT,,False,Filgotinib
1,704313003,NaT,,False,Tofacitinib
2,37800211000001108,NaT,,False,Upadacitinib


In [32]:
#search for ingredient name with wildcard - NB CURRENTLY THIS DOESN'T WORK

search_term = tuple("filgotinib%")

ing_sql = f'''
SELECT *
FROM dmd.ing
where ing.nm like {search_term}

ORDER BY nm  '''

ing_missing_tbl = bq.cached_read(ing_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv'))
               

ing_missing_tbl

GenericGBQException: Reason: 400 No matching signature for operator LIKE for argument types: STRING, STRUCT<STRING, STRING, STRING, ...>. Supported signatures: STRING LIKE STRING; BYTES LIKE BYTES at [4:7]

(job ID: 2f5fa3e6-3581-4fd3-a110-67149ea87c31)

                     -----Query Job SQL Follows-----                     

    |    .    |    .    |    .    |    .    |    .    |    .    |
   1:
   2:SELECT *
   3:FROM dmd.ing
   4:where ing.nm like ('f', 'i', 'l', 'g', 'o', 't', 'i', 'n', 'i', 'b')
   5:
   6:ORDER BY nm  
    |    .    |    .    |    .    |    .    |    .    |    .    |

In [24]:
#Are missing ingredients present in vpi table (using ID?)
vpi_ing_sql = f'''
SELECT *
FROM dmd.vpi
where vpi.ing in (39002911000001106, 704313003, 37800211000001108) 
  '''

vpi_ing_missing_tbl = bq.cached_read(vpi_ing_sql, 
                csv_path=os.path.join('..','data',f'meds_{codelist_name}.csv')) 

vpi_ing_missing_tbl

Unnamed: 0,vmp,ing,basis_strnt,bs_subid,strnt_nmrtr_val,strnt_nmrtr_uom,strnt_dnmtr_val,strnt_dnmtr_uom


In [6]:
# check how many products have DDDs
print(meds[["ddd", "id"]].count())

ddd    38
id     54
dtype: int64


In [7]:
meds

Unnamed: 0,type,id,bnf_code,nm,ingredient,ddd
0,vmp,29767011000001106,1001030V0AAACAC,Abatacept 125mg/1ml solution for injection pre-filled disposable devices,Abatacept,27.0
1,vmp,21704711000001107,1001030V0AAABAB,Abatacept 125mg/1ml solution for injection pre-filled syringes,Abatacept,27.0
2,vmp,11762011000001101,1001030V0AAAAAA,Abatacept 250mg powder for solution for infusion vials,Abatacept,27.0
3,vmp,37223011000001109,1001030V0AAAEAE,Abatacept 50mg/0.4ml solution for injection pre-filled syringes,Abatacept,27.0
4,vmp,37223111000001105,1001030V0AAADAD,Abatacept 87.5mg/0.7ml solution for injection pre-filled syringes,Abatacept,27.0
5,vmp,35318811000001108,1001030S0AAAFAF,Adalimumab 20mg/0.2ml solution for injection pre-filled syringes,Adalimumab,2.9
6,vmp,36441711000001106,1001030S0AAAIAI,Adalimumab 20mg/0.4ml solution for injection pre-filled syringes,Adalimumab,2.9
7,vmp,32888111000001102,1001030S0AAADAD,Adalimumab 40mg/0.4ml solution for injection pre-filled disposable devices,Adalimumab,2.9
8,vmp,32888211000001108,1001030S0AAAEAE,Adalimumab 40mg/0.4ml solution for injection pre-filled syringes,Adalimumab,2.9
9,vmp,11236911000001103,1001030S0AAABAB,Adalimumab 40mg/0.8ml solution for injection pre-filled disposable devices,Adalimumab,2.9
