In [1]:
import pandas as pd
import sqlite3
con = sqlite3.connect("dmd.db")


## Restricted availability drugs

"None" means the availablity is not restricted

In [55]:
sql = """
SELECT DESC from dmd_product
INNER JOIN

LOOKUP_AVAILABILITY_RESTRICTION
ON dmd_product.AVAIL_RESTRICTCD = CD
"""
df = pd.read_sql_query(sql, con)
df.groupby('DESC')['DESC'].count().sort_values()

# "Not available" means there are no AMPs for the specified VMP


DESC
Extemp                       1    
Restricted Availability      32   
Individual Patient Supply    71   
Hospital Only                884  
Imported                     937  
Special                      12730
Not available                21067
None                         98416
Name: DESC, dtype: int64

## All drugs with special containers

In [13]:
# http://www.nhsbsa.nhs.uk/PrescriptionServices/Documents/PrescriptionServices/dmd_Implemention_Guide_(Primary_Care)_v1.0.pdf
sql = """
SELECT * from dmd_product
INNER JOIN AMP
  ON AMP.APID = DMDID
  OR AMP.VPID = DMDID
INNER JOIN AMPP
  ON AMPP.APID = AMP.APID
INNER JOIN REIMB_INFO
  ON AMPP.APPID = REIMB_INFO.APPID
  AND SPEC_CONTCD = 1
"""
special_containers = pd.read_sql_query(sql, con)


In [15]:
len(special_containers)

90277

In [12]:
special_containers.head()

Unnamed: 0,DMDID,BNF_CODE,VPID,DISPLAY_NAME,EMA,PRES_STATCD,AVAIL_RESTRICTCD,product_type,NON_AVAILCO,concept_class,...,DISCDT,APPID,PX_CHRGS,DISP_FEES,BB,LTD_STAB,CAL_PACK,SPEC_CONTCD,DND,FP34D
0,330726003,1106000P0AAAAAA,330726003,Dorzolamide 2% eye drops,0.0,1,1,1,,1,...,,1384711000001109,1,1,,,,1,,
1,434711000001109,1106000P0BBAAAA,330726003,Trusopt 2% eye drops (Santen UK Ltd),,1,1,2,0.0,2,...,,1384711000001109,1,1,,,,1,,
2,331126001,1202020L0AAAAAA,331126001,Sodium chloride 0.9% nasal drops,0.0,1,1,1,,1,...,,9433511000001106,1,1,,,,1,,
3,9433411000001107,21240000110,331126001,Sodium chloride 0.9% nasal drops (RX Farma),,1,1,3,0.0,2,...,,9433511000001106,1,1,,,,1,,
4,323733007,0501013B0AAAKAK,323733007,Amoxicillin 250mg/5ml oral suspension,0.0,1,1,1,,1,...,2007-05-08,1511211000001100,1,1,,,,1,,


## Drugs by particular routes

The first example just shows how to join the "route" table to the main table

In [64]:
sql = """
SELECT BNF_CODE, DISPLAY_NAME, DESC from dmd_product
INNER JOIN ONT
ON ONT.VPID = dmd_product.VPID
INNER JOIN LOOKUP_ONT_FORM_ROUTE
ON LOOKUP_ONT_FORM_ROUTE.CD = ONT.FORMCD
"""
df = pd.read_sql_query(sql, con)
df.head()

Unnamed: 0,BNF_CODE,DISPLAY_NAME,DESC
0,1311060I0AAABAB,Hydrogen peroxide 3% solution,solution.cutaneous
1,1311060I0AAABAB,Hydrogen peroxide 3% solution,mouthwash.oromucosal
2,1311060I0AAABAB,Hydrogen peroxide 3% solution,gargle.oromucosal
3,0308000B0AAAAAA,Benzoin compound tincture,solution.cutaneous
4,0308000B0AAAAAA,Benzoin compound tincture,vapourinhalation.inhalation


Here we only show thigs with a "form route" starting with `grocery`

In [79]:
sql = """
SELECT BNF_CODE, DISPLAY_NAME, DESC from dmd_product
INNER JOIN ONT
ON ONT.VPID = dmd_product.VPID
INNER JOIN LOOKUP_ONT_FORM_ROUTE
ON LOOKUP_ONT_FORM_ROUTE.CD = ONT.FORMCD
WHERE DESC LIKE 'grocery%'
"""
df = pd.read_sql_query(sql, con)
df.head()

Unnamed: 0,BNF_CODE,DISPLAY_NAME,DESC
0,,Barkat gluten free cream filled wafers (Flavour Not Specified),grocerysolid.oral
1,,Mrs Crimble's gluten free cheese crackers (Flavour Not Specified),grocerysolid.oral
2,,Mrs Crimble's gluten free rice cakes (Flavour Not Specified),grocerysolid.oral
3,,Orgran gluten free biscotti biscuits (Flavour Not Specified),grocerysolid.oral
4,,Orgran gluten free mini outback animals cookies (Flavour Not Specified),grocerysolid.oral


## Drugs with more than one pack size

First, we join only to VMPP (not via AMPs) which restricts us to generic equivalents.

There are 5,971 VMPs with more than one pack size (that's 32% of all VMPS).

In [215]:
sql = """
SELECT 
  DMDID,
  BNF_CODE,
  DISPLAY_NAME, 
  COUNT(VMPP.QTYVAL) AS num_packsizes, 
  DESC AS unit_of_measure
FROM dmd_product
INNER JOIN VMPP
  ON VMPP.VPID = DMDID
INNER JOIN LOOKUP_UNIT_OF_MEASURE
  ON LOOKUP_UNIT_OF_MEASURE.CD = VMPP.QTY_UOMCD
GROUP BY DISPLAY_NAME
HAVING count(VMPP.QTYVAL) = 1
order by count(VMPP.QTYVAL) DESC
"""
df2 = pd.read_sql_query(sql, con)
pd.set_option('display.max_colwidth', -1)
df2.head(10)


Unnamed: 0,DMDID,BNF_CODE,DISPLAY_NAME,num_packsizes,unit_of_measure
0,26245611000001101,,5-aminolaevulinic acid 78mg/g gel,1,gram
1,324881002,0503010E0AAABAB,Abacavir 20mg/ml oral solution sugar free,1,ml
2,324880001,0503010E0AAAAAA,Abacavir 300mg tablets,1,tablet
3,413382007,0503010Z0AAAAAA,Abacavir 600mg / Lamivudine 300mg tablets,1,tablet
4,29767011000001106,1001030V0AAACAC,Abatacept 125mg/1ml solution for injection pre-filled disposable devices,1,pre-filled disposable injection
5,21704711000001107,1001030V0AAABAB,Abatacept 125mg/1ml solution for injection pre-filled syringes,1,pre-filled disposable injection
6,11762011000001101,1001030V0AAAAAA,Abatacept 250mg powder for solution for infusion vials,1,vial
7,319794009,0209000W0AAAAAA,Abciximab 10mg/5ml solution for injection vials,1,vial
8,19524511000001101,0803042T0AAAAAA,Abiraterone 250mg tablets,1,tablet
9,7276711000001101,,Absorbable catgut chromic suture 2gauge 75cm length with 16mm curved cutting needle,1,suture


Drill down into gluten free bread:

In [190]:
sql = """SELECT 
  DMDID,
  BNF_CODE,
  DISPLAY_NAME, 
  VMPP.QTYVAL, 
  DESC AS unit_of_measure
FROM dmd_product
INNER JOIN VMPP
  ON VMPP.VPID = DMDID
INNER JOIN LOOKUP_UNIT_OF_MEASURE
  ON LOOKUP_UNIT_OF_MEASURE.CD = VMPP.QTY_UOMCD
WHERE DMDID = '328664004'
"""

df2 = pd.read_sql_query(sql, con)
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 40)
df2.head(5)

Unnamed: 0,DMDID,BNF_CODE,DISPLAY_NAME,QTYVAL,unit_of_measure
0,328664004,0904010H0AAAAAA,Gluten free bread,450.0,gram
1,328664004,0904010H0AAAAAA,Gluten free bread,150.0,gram
2,328664004,0904010H0AAAAAA,Gluten free bread,250.0,gram
3,328664004,0904010H0AAAAAA,Gluten free bread,456.0,gram
4,328664004,0904010H0AAAAAA,Gluten free bread,612.0,gram


Pack size is not the only variation. There are also things like colour, flavour, etc. 

If we join on AMP tables, we get every pack size combined with all other variations:

In [181]:
# This counts all variants/flavours of all sizes of actual packs

sql = """SELECT count(QTYVAL) as num_pack_sizes, BNF_CODE, DMDID, DISPLAY_NAME from dmd_product
INNER JOIN AMP
  ON AMP.APID = DMDID
  OR AMP.VPID = DMDID
INNER JOIN AMPP
  ON AMPP.APID = AMP.APID
INNER JOIN VMPP
ON AMPP.VPPID = VMPP.VPPID 
GROUP BY DMDID ORDER BY num_pack_sizes DESC
"""
df2 = pd.read_sql_query(sql, con)
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 40)
df2.head(5)


Unnamed: 0,num_pack_sizes,BNF_CODE,DMDID,DISPLAY_NAME
0,5656,,33501811000001106,Lymphoedema garments below knee closed toe
1,4467,,33502211000001103,Lymphoedema garments below knee open toe
2,4282,,33508511000001103,Lymphoedema garments tights/panty closed toe
3,4108,,33507811000001104,Lymphoedema garments thigh length closed toe with silicone top/band/border
4,4092,,33508611000001104,Lymphoedema garments tights/panty open toe


If we drill down into these lympoedema socks, we can see all the variants are brand vs. size vs. colour:

In [203]:
sql = """
SELECT AMP.DESC, QTYVAL as num_pack_sizes, LOOKUP_UNIT_OF_MEASURE.DESC AS unit_of_measure,BNF_CODE, DMDID from dmd_product
INNER JOIN AMP
  ON AMP.APID = DMDID
  OR AMP.VPID = DMDID
INNER JOIN AMPP
  ON AMPP.APID = AMP.APID
INNER JOIN VMPP
  ON AMPP.VPPID = VMPP.VPPID 
INNER JOIN LOOKUP_UNIT_OF_MEASURE
  ON LOOKUP_UNIT_OF_MEASURE.CD = VMPP.QTY_UOMCD
WHERE DMDID = '33501811000001106'"""

pd.read_sql_query(sql, con)


Unnamed: 0,DESC,num_pack_sizes,unit_of_measure,BNF_CODE,DMDID
0,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment standard small Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
1,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment standard medium Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
2,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment standard large Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
3,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment standard extra large Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
4,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment petite small Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
5,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment petite medium Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
6,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment petite large Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
7,ActiLymph class 1 (18-21mmHg) below knee closed toe lymphoedema garment petite extra large Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
8,ActiLymph class 2 (23-32mmHg) below knee closed toe lymphoedema garment standard small Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106
9,ActiLymph class 2 (23-32mmHg) below knee closed toe lymphoedema garment standard medium Sand (Activa Healthcare Ltd),2.0,device,,33501811000001106


# Generics where brands are not necessarily bioequivalent

These are brands which should not be prescribed generically.

In [214]:

sql = """
SELECT DMDID, BNF_CODE, DISPLAY_NAME FROM dmd_product
INNER JOIN VMPP
  ON VMPP.VPID = DMDID
INNER JOIN VMP
  ON VMPP.VPID = VMP.VPID
WHERE VMP.PRES_STATCD = 6"""
df = pd.read_sql_query(sql, con)
df.head(20)


Unnamed: 0,DMDID,BNF_CODE,DISPLAY_NAME
0,322864007,0408010C0AAACAC,Carbamazepine 200mg tablets
1,322865008,0408010C0AAADAD,Carbamazepine 400mg tablets
2,321730000,0402030K0AAACAC,Lithium carbonate 250mg tablets
3,319184008,0206020C0AAAVAV,Diltiazem 180mg modified-release capsules
4,327097004,0802020T0AAAAAA,Tacrolimus 5mg capsules
5,322865008,0408010C0AAADAD,Carbamazepine 400mg tablets
6,322943009,0408010Q0AAADAD,Phenytoin sodium 25mg capsules
7,322944003,0408010Q0AAAPAP,Phenytoin sodium 50mg capsules
8,319187001,0206020C0AAAXAX,Diltiazem 200mg modified-release capsules
9,319277008,0206020R0AAARAR,Nifedipine 20mg modified-release tablets


In [213]:
df[df.BNF_CODE.str.contains("0302000C0")]

Unnamed: 0,DMDID,BNF_CODE,DISPLAY_NAME
56,408063002,0302000C0AABFBF,Beclometasone 100micrograms/dose inhaler CFC free
57,408061000,0302000C0AABEBE,Beclometasone 50micrograms/dose inhaler CFC free
104,10621011000001101,0302000C0AABVBV,Beclometasone 200micrograms/dose inhaler CFC free
105,10621111000001100,0302000C0AABWBW,Beclometasone 250micrograms/dose inhaler CFC free


On this account, the following is OK to include:

```
Qvar 100_Autohaler 100mcg (200 D),0302000C0BJADBH
Qvar 100 E-Breathe_Inha 100mcg (200 D),0302000C0BJAFBH
Qvar 50_Autohaler 50mcg (200 D),0302000C0BJACBG
Qvar 50 E-Breathe_Inha 50mcg (200 D),0302000C0BJAEBG

```

But these are not:

```
Qvar 100_Inha 100mcg (200 D),0302000C0BJABBF
Qvar 50_Inha 50mcg (200 D),0302000C0BJAABE


```

In [None]:
# Make a new BNF table which can be used in PPP calculations, which includes (for generics)
# bnf_code, dmdid, bioequivalence, min_pack_size, count_pack_size, special_containers, supplier

# LOOKUP_DT_PAYMENT_CATEGORY - DT category
sql = """
WITH foo AS (
SELECT
  BNF_CODE AS bnf_code, 
  DMDID AS dmd_id, 
  DISPLAY_NAME AS name,
  product_type AS product_type, -- generic, brand, branded generic
  VMP.PRES_STATCD != 6 AS bioequivalence,
  MIN(VMPP.QTYVAL) AS min_pack_size,
  COUNT(VMPP.QTYVAL) AS pack_size_count,
  LOOKUP_ONT_FORM_ROUTE.DESC as route,
  LOOKUP_UNIT_OF_MEASURE.DESC AS dose_unit,
  LOOKUP_DT_PAYMENT_CATEGORY.DESC AS tariff_category

FROM dmd_product
INNER JOIN VMPP
  ON VMPP.VPID = DMDID
INNER JOIN VMP
  ON VMPP.VPID = VMP.VPID
INNER JOIN ONT
  ON ONT.VPID = VMP.VPID
INNER JOIN LOOKUP_ONT_FORM_ROUTE
  ON LOOKUP_ONT_FORM_ROUTE.CD = ONT.FORMCD
INNER JOIN AMP
  ON AMP.APID = DMDID
  OR AMP.VPID = DMDID
INNER JOIN LOOKUP_UNIT_OF_MEASURE
  ON LOOKUP_UNIT_OF_MEASURE.CD = VMPP.QTY_UOMCD
INNER JOIN LOOKUP_DT_PAYMENT_CATEGORY
  ON LOOKUP_DT_PAYMENT_CATEGORY.CD = dmd_product.tariff_category
WHERE bnf_code IS NOT NULL
GROUP BY bnf_code)

select * from foo 

""" 
df = pd.read_sql_query(sql, con)
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 40)
df.to_csv("bnf_extended.csv")


In [9]:

df[df.bnf_code == '1202020L0AAAAAA']

Unnamed: 0,bnf_code,dmd_id,name,bioequivalence,min_pack_size,pack_size_count,special_container,supplier,dose_unit


In [10]:
df[df.special_container != 1]

Unnamed: 0,bnf_code,dmd_id,name,bioequivalence,min_pack_size,pack_size_count,special_container,supplier,dose_unit
0,,9446311000001108,Hydrogel sheet without adhesive border 5cm x 6...,1,1.0,200807,,Activa Healthcare Ltd,dressing
30,20030100020,334277004,Multiple pack dressing No 1,1,1.0,4,,Synergy Health Plc,pack
34,20030100150,3137111000001109,Sterile dressing pack specification 10,1,1.0,44,,CliniSupplies Ltd,pack
35,20030100165,3137211000001103,Sterile dressing pack with non-woven pads spec...,1,1.0,12,,CliniSupplies Ltd,pack
38,20040200420,3332811000001108,Chlorhexidine gauze dressing BP 10cm x 10cm,1,1.0,4,,Smith & Nephew Healthcare Ltd,dressing
39,20060000300,335285000,Latex foam adhesive 22.5cm x 45cm x 7mm,1,4.0,1,,Smith & Nephew Healthcare Ltd,device
41,20080000110,3431711000001100,Belladonna adhesive plaster BP 1980 28cm x 17.5cm,1,1.0,4,,Cuxson Gerrard & Co (Dressings) Ltd,plaster
42,20120000100,3463011000001108,Gauze swab type 13 light BP 1988 8ply non-ster...,1,100.0,8,,BSN medical Ltd,swab
44,20120000120,3457011000001108,Filmated gauze swab BP 1988 8ply non-sterile 1...,1,100.0,1,,Synergy Health Plc,swab
45,20120000205,3506411000001101,Non-woven fabric swab 4ply non-sterile 10cm x ...,1,50.0,20,,Synergy Health Plc,swab


In [283]:
sql = "select * from dmd_product where bnf_code like '0208020W0BBABA0'"
df2 = pd.read_sql_query(sql, con)


In [285]:
df2[df2.BNF_CODE == '0208020W0BBABA0']

Unnamed: 0,DMDID,BNF_CODE,VPID,DISPLAY_NAME,EMA,PRES_STATCD,AVAIL_RESTRICTCD,product_type,NON_AVAILCO,concept_class,NURSE_F,DENT_F,PROD_ORDER_NO,SCHED_1,SCHED_2,PADM,FP10_MDA,ACBS,assort_flav,CATCD,tariff_category
0,10186011000001104,0208020W0BBABA0,3475111000001102,CoaguChek XS PT testing strips (Roche Diagnostics Ltd),,1,1,2,0,2,1,0,,0,0,0,0,0,0,0,8


In [286]:
df[df.bnf_code == '0208020W0BBABA0']

Unnamed: 0,bnf_code,dmd_id,name,bioequivalence,min_pack_size,pack_size_count,special_container,supplier


In [5]:
sql = """
SELECT
  *
FROM dmd_product
INNER JOIN VMPP
  ON VMPP.VPID = DMDID
INNER JOIN VMP
  ON VMPP.VPID = VMP.VPID
INNER JOIN AMP
  ON AMP.APID = DMDID
  OR AMP.VPID = DMDID
INNER JOIN AMPP
  ON AMPP.APID = AMP.APID
INNER JOIN REIMB_INFO
  ON AMPP.APPID = REIMB_INFO.APPID
INNER JOIN LOOKUP_SUPPLIER
  ON LOOKUP_SUPPLIER.CD = AMP.SUPPCD
WHERE BNF_CODE = '0208020W0BBABA0'
"""
pd.read_sql_query(sql, con)

Unnamed: 0,DMDID,BNF_CODE,VPID,DISPLAY_NAME,EMA,PRES_STATCD,AVAIL_RESTRICTCD,product_type,NON_AVAILCO,concept_class,...,CATCD,tariff_category,VPPID,INVALID,NM,ABBREVNM,VPID.1,QTYVAL,QTY_UOMCD,COMBPACKCD


In [290]:
df3[df3.BNF_CODE == '0208020W0BBABA0']

Unnamed: 0,DMDID,BNF_CODE,VPID,DISPLAY_NAME,EMA,PRES_STATCD,AVAIL_RESTRICTCD,product_type,NON_AVAILCO,concept_class,NURSE_F,DENT_F,PROD_ORDER_NO,SCHED_1,SCHED_2,PADM,FP10_MDA,ACBS,assort_flav,CATCD,...,APID,COMBPACKCD,LEGAL_CATCD,SUBP,DISCCD,DISCDT,APPID,PX_CHRGS,DISP_FEES,BB,LTD_STAB,CAL_PACK,SPEC_CONTCD,DND,FP34D,CD,CDDT,CDPREV,INVALID,DESC
