In [11]:
import pandas as pd

from IPython.core.display import display, HTML


month = '2016-09-01'

# How could we scalp the NHS?

Anything not listed in the Drug Tariff ("NP8" - Not Part 8) can be supplied by a dispensing contractor at whatever price they find. They can then pass their invoice on to the NHS.

Some pharma companies specialise in selling pills at rip-off prices (e.g. £90 for a packet of cod liver oil capsules, as exposed in [this Telegraph article from 2013](http://www.telegraph.co.uk/news/health/news/10181169/Pricing-scandal-sees-NHS-pay-89-for-accessible-cod-liver-oil-capsules.html)) to the contractor, who then passes on the cost to the NHS. Separately, the pharma company has a contract with the pharmacy to pay them the equivalent of (say) £45 per packet "commission". As one happy pharmacist [said in an online forum](http://www.pharmacy-forum.co.uk/showthread.php?t=10802):

> There's also NP8 (non part VIII drugs)..these are drugs not listed so the price paid is whatever it costs you to buy. Now some wholesalers will say X amount for an NP8 drug then give you 50/60/70 or even 80% off in a legally round about way- thats a ton of margin right there because you claim say £100 when actually you've only paid £20. Think Tramadol M/R formulations.

Therefore, if I was a Black Hat Pharma company, I would look for drugs which:

* Are not in the Tariff;
* Are generically prescribed;
* Are prescribed in relatively high quantities;
* Are not already subject to fleecing by my competitors

We can use a coefficient of deviation to find things which do (or don't) already have high variance.


In [5]:
sql = """

WITH np8_drugs AS (SELECT
  p.bnf_code,
  p.bnf_name,
  sum(quantity) as quantity,
  sum(actual_cost) as actual_cost,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation
FROM
  ebmdatalab.hscic.prescribing AS p
LEFT JOIN
  ebmdatalab.hscic.tariff t
ON
  t.bnf_code = p.bnf_code
  AND TIMESTAMP(t.date) = p.month
WHERE
  p.month = TIMESTAMP('"""+month+"""') 
AND 
  category IS NULL -- not in the tariff
AND p.bnf_code LIKE '_________AA%' -- generically prescribed
AND p.bnf_code NOT LIKE '19%' -- specials
GROUP BY p.bnf_code, p.bnf_name)

SELECT * 
FROM 
  np8_drugs 
WHERE actual_cost > 2000 -- relatively high quantities
ORDER BY
  coefficient_of_deviation ASC -- things with least variability first (no-one else is on it yet)

"""
df = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')



In [12]:
#df.head(10)
display(HTML(df.to_html(formatters={'actual_cost': '£{:,.2f}'.format}, max_rows=10)))


Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation
0,1308010Z0AAAAAA,Ingenol Mebutate_Gel 150mcg/g,905,"£54,508.88",
1,1308010Z0AAABAB,Ingenol Mebutate_Gel 500mcg/g,6,"£8,371.70",
2,0109010G0AAABAB,Chenodeoxycholic Acid_Cap 250mg,60,"£2,590.48",0.000000
3,0602010V0AADFDF,Levothyrox Sod_Liq Spec 300mcg/5ml,1200,"£2,254.72",0.000000
4,0908010E0AAADAD,Mercaptamine_Cap G/R 75mg,250,"£3,890.80",0.000000
...,...,...,...,...,...
1044,0901020D0AAAFAF,Cyanocobalamin_Tab 1mg,7893,"£8,398.90",2.537117
1045,0906040G0AAEAEA,Colecal_Cap 5 000u,14454,"£5,504.63",2.869920
1046,091200000AAEEEE,Glucosamine + Chond_Cap 400mg/100mg,12180,"£3,142.78",2.969154
1047,0906040N0AAEWEW,Ergocalciferol_Tab 12.5mcg,16016,"£3,731.40",3.177512


It might be easier for us to scalp where the price per dose is currently relatively low. Let's only look at presentations where a does is less than £1, then put the ones with the greatest monthly cost to the NHS at the top.

In [13]:
df.query('(actual_cost / quantity < 1.0) & coefficient_of_deviation < 0.01').sort_values('actual_cost', ascending=False).head()

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation
268,0601023AFAAABAB,Linagliptin/Metformin_Tab 2.5mg/1g,178499,98314.04,0.003241
156,0408010D0AAAEAE,Oxcarbazepine_Oral Susp 300mg/5ml S/F,222852,40443.25,0.000571
82,1305020D0AAAIAI,Calcipotriol/Betameth_Foam 50mcg/0.5mg/g,60720,37210.98,6.4e-05
245,0802010M0AAAJAJ,Mycophenolate Mofetil_Oral Susp 1g/5mlSF,52150,31800.55,0.002465
75,0408010AFAAADAD,Rufinamide_Oral Susp 200mg/5ml S/F,122360,23342.0,5.8e-05


# Where is scalping most likely to be happening now?

We can reverse the logic to find the presentations most likely to be subject to scalping right now:

In [14]:
sql = """

WITH np8_drugs AS (SELECT
  p.bnf_code,
  p.bnf_name,
  sum(quantity) as quantity,
  sum(actual_cost) as actual_cost,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation
FROM
  ebmdatalab.hscic.prescribing AS p
LEFT JOIN
  ebmdatalab.hscic.tariff t
ON
  t.bnf_code = p.bnf_code
  AND TIMESTAMP(t.date) = p.month

WHERE
  p.month = TIMESTAMP('"""+month+"""') 
AND 
  category IS NULL -- not in the tariff
AND p.bnf_code LIKE '_________AA%' -- generically prescribed
AND p.bnf_code NOT LIKE '19%' -- specials
GROUP BY p.bnf_code, p.bnf_name),

top_drugs AS (
SELECT np8_drugs.*, bnf.supplier, bnf.tariff_category, bnf.pack_size_count, bnf.bioequivalence, bnf.route
FROM 
  np8_drugs 
LEFT JOIN ebmdatalab.hscic.super_bnf AS bnf
ON bnf.bnf_code = np8_drugs.bnf_code

WHERE actual_cost > 2000 -- relatively high quantities
ORDER BY
  coefficient_of_deviation DESC -- things with most variability first
LIMIT 100
)

select * 
FROM top_drugs 
"""
df2 = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')

In [15]:
df2[df2.bnf_code.str.startswith('03')].head()

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,tariff_category,pack_size_count,bioequivalence,route
77,0304010N0AAAGAG,Diphenhydramine HCl_Tab 25mg,41857,6097.43,1.674817,Galpharm International Ltd,,45,True,tablet.oral


In [17]:
sql = """

WITH np8_drugs AS (SELECT
  p.bnf_code,
  p.bnf_name,
  sum(quantity) as quantity,
  sum(actual_cost) as actual_cost,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation
FROM
  ebmdatalab.hscic.prescribing AS p
LEFT JOIN
  ebmdatalab.hscic.tariff t
ON
  t.bnf_code = p.bnf_code
  AND TIMESTAMP(t.date) = p.month

WHERE
  p.month = TIMESTAMP('"""+month+"""') 
AND 
  category IS NULL -- not in the tariff
AND p.bnf_code LIKE '_________AA%' -- generically prescribed
AND p.bnf_code NOT LIKE '19%' -- specials
GROUP BY p.bnf_code, p.bnf_name),

top_drugs AS (
SELECT np8_drugs.*, bnf.supplier, bnf.tariff_category, bnf.pack_size_count, bnf.bioequivalence, bnf.route
FROM 
  np8_drugs 
LEFT JOIN ebmdatalab.hscic.super_bnf AS bnf
ON bnf.bnf_code = np8_drugs.bnf_code

WHERE actual_cost > 2000 -- relatively high quantities
ORDER BY
  coefficient_of_deviation DESC -- things with most variability first
LIMIT 100
)

select supplier, count(*) as count, sum(actual_cost) as total_cost, avg(pack_size_count) as avg_pack_size
FROM top_drugs 
GROUP BY supplier
ORDER BY total_cost DESC
"""
df3 = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')

In [18]:
df3.head(20)

Unnamed: 0,supplier,count,total_cost,avg_pack_size
0,Pfizer Ltd,4,804407.92,4.5
1,A A H Pharmaceuticals Ltd,4,712599.56,61.25
2,Solgar Vitamin and Herb,5,370748.38,112.8
3,Special Order,33,264505.46,10.878788
4,Kyowa Kirin Ltd,1,118205.8,10.0
5,Chugai Pharma UK Ltd,2,107039.22,18.5
6,Intrapharm Laboratories Ltd,1,81556.52,68.0
7,Arjun Products Ltd,1,79850.97,15.0
8,HealthAid Ltd,5,64860.88,96.4
9,Lamberts Healthcare Ltd,2,51614.85,55.0


In [19]:
df2.query('bnf_code =="0302000C0AABEBE"')
df2.query('(actual_cost / quantity < 1.0) & coefficient_of_deviation > 0.1').sort_values('actual_cost', ascending=False).head()

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,tariff_category,pack_size_count,bioequivalence,route
31,0906040G0AABHBH,Colecal_Cap 1 000u,497926,257550.94,1.590396,Solgar Vitamin and Herb,3.0,418,True,capsule.oral
59,0604020K0AABMBM,Testosterone_Gel 2% (10mg per actuation),225000,118205.8,1.008506,Kyowa Kirin Ltd,3.0,10,True,gel.cutaneous
16,0906040G0AABIBI,Colecal_Cap 400u,255240,95559.03,1.990893,Solgar Vitamin and Herb,,70,True,capsule.oral
86,0206010K0AAAGAG,Isosorbide Mononit_Tab 40mg M/R,157165,81556.52,0.934972,Intrapharm Laboratories Ltd,3.0,68,True,tabletmodified-release.oral
57,0905013G0AAAYAY,Mag Glycerophos_Tab Chble 97.2mg S/F,93326,79850.97,2.210185,Arjun Products Ltd,,15,True,tablet.oral


In [20]:
df2.query('supplier == "A A H Pharmaceuticals Ltd"')

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,tariff_category,pack_size_count,bioequivalence,route
4,1501041T0AAABAB,Midazolam HCl_Inj 2mg/ml 5ml Amp,2395,2373.43,1.175262,A A H Pharmaceuticals Ltd,,68,True,solutionenema.rectal
58,0407010Q0AAAAAA,Co-Proxamol_Tab 32.5mg/325mg,659538,702718.55,1.575012,A A H Pharmaceuticals Ltd,,105,True,tablet.oral
84,1501041T0AAASAS,Midazolam HCl_Inj 1mg/ml 5ml Amp,6953,4178.51,1.318242,A A H Pharmaceuticals Ltd,,56,True,solutioninjection.intramuscular
99,1108010K0AAAAAA,Sod Chlor_Eye Dps 0.9%,1060,3329.07,1.58754,A A H Pharmaceuticals Ltd,,16,True,solutiondrops.ophthalmic


In [21]:
df2.query('supplier == "Solgar Vitamin and Herb"')

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,tariff_category,pack_size_count,bioequivalence,route
16,0906040G0AABIBI,Colecal_Cap 400u,255240,95559.03,1.990893,Solgar Vitamin and Herb,,70,True,capsule.oral
31,0906040G0AABHBH,Colecal_Cap 1 000u,497926,257550.94,1.590396,Solgar Vitamin and Herb,3.0,418,True,capsule.oral
39,0906040G0AADWDW,Colecal_Cap 2 200u,9154,4366.25,1.207822,Solgar Vitamin and Herb,,32,True,capsule.oral
67,0906040G0AABFBF,Colecal_Cap 600u,9829,7327.12,1.825936,Solgar Vitamin and Herb,,28,True,capsule.oral
85,0905013F0AAADAD,Mag Cit_Tab 200mg,4983,5945.04,1.17416,Solgar Vitamin and Herb,,16,True,tablet.oral


In [25]:
df2.query('supplier == "Kyowa Kirin Ltd"')

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,tariff_category,pack_size_count,bioequivalence,route
59,0604020K0AABMBM,Testosterone_Gel 2% (10mg per actuation),225000,118205.8,1.008506,Kyowa Kirin Ltd,3.0,10,True,gel.cutaneous


# What about interesting outliers?

So far, we've only focussed on presentations over a certain threshold of prescribing. Let's create a data frame with the top 1000 presentations by 

In [23]:
sql = """
SELECT
  bnf_name,
  bnf_code,
  COUNT(*) AS count,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation,    
  MAX(IEEE_DIVIDE(actual_cost, quantity)) - MIN(IEEE_DIVIDE(actual_cost, quantity)) AS delta
FROM
  ebmdatalab.hscic.prescribing AS p
  WHERE
    month = '2016-09-01'
    AND bnf_code NOT LIKE '19%'
    AND NOT IS_INF(IEEE_DIVIDE(actual_cost, quantity))
GROUP BY
  bnf_name,
  bnf_code
ORDER BY
  delta DESC
LIMIT
  1000"""
df4 = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')


In [27]:
display(HTML(df4.to_html(formatters={'delta': '£{:,.2f}'.format}, max_rows=10)))


Unnamed: 0,bnf_name,bnf_code,count,coefficient_of_deviation,delta
0,Ins Humulin R_500u/ml 20ml Vl,0601011R0BBABAB,30,0.546606,"£1,614.21"
1,Silver Nit_Stick 40%,1307000Q0AAABAB,445,9.631044,£455.29
2,Estradiol_Implant 50mg,0604011G0AAABAB,5,1.139499,£327.49
3,Praziquantel_Tab 500mg,0505050P0AAABAB,5,1.512055,£286.16
4,Glycopyrronium Brom_Crm 1%,1312000G0AAAMAM,6,2.068358,£283.81
...,...,...,...,...,...
995,Halyard_Corstop a.c.e stopper,23701157000,169,0.014780,£2.50
996,Testosterone_T/Derm Patch 300mcg/24hrs,0604020K0AABLBL,26,0.015680,£2.50
997,Aranesp SureClick_Inj 20mcg/0.5ml Pf Pen,0901030J0BBANAN,5,0.036030,£2.50
998,ReplensMD Vag Moist 12D Pack,21230000102,679,0.021373,£2.48
