In [1]:
import pandas as pd
pd.options.display.float_format = '£{:,.2f}'.format


month = '2016-09-01'

# How could we scalp the NHS?

Anything not listed in the Drug Tariff ("NP8" - Not Part 8) can be supplied by a dispensing contractor at whatever price they find. They can then pass their invoice on to the NHS.

Some pharma companies specialise in selling pills at rip-off prices (e.g. £90 for a packet of cod liver oil capsules, as exposed in [this Telegraph article from 2013](http://www.telegraph.co.uk/news/health/news/10181169/Pricing-scandal-sees-NHS-pay-89-for-accessible-cod-liver-oil-capsules.html)) to the contractor, who then passes on the cost to the NHS. Separately, the pharma company has a contract with the pharmacy to pay them the equivalent of (say) £45 per packet "commission". As one happy pharmacist [said in an online forum](http://www.pharmacy-forum.co.uk/showthread.php?t=10802):

> There's also NP8 (non part VIII drugs)..these are drugs not listed so the price paid is whatever it costs you to buy. Now some wholesalers will say X amount for an NP8 drug then give you 50/60/70 or even 80% off in a legally round about way- thats a ton of margin right there because you claim say £100 when actually you've only paid £20. Think Tramadol M/R formulations.

Therefore, if I was a Black Hat Pharma company, I would look for drugs which:

* Are not in the Tariff;
* Are generically prescribed;
* Are prescribed in relatively high quantities;
* Are not already subject to fleecing by my competitors

Let's do it!


In [45]:
sql = """

WITH np8_drugs AS (SELECT
  p.bnf_code,
  p.bnf_name,
  sum(quantity) as quantity,
  sum(actual_cost) as actual_cost,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation
FROM
  ebmdatalab.hscic.prescribing AS p
LEFT JOIN
  ebmdatalab.hscic.tariff t
ON
  t.bnf_code = p.bnf_code
  AND TIMESTAMP(t.date) = p.month
WHERE
  p.month = TIMESTAMP('"""+month+"""') 
AND 
  category IS NULL -- not in the tariff
AND p.bnf_code LIKE '_________AA%' -- generically prescribed
AND p.bnf_code NOT LIKE '19%' -- specials
GROUP BY p.bnf_code, p.bnf_name)

SELECT * 
FROM 
  np8_drugs 
WHERE actual_cost > 2000 -- relatively high quantities
ORDER BY
  coefficient_of_deviation ASC -- things with least variability first (no-one else is on it yet)

"""
df = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')

In [44]:
df.head(10)

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation
0,1308010Z0AAABAB,Ingenol Mebutate_Gel 500mcg/g,6,"£8,371.70",£nan
1,1308010Z0AAAAAA,Ingenol Mebutate_Gel 150mcg/g,905,"£54,508.88",£nan
2,0411000H0AAAAAA,Idebenone_Tab 45mg,672,"£2,299.60",£0.00
3,0908010N0AAARAR,Sod Benz_Liq Spec 400mg/5ml,3000,"£3,579.22",£0.00
4,1106000X0AAA4A4,Piloc HCl_Eye Dps 6% P/F,80,"£2,050.82",£0.00
5,0803042T0AAAAAA,Abiraterone_Tab 250mg,224,"£5,067.32",£0.00
6,0704030G0AAAPAP,Pot Cit_Cap 600mg,1460,"£4,626.82",£0.00
7,0408010ABAAAEAE,Tiagabine_Liq Spec 2.5mg/5ml,1680,"£2,114.82",£0.00
8,1001030V0AAABAB,Abatacept_Inj 125mg/1ml Pfs,36,"£10,085.78",£0.00
9,1001030S0AAABAB,Adalimumab_Inj 40mg/0.8ml Pf Pen,12,"£3,914.91",£0.00


It might be easier for us to scalp where the price per dose is currently relatively low. Let's only look at presentations where a does is less than £1, then put the ones with the greatest monthly cost to the NHS at the top.

In [56]:
df.query('(actual_cost / quantity < 1.0) & coefficient_of_deviation < 0.01').sort_values('actual_cost', ascending=False).head()

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation
268,0601023AFAAABAB,Linagliptin/Metformin_Tab 2.5mg/1g,178499,"£98,314.04",£0.00
156,0408010D0AAAEAE,Oxcarbazepine_Oral Susp 300mg/5ml S/F,222852,"£40,443.25",£0.00
82,1305020D0AAAIAI,Calcipotriol/Betameth_Foam 50mcg/0.5mg/g,60720,"£37,210.98",£0.00
245,0802010M0AAAJAJ,Mycophenolate Mofetil_Oral Susp 1g/5mlSF,52150,"£31,800.55",£0.00
75,0408010AFAAADAD,Rufinamide_Oral Susp 200mg/5ml S/F,122360,"£23,342.00",£0.00


# Where is scalping most likely to be happening now?

We can reverse the logic to find the presentations most likely to be subject to scalping right now:

In [2]:
sql = """

WITH np8_drugs AS (SELECT
  p.bnf_code,
  p.bnf_name,
  sum(quantity) as quantity,
  sum(actual_cost) as actual_cost,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation
FROM
  ebmdatalab.hscic.prescribing AS p
LEFT JOIN
  ebmdatalab.hscic.tariff t
ON
  t.bnf_code = p.bnf_code
  AND TIMESTAMP(t.date) = p.month

WHERE
  p.month = TIMESTAMP('"""+month+"""') 
AND 
  category IS NULL -- not in the tariff
AND p.bnf_code LIKE '_________AA%' -- generically prescribed
AND p.bnf_code NOT LIKE '19%' -- specials
GROUP BY p.bnf_code, p.bnf_name),

top_drugs AS (
SELECT np8_drugs.*, bnf.supplier, bnf.in_tariff, bnf.pack_size_count, bnf.bioequivalence, bnf.route
FROM 
  np8_drugs 
LEFT JOIN ebmdatalab.hscic.super_bnf AS bnf
ON bnf.bnf_code = np8_drugs.bnf_code

WHERE actual_cost > 2000 -- relatively high quantities
ORDER BY
  coefficient_of_deviation DESC -- things with most variability first
LIMIT 100
)

select * 
FROM top_drugs 
"""
df2 = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')

In [18]:
df2.head()

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,in_tariff,pack_size_count,bioequivalence,route
0,091200000AADGDG,Glucosamine Sulf_Tab 500mg,4138,"£2,684.34",£1.08,HealthAid Ltd,False,342,True,tablet.oral
1,1106000I0AAACAC,Betaxolol_Susp Eye Dps 0.25% 0.25ml Udv,18818,"£5,160.29",£1.74,Alcon Laboratories (UK) Ltd,False,2,True,solutiondrops.ophthalmic
2,0401020K0AAA1A1,Diazepam_Liq Spec 10mg/5ml,30113,"£20,508.00",£1.57,Special Order,False,16,True,solution.oral
3,0106040M0AAAHAH,Macrogol_Co Liq NPF S/F,916183,"£10,798.69",£1.23,Norgine Pharmaceuticals Ltd,True,2,True,solution.oral
4,1501041T0AAABAB,Midazolam HCl_Inj 2mg/ml 5ml Amp,2395,"£2,373.43",£1.18,A A H Pharmaceuticals Ltd,False,68,True,solutionenema.rectal


In [15]:
sql = """

WITH np8_drugs AS (SELECT
  p.bnf_code,
  p.bnf_name,
  sum(quantity) as quantity,
  sum(actual_cost) as actual_cost,
  IEEE_DIVIDE(stddev_pop(IEEE_DIVIDE(actual_cost, quantity)), avg(IEEE_DIVIDE(actual_cost,quantity))) as coefficient_of_deviation
FROM
  ebmdatalab.hscic.prescribing AS p
LEFT JOIN
  ebmdatalab.hscic.tariff t
ON
  t.bnf_code = p.bnf_code
  AND TIMESTAMP(t.date) = p.month

WHERE
  p.month = TIMESTAMP('"""+month+"""') 
AND 
  category IS NULL -- not in the tariff
AND p.bnf_code LIKE '_________AA%' -- generically prescribed
AND p.bnf_code NOT LIKE '19%' -- specials
GROUP BY p.bnf_code, p.bnf_name),

top_drugs AS (
SELECT np8_drugs.*, bnf.supplier, bnf.in_tariff, bnf.pack_size_count, bnf.bioequivalence, bnf.route
FROM 
  np8_drugs 
LEFT JOIN ebmdatalab.hscic.super_bnf AS bnf
ON bnf.bnf_code = np8_drugs.bnf_code

WHERE actual_cost > 2000 -- relatively high quantities
ORDER BY
  coefficient_of_deviation DESC -- things with most variability first
LIMIT 100
)

select supplier, count(*) as count, sum(actual_cost) as total_cost, avg(pack_size_count) as avg_pack_size
FROM top_drugs 
GROUP BY supplier
ORDER BY total_cost DESC
"""
df3 = pd.io.gbq.read_gbq(sql, project_id="ebmdatalab", verbose=False, dialect='standard')

In [16]:
df3.head(20)

Unnamed: 0,supplier,count,total_cost,avg_pack_size
0,Pfizer Ltd,4,"£804,407.92",£4.50
1,A A H Pharmaceuticals Ltd,4,"£712,599.56",£61.25
2,Solgar Vitamin and Herb,5,"£370,748.38",£112.80
3,Special Order,33,"£264,505.46",£10.88
4,Kyowa Kirin Ltd,1,"£118,205.80",£10.00
5,Chugai Pharma UK Ltd,2,"£107,039.22",£18.50
6,Intrapharm Laboratories Ltd,1,"£81,556.52",£68.00
7,Arjun Products Ltd,1,"£79,850.97",£15.00
8,HealthAid Ltd,5,"£64,860.88",£96.40
9,Lamberts Healthcare Ltd,2,"£51,614.85",£55.00


In [61]:
df2.query('bnf_code =="0302000C0AABEBE"')
df2.query('(actual_cost / quantity < 1.0) & coefficient_of_deviation > 0.1').sort_values('actual_cost', ascending=False).head()

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation
146,040702040AAACAC,Tramadol HCl_Tab 100mg M/R,885970,"£326,337.38",£0.77
32,0906040G0AABHBH,Colecal_Cap 1 000u,497926,"£257,550.94",£1.59
580,0401010ADAABABA,Melatonin_Oral Soln 5mg/5ml,913868,"£228,797.51",£0.13
316,0103050P0AABLBL,Omeprazole_Oral Susp 10mg/5ml,355694,"£153,920.11",£0.49
334,040702040AAAEAE,Tramadol HCl_Tab 200mg M/R,241319,"£139,408.30",£0.47


In [3]:
df2.query('supplier == "A A H Pharmaceuticals Ltd"')

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,in_tariff,pack_size_count,bioequivalence,route
4,1501041T0AAABAB,Midazolam HCl_Inj 2mg/ml 5ml Amp,2395,"£2,373.43",£1.18,A A H Pharmaceuticals Ltd,False,68,True,solutionenema.rectal
58,0407010Q0AAAAAA,Co-Proxamol_Tab 32.5mg/325mg,659538,"£702,718.55",£1.58,A A H Pharmaceuticals Ltd,False,105,True,tablet.oral
84,1501041T0AAASAS,Midazolam HCl_Inj 1mg/ml 5ml Amp,6953,"£4,178.51",£1.32,A A H Pharmaceuticals Ltd,False,56,True,solutioninjection.intramuscular
99,1108010K0AAAAAA,Sod Chlor_Eye Dps 0.9%,1060,"£3,329.07",£1.59,A A H Pharmaceuticals Ltd,False,16,True,solutiondrops.ophthalmic


In [4]:
df2.query('supplier == "Solgar Vitamin and Herb"')

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,in_tariff,pack_size_count,bioequivalence,route
16,0906040G0AABIBI,Colecal_Cap 400u,255240,"£95,559.03",£1.99,Solgar Vitamin and Herb,False,70,True,capsule.oral
31,0906040G0AABHBH,Colecal_Cap 1 000u,497926,"£257,550.94",£1.59,Solgar Vitamin and Herb,True,418,True,capsule.oral
39,0906040G0AADWDW,Colecal_Cap 2 200u,9154,"£4,366.25",£1.21,Solgar Vitamin and Herb,False,32,True,capsule.oral
67,0906040G0AABFBF,Colecal_Cap 600u,9829,"£7,327.12",£1.83,Solgar Vitamin and Herb,False,28,True,capsule.oral
85,0905013F0AAADAD,Mag Cit_Tab 200mg,4983,"£5,945.04",£1.17,Solgar Vitamin and Herb,False,16,True,tablet.oral


In [5]:
df2.query('supplier == "Kyowa Kirin Ltd"')

Unnamed: 0,bnf_code,bnf_name,quantity,actual_cost,coefficient_of_deviation,supplier,in_tariff,pack_size_count,bioequivalence,route
59,0604020K0AABMBM,Testosterone_Gel 2% (10mg per actuation),225000,"£118,205.80",£1.01,Kyowa Kirin Ltd,True,10,True,gel.cutaneous
