The NHS dm+d contains a flag which marks if something is unavalable. A related [notebook](https://gist.github.com/sebbacon/317938750ea79e6291ae2cac51a0d31b) uncovered reimbursement against a prouct which dm+d reports as unavailable for the last five years. Here we invesitigate reimbursement of unavailable products.

In [1]:
##importing libraries
import pandas as pd
import numpy as np
from ebmdatalab import bq, maps, charts
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [2]:
##ensure £ and pence are set right
pd.set_option('display.float_format', lambda x: '%.2f' % x)

Further investigations reveal that there is a further flag at vmp level relating to availability.

In [3]:
sql = '''
SELECT
  Date,
  TRIM(Principal_Supplier) AS supplier,
  bnf_name,
  presc.bnf_code,
  vmp.id,
  non_availdt,
  Items,
  actual_Cost
FROM
  ebmdatalab.hscic.prescribing_2019_04 AS presc ##latest month
LEFT JOIN
  ebmdatalab.dmd.vmp AS vmp
ON
  presc.bnf_code = vmp.bnf_code
JOIN
  ebmdatalab.alex.vendors AS software #this is where the up to date vendors table is held
ON
  software.ODS = presc.practice
  AND Date = presc.month
JOIN
  hscic.practices
ON
  practices.code = software.ODS
WHERE
  non_avail = 1 ###1 = Actual Products not Available
  AND presc.bnf_code NOT LIKE "_________AA%"
  AND non_availdt <= "2018-10-01"
GROUP BY
  Date,
  supplier,
  bnf_name,
  presc.bnf_code,
  vmp.id,
  non_avail,
  non_availdt,
  Items,
  actual_Cost
'''



df_unavailable_vmp = bq.cached_read(sql, csv_path='amp_dmd_unavailable_vmp.csv')
df_unavailable_vmp.head(15)

  credentials=credentials, verbose=verbose, private_key=private_key)


Unnamed: 0,Date,supplier,bnf_name,bnf_code,id,non_availdt,Items,actual_Cost
0,2019-04-01,TPP,U100 Syrg Sle Use 0.5ml + 12mm Needle-Ster Hyp...,21010900410,3481511000001102,2018-03-01,2,10.21
1,2019-04-01,TPP,SMA_Gold Prem 2 Catch-up For Pdr,090900000BBKJA0,15419911000001103,2017-12-11,1,9.15
2,2019-04-01,TPP,SMA_Gold Prem 2 Catch-up For Pdr,090900000BBKJA0,15419911000001103,2017-12-11,1,32.01
3,2019-04-01,TPP,SMA_Gold Prem 2 Catch-up For Pdr,090900000BBKJA0,15419911000001103,2017-12-11,1,18.3
4,2019-04-01,TPP,U100 Syrg Sle Use 1ml + 12mm Needle-Ster Hypod...,21010900420,3481911000001109,2018-03-01,2,16.27
5,2019-04-01,EMIS,U100 Syrg Sle Use 0.5ml + 12mm Needle-Ster Hyp...,21010900410,3481511000001102,2018-03-01,2,16.07
6,2019-04-01,TPP,Homeopathic Preparation - Proprietary,190203000BBAAA0,5049511000001109,2017-01-01,5,19.57
7,2019-04-01,TPP,Homeopathic Preparation - Proprietary,190203000BBAAA0,5131511000001104,2016-02-15,5,19.57
8,2019-04-01,TPP,Homeopathic Preparation - Proprietary,190203000BBAAA0,5131011000001107,2017-01-01,5,19.57
9,2019-04-01,TPP,Homeopathic Preparation - Proprietary,190203000BBAAA0,5132911000001100,2016-02-15,5,19.57


In [10]:
#monthly total
df_unavailable_vmp.sum()[['Items','actual_Cost']]

Items             26900
actual_Cost   515251.80
dtype: object

In [4]:
df_unavailable_vmp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13038 entries, 0 to 13037
Data columns (total 8 columns):
Date           13038 non-null datetime64[ns]
supplier       13038 non-null object
bnf_name       13038 non-null object
bnf_code       13038 non-null object
id             13038 non-null int64
non_availdt    13038 non-null datetime64[ns]
Items          13038 non-null int64
actual_Cost    13038 non-null float64
dtypes: datetime64[ns](2), float64(1), int64(2), object(3)
memory usage: 815.0+ KB


In [9]:
## trouble shoot https://stackoverflow.com/questions/44759840/delete-duplicate-rows-with-the-same-value-in-all-columns-in-pandas