In [1]:
import os
import pandas as pd
import numpy as np
#import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
%matplotlib inline
from ebmdatalab import bq
from ebmdatalab import charts

**PRICE CONCESSION DATA**

In [14]:
#get price concession data from BigQuery
sql = """
  SELECT DISTINCT
    ncso.vmpp AS vmpp,
    ncso.date AS month,    
    1 AS concession_bool --creates a boolean value to show a price concession exists
  FROM
    ebmdatalab.dmd.ncsoconcession AS ncso --concession table 
"""
exportfile = os.path.join("..","data","ncso_dates.csv") #defines name for cache file
dates_df = bq.cached_read(sql, csv_path=exportfile, use_cache=False) #uses BQ if changed, otherwise csv cache file
dates_df['month'] = pd.to_datetime(dates_df['month']) #ensure dates are in datetimeformat
dates_df = dates_df.sort_values(by=['month','vmpp']) #sort data by month then vmpp
dates_df.style

Downloading: 100%|██████████| 6483/6483 [00:00<00:00, 8925.18rows/s]


Unnamed: 0,vmpp,month,concession_bool
2662,959311000001107,2014-08-01 00:00:00,1
2948,975311000001108,2014-08-01 00:00:00,1
3870,1034211000001109,2014-08-01 00:00:00,1
3937,1045211000001109,2014-08-01 00:00:00,1
896,1122311000001100,2014-08-01 00:00:00,1
1658,1132711000001103,2014-08-01 00:00:00,1
774,1191111000001100,2014-08-01 00:00:00,1
1860,1191711000001104,2014-08-01 00:00:00,1
4442,1212011000001107,2014-08-01 00:00:00,1
3026,1245011000001108,2014-08-01 00:00:00,1


In [15]:
#unstacks data, fills missing month data (with zero value where no concession), then restacks
dates_cons_df = dates_df.set_index(['month','vmpp']).unstack().asfreq('MS').fillna(0).stack().sort_index(level=1).reset_index()
#dates_cons_df = dates_cons_df.loc[dates_cons_df['vmpp'] == 1040511000001102]
dates_cons_df.head()

Unnamed: 0,month,vmpp,concession_bool
0,2014-08-01,941211000001102,0.0
1,2014-09-01,941211000001102,0.0
2,2014-10-01,941211000001102,0.0
3,2014-11-01,941211000001102,0.0
4,2014-12-01,941211000001102,0.0


In [4]:
max_date = dates_cons_df["month"].max() + pd.DateOffset(months=-3) #creates variable to ensure that all price concession data have three months after concession ends to ensure calculation of change
pc_summary_df = (dates_cons_df.assign(Consecutive=dates_cons_df.concession_bool
                                .groupby((dates_cons_df.concession_bool != dates_cons_df.concession_bool.shift())
                                    .cumsum()).transform('size')) #creates a value of the number of consecutive months of either price concession or no price concession
          .query('concession_bool > 0') # filters to only where price concession is present 
          .groupby(['vmpp','Consecutive'])
          .aggregate(first_month=('month','first'),  #shows earliest month of consecutive price concession
                     last_month=('month','last')) #shows latest month of consecutive price concession
          .reset_index().query("last_month < @max_date")
          .reset_index(drop=True)
)

pc_summary_df.head()

Unnamed: 0,vmpp,Consecutive,first_month,last_month
0,941211000001102,3,2015-11-01,2016-01-01
1,941211000001102,5,2019-11-01,2020-03-01
2,941311000001105,2,2017-11-01,2017-12-01
3,941311000001105,8,2022-05-01,2022-12-01
4,941511000001104,6,2022-04-01,2022-09-01


In [65]:
#get drug tariff price data from BigQuery
sql = """
  SELECT 
    vmpp.bnf_code as bnf_code, --BNF code (at VMP level)
    vmpp.nm as nm, --name
    vmpp.qtyval as unit_qty, --quantity per pack
    dt.*
  FROM
    ebmdatalab.dmd.tariffprice AS dt --concession table
    INNER JOIN
    dmd.vmpp as vmpp --join to VMPP table to get BNF codes and names
    on
    dt.vmpp = vmpp.id
  WHERE
    dt.vmpp IN (SELECT DISTINCT vmpp FROM ebmdatalab.dmd.ncsoconcession)
"""

exportfile = os.path.join("..","data","tariff.csv") #defines name for cache file
dates_df = bq.cached_read(sql, csv_path=exportfile, use_cache=False) #uses BQ if changed, otherwise csv cache file
dates_df['date'] = pd.to_datetime(dates_df['date'])#ensure dates are in datetimeformat
dates_df['unit_qty'] = pd.to_numeric(dates_df['unit_qty'])

Downloading: 100%|██████████| 102490/102490 [00:07<00:00, 14484.87rows/s]


In [66]:
dates_df['pre_month'] = dates_df['date'] + pd.DateOffset(months=1) #creates extra date column in drug tariff price shifted by one month later, to pick up 3 month rolling mean spend for the month before price concession added
dates_df['post_month'] = dates_df['date'] + pd.DateOffset(months=-3) #creates extra date column in drug tariff price shifted by three months earlier, to pick up 3 month rolling mean spend for the 3 months after price concession added
dates_df['3_month_price'] = dates_df.groupby('vmpp')['price_pence'].transform(lambda x: x.rolling(3, 3).mean()) # create three month rolling average drug tariff cost

In [67]:
dates_df_merge = pd.merge(pc_summary_df, dates_df[['bnf_code', 'nm','unit_qty','vmpp','pre_month','3_month_price']],  how='left', left_on=['vmpp','first_month'], right_on = ['vmpp','pre_month']) #merges price concession information with the 3 month average DT price prior to the start of the price concession
dates_df_merge.rename(columns={'3_month_price' : 'pre_pc_price'}, inplace=True) #rename columns
dates_df_merge = pd.merge(dates_df_merge, dates_df[['vmpp','post_month','3_month_price']],  how='left', left_on=['vmpp','last_month'], right_on = ['vmpp','post_month']) #merges price concession information with the 3 month average DT price after the end of the price concession
dates_df_merge.rename(columns={'3_month_price' : 'post_pc_price'}, inplace=True) #rename columns
dates_df_merge = dates_df_merge.drop(columns=['pre_month', 'post_month']) #drop unneccesary columns
dates_df_merge = dates_df_merge.sort_values(by=['vmpp','first_month']) #sort data by month then vmpp
dates_df_merge['perc_difference'] = (dates_df_merge['post_pc_price']/dates_df_merge['pre_pc_price']-1)
dates_df_merge['rx_merge_date'] = (dates_df_merge['last_month'] + pd.DateOffset(months=1)) #create a merge date for prescribing data, so there's always the three months of rx data available post concession
dates_df_merge = dates_df_merge.sort_values(by=['last_month'], ascending=False) #sort data by month then vmpp



In [68]:
dates_df_merge.head()

Unnamed: 0,vmpp,Consecutive,first_month,last_month,bnf_code,nm,unit_qty,pre_pc_price,post_pc_price,perc_difference,rx_merge_date
875,12202311000001107,7,2022-10-01,2023-04-01,0407010F0AAAFAF,Co-codamol 30mg/500mg effervescent tablets 32 ...,32.0,226.0,321.666667,0.423304,2023-05-01
867,10741811000001101,7,2022-10-01,2023-04-01,0501013B0AAABAB,Amoxicillin 500mg capsules 15 capsule,15.0,144.0,181.666667,0.261574,2023-05-01
511,1257611000001106,10,2022-07-01,2023-04-01,0501012G0AAABAB,Flucloxacillin 500mg capsules 28 capsule,28.0,255.0,297.0,0.164706,2023-05-01
837,9209111000001107,5,2022-12-01,2023-04-01,0501011P0AAARAR,Phenoxymethylpenicillin 125mg/5ml oral solutio...,100.0,357.666667,976.0,1.728798,2023-05-01
853,9757411000001106,7,2022-10-01,2023-04-01,0307000J0AAAAAA,Carbocisteine 375mg capsules 120 capsule,120.0,289.0,750.333333,1.596309,2023-05-01


In [69]:
#get quantity_calcs
sql = """
  SELECT DISTINCT
    date(rx.month) as date_3m_start,
    rx.bnf_code,
    SUM(rx.quantity) OVER(
      PARTITION BY rx.bnf_code
      ORDER BY DATE_DIFF(date(rx.month), '2000-01-01', MONTH)
      RANGE BETWEEN 0 PRECEDING AND 2 FOLLOWING
    )
    as roll_3m_quantity
  FROM
    ebmdatalab.hscic.normalised_prescribing AS rx
    INNER JOIN
    dmd.vmpp as vmpp --join to VMPP table to get BNF codes and names
    on
    rx.bnf_code = vmpp.bnf_code
  WHERE
    vmpp.id IN (SELECT DISTINCT vmpp FROM ebmdatalab.dmd.ncsoconcession)
    ORDER BY date_3m_start DESC
"""

exportfile = os.path.join("..","data","rx_qty.csv") #defines name for cache file
rx_df = bq.cached_read(sql, csv_path=exportfile, use_cache=True) #uses BQ if changed, otherwise csv cache file
rx_df['date_3m_start'] = pd.to_datetime(rx_df['date_3m_start'])#ensure dates are in datetimeformat
rx_df = rx_df[rx_df['date_3m_start'] <= max(rx_df['date_3m_start']) + pd.DateOffset(months=-2)] #limit df to ensure that always 3 full months of data


In [70]:
rx_df.head()

Unnamed: 0,date_3m_start,bnf_code,roll_3m_quantity
1292,2023-04-01,0401020B0AAABAB,798349.0
1293,2023-04-01,0205052P0AAABAB,394853.0
1294,2023-04-01,0803042A0AAABAB,607126.0
1295,2023-04-01,0205051I0AAAAAA,1190697.0
1296,2023-04-01,0409010B0AAAAAA,1594348.0


In [71]:
rx_df_merge = pd.merge(dates_df_merge, rx_df,  how='right', left_on=['bnf_code','rx_merge_date'], right_on = ['bnf_code','date_3m_start'])

In [72]:
rx_df_merge.head()

Unnamed: 0,vmpp,Consecutive,first_month,last_month,bnf_code,nm,unit_qty,pre_pc_price,post_pc_price,perc_difference,rx_merge_date,date_3m_start,roll_3m_quantity
0,4028111000000000.0,9.0,2022-07-01,2023-03-01,0606020R0AAADAD,Risedronate sodium 35mg tablets 4 tablet,4.0,128.0,222.0,0.734375,2023-04-01,2023-04-01,890400.0
1,977011000000000.0,1.0,2019-09-01,2023-03-01,0409010N0AAADAD,Co-careldopa 12.5mg/50mg tablets 90 tablet,90.0,729.666667,419.0,-0.425765,2023-04-01,2023-04-01,8184855.0
2,1227811000000000.0,10.0,2022-06-01,2023-03-01,0401010T0AAANAN,Temazepam 20mg tablets 28 tablet,28.0,117.666667,2651.0,21.529745,2023-04-01,2023-04-01,558318.0
3,965911000000000.0,11.0,2022-05-01,2023-03-01,0603020T0AABHBH,Prednisolone 5mg soluble tablets 30 tablet,30.0,996.666667,5655.0,4.673913,2023-04-01,2023-04-01,986727.0
4,1250511000000000.0,4.0,2022-12-01,2023-03-01,1001010P0AAAEAE,Naproxen 500mg tablets 28 tablet,28.0,133.333333,159.0,0.1925,2023-04-01,2023-04-01,58030531.0


In [75]:
rx_df_merge['3_m_additional_cost'] = 0.01*(rx_df_merge['roll_3m_quantity']/rx_df_merge['unit_qty'])*(rx_df_merge['post_pc_price']-rx_df_merge['pre_pc_price'])

In [89]:
rx_df_merge.head()

Unnamed: 0,vmpp,Consecutive,first_month,last_month,bnf_code,nm,unit_qty,pre_pc_price,post_pc_price,perc_difference,rx_merge_date,date_3m_start,roll_3m_quantity,3_m_additional_cost
0,4028111000000000.0,9.0,2022-07-01,2023-03-01,0606020R0AAADAD,Risedronate sodium 35mg tablets 4 tablet,4.0,128.0,222.0,0.734375,2023-04-01,2023-04-01,890400.0,209244.0
1,977011000000000.0,1.0,2019-09-01,2023-03-01,0409010N0AAADAD,Co-careldopa 12.5mg/50mg tablets 90 tablet,90.0,729.666667,419.0,-0.425765,2023-04-01,2023-04-01,8184855.0,-282529.1
2,1227811000000000.0,10.0,2022-06-01,2023-03-01,0401010T0AAANAN,Temazepam 20mg tablets 28 tablet,28.0,117.666667,2651.0,21.529745,2023-04-01,2023-04-01,558318.0,505144.9
3,965911000000000.0,11.0,2022-05-01,2023-03-01,0603020T0AABHBH,Prednisolone 5mg soluble tablets 30 tablet,30.0,996.666667,5655.0,4.673913,2023-04-01,2023-04-01,986727.0,1532168.0
4,1250511000000000.0,4.0,2022-12-01,2023-03-01,1001010P0AAAEAE,Naproxen 500mg tablets 28 tablet,28.0,133.333333,159.0,0.1925,2023-04-01,2023-04-01,58030531.0,531946.5
