In [26]:
import os
import pandas as pd
import numpy as np
#import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
%matplotlib inline
from ebmdatalab import bq
from ebmdatalab import charts

**PRICE CONCESSION DATA**

In [27]:
#get price concession data from BigQuery
sql = """
  SELECT DISTINCT
    ncso.vmpp AS vmpp,
    ncso.date AS month,    
    1 AS concession_bool --creates a boolean value to show a price concession exists
  FROM
    ebmdatalab.dmd.ncsoconcession AS ncso --concession table
"""
exportfile = os.path.join("..","data","ncso_dates.csv") #defines name for cache file
dates_df = bq.cached_read(sql, csv_path=exportfile, use_cache=True) #uses BQ if changed, otherwise csv cache file
dates_df['month'] = pd.to_datetime(dates_df['month']) #ensure dates are in datetimeformat
dates_df = dates_df.sort_values(by=['month','vmpp']) #sort data by month then vmpp

In [28]:
#unstacks data, fills missing month data (with zero value where no concession), then restacks
dates_cons_df = dates_df.set_index(['month', 'vmpp']).unstack().asfreq('MS').fillna(0).stack().sort_index(level=1).reset_index()

In [29]:
max_date = dates_cons_df["month"].max() + pd.DateOffset(months=-3) #creates variable to ensure that all price concession data have three months after concession ends to ensure calculation of change
pc_summary_df = (dates_cons_df.assign(Consecutive=dates_cons_df.concession_bool
                                .groupby((dates_cons_df.concession_bool != dates_cons_df.concession_bool.shift())
                                    .cumsum()).transform('size')) #creates a value of the number of consecutive months of either price concession or no price concession
          .query('concession_bool > 0') # filters to only where price concession is present 
          .groupby(['vmpp','Consecutive'])
          .aggregate(first_month=('month','first'),  #shows earliest month of consecutive price concession
                     last_month=('month','last')) #shows latest month of consecutive price concession
          .reset_index().query("last_month < @max_date")
)
######THIS IS NOT QUITE WHAT I NEED - IDEALLY HAVE A UNIQUE NUMBER EVERY TIME ONE OF THE GROUPBY CHANGES, IN ORDER TO ENSURE EVERY ONE IS PICKED UP######

In [30]:
#get drug tariff price data from BigQuery
sql = """
  SELECT *
  FROM
    ebmdatalab.dmd.tariffprice
"""

exportfile = os.path.join("..","data","tariff.csv") #defines name for cache file
dates_df = bq.cached_read(sql, csv_path=exportfile, use_cache=True) #uses BQ if changed, otherwise csv cache file
dates_df['date'] = pd.to_datetime(dates_df['date'])#ensure dates are in datetimeformat

In [31]:
dates_df['pre_month'] = dates_df['date'] + pd.DateOffset(months=1) #creates extra date column in drug tariff price shifted by one month later, to pick up 3 month rolling mean spend for the month before price concession added
dates_df['post_month'] = dates_df['date'] + pd.DateOffset(months=-3) #creates extra date column in drug tariff price shifted by three months earlier, to pick up 3 month rolling mean spend for the 3 months after price concession added
dates_df['3_month_price'] = dates_df.groupby('vmpp')['price_pence'].transform(lambda x: x.rolling(3, 3).mean()) # create three month rolling average drug tariff cost

In [48]:
dates_df_merge = pd.merge(pc_summary_df, dates_df[['vmpp','pre_month','3_month_price']],  how='left', left_on=['vmpp','first_month'], right_on = ['vmpp','pre_month']) #merges price concession information with the 3 month average DT price prior to the start of the price concession
dates_df_merge.rename(columns={'3_month_price' : 'pre_pc_price'}, inplace=True) #rename columns
dates_df_merge = pd.merge(dates_df_merge, dates_df[['vmpp','post_month','3_month_price']],  how='left', left_on=['vmpp','last_month'], right_on = ['vmpp','post_month']) #merges price concession information with the 3 month average DT price after the end of the price concession
dates_df_merge.rename(columns={'3_month_price' : 'post_pc_price'}, inplace=True) #rename columns
dates_df_merge = dates_df_merge.drop(columns=['pre_month', 'post_month']) #drop unneccesary columns
dates_df_merge = dates_df_merge.sort_values(by=['vmpp','first_month']) #sort data by month then vmpp
dates_df_merge['perc_difference'] = (dates_df_merge['post_pc_price']/dates_df_merge['pre_pc_price']-1)
dates_df_merge = dates_df_merge.sort_values(by=['perc_difference'], ascending=False) #sort data by month then vmpp

In [49]:
dates_df_merge.style

Unnamed: 0,vmpp,Consecutive,first_month,last_month,pre_pc_price,post_pc_price,perc_difference
204,1082811000001108,12,2016-07-01 00:00:00,2017-06-01 00:00:00,371.333333,16500.0,43.43447
133,1020611000001101,3,2017-04-01 00:00:00,2017-06-01 00:00:00,422.0,17300.0,39.995261
510,1321711000001101,7,2017-06-01 00:00:00,2017-12-01 00:00:00,106.333333,3587.0,32.733542
382,1239711000001106,7,2017-06-01 00:00:00,2017-12-01 00:00:00,160.0,5222.0,31.6375
206,1085111000001108,7,2017-06-01 00:00:00,2017-12-01 00:00:00,208.666667,6175.0,28.592652
465,1290011000001107,10,2016-06-01 00:00:00,2017-03-01 00:00:00,200.0,5671.0,27.355
629,4989011000001105,7,2017-06-01 00:00:00,2017-12-01 00:00:00,307.0,7345.0,22.925081
220,1094611000001100,7,2017-06-01 00:00:00,2017-12-01 00:00:00,229.333333,5130.0,21.369186
315,1182311000001106,5,2017-06-01 00:00:00,2017-10-01 00:00:00,131.333333,2899.0,21.073604
448,1277311000001104,7,2017-09-01 00:00:00,2018-03-01 00:00:00,175.666667,3751.0,20.352941
