# Secondary Outcomes
**S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.**

**S2. Total items prescribed per 1,000 registered patients for Co-proxamol.**
 
**S3. Total items prescribed per 1,000 registered patients for Dosulepin.**

In [1]:
# Boundaries of the baseline and follow-up periods, listed in chronological order
d1 = '2017-01-01' # baseline start
d2 = '2017-07-01' # month after end of baseline period
d3 = '2018-01-01' # follow-up start
d4 = '2018-07-01' # month after end of follow-up period

# Libraries used by the cells below, and the project ID passed to the BigQuery reads
import pandas as pd
import numpy as np
GBQ_PROJECT_ID = '620265099307'


## S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined. 

In [2]:
# Load costs and items data for each of the individual low-priority measures.
# The table suffix is used as the measure name; the 'all_low_priority' and
# 'zomnibus' tables are excluded.

# Filter and grouping shared by both queries. The start date is taken from d1
# (baseline start) rather than repeated as a literal, so the queries stay in step
# with the period definitions.
measure_filter = '''
  WHERE _TABLE_SUFFIX <> 'all_low_priority'
  AND _TABLE_SUFFIX <> 'zomnibus'
  AND month >= '{start}'
  GROUP BY measure, pct_id, month'''.format(start=d1)

#costs:
q2 = '''SELECT _TABLE_SUFFIX AS measure, pct_id, month, sum(numerator) AS cost, sum(denominator) AS denominator FROM
  `ebmdatalab.alex.cost_*`''' + measure_filter
#items:
q3 = '''SELECT _TABLE_SUFFIX AS measure, pct_id, month, sum(numerator) AS items FROM
  `ebmdatalab.alex.items_*`''' + measure_filter

# `verbose` is no longer passed: the keyword was deprecated and then removed from
# pandas.read_gbq, so supplying it fails on current pandas versions.
lpcosts = pd.read_gbq(q2, GBQ_PROJECT_ID, dialect='standard')
lpitems = pd.read_gbq(q3, GBQ_PROJECT_ID, dialect='standard')

# parse the month column so it can be compared against the period boundaries
lpcosts["month"] = pd.to_datetime(lpcosts.month)
lpitems["month"] = pd.to_datetime(lpitems.month)

lpcosts.head() # this gives the first few rows of data


Unnamed: 0,measure,pct_id,month,cost,denominator
0,oxycodone,00C,2018-07-01,669.41572,108.428
1,glutenfree,00C,2018-07-01,1977.39603,108.428
2,homeopathy,00C,2018-07-01,0.0,108.428
3,tramadolpara,00C,2018-07-01,30.26708,108.428
4,dosulepin,00C,2018-07-01,164.01923,108.428


In [3]:
# Combine cost and items into one table, matching rows on measure, CCG and month.
# The outer join keeps rows that are present in only one of the two inputs.
lp = pd.merge(
    lpcosts,
    lpitems,
    how="outer",
    on=["measure", "pct_id", "month"],
)
lp.head()

Unnamed: 0,measure,pct_id,month,cost,denominator,items
0,oxycodone,00C,2018-07-01,669.41572,108.428,22
1,glutenfree,00C,2018-07-01,1977.39603,108.428,121
2,homeopathy,00C,2018-07-01,0.0,108.428,0
3,tramadolpara,00C,2018-07-01,30.26708,108.428,7
4,dosulepin,00C,2018-07-01,164.01923,108.428,102


In [4]:
### label every month with its study period, then keep baseline and follow-up only
import datetime

# np.select uses the first condition that matches, so the boundaries are tested
# from the latest date backwards
period_rules = [
    ('after',     lp['month'] >= d4), # after follow-up period
    ('follow-up', lp['month'] >= d3), # follow-up
    ('mid',       lp['month'] >= d2), # gap between baseline and follow-up
    ('baseline',  lp['month'] >= d1), # baseline
    ('before',    lp['month'] < d1),  # before baseline
]
choices = [label for label, _mask in period_rules]
conditions = [_mask for label, _mask in period_rules]
lp['period'] = np.select(conditions, choices, default='0')

# keep the columns of interest for rows inside the two analysis periods
wanted_cols = ["measure","pct_id","period", "month", "cost","items","denominator"]
in_study = lp['period'].isin(["baseline", "follow-up"])
df2 = lp.loc[in_study, wanted_cols].set_index(["pct_id","period", "month"])
df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,measure,cost,items,denominator
pct_id,period,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00C,follow-up,2018-06-01,tramadolpara,37.28624,6,108.356
00C,follow-up,2018-06-01,rubefacients,1022.11857,95,108.356
00C,follow-up,2018-06-01,coprox,255.93032,3,108.356
00C,follow-up,2018-06-01,tadalafil,1465.51566,46,108.356
00C,follow-up,2018-06-01,dosulepin,169.51614,106,108.356


In [5]:
### sum numerator and average population denominators for each CCG for each period
# Aggregations are given by name: passing the builtin `sum` to agg() is deprecated
# in recent pandas, and in future it would be called directly instead of using the
# NaN-skipping groupby sum (the outer merge above can leave NaN in cost/items).
agg_6m = df2.groupby(["measure","pct_id","period"]).agg({"cost":"sum","items":"sum","denominator":"mean"})
agg_6m.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cost,items,denominator
measure,pct_id,period,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
coprox,00C,baseline,2948.99296,31,107.7325
coprox,00C,follow-up,1874.83241,24,108.279167
coprox,00D,baseline,3457.80011,16,290.8515
coprox,00D,follow-up,967.06012,7,292.0395
coprox,00J,baseline,26605.75613,163,256.521833


In [6]:
### import **allocated** CCGs together with the joint team information
team = pd.read_csv('joint_teams.csv')
allocated = pd.read_csv('randomisation_group.csv')

# attach the joint team details to every allocated row
ccgs = pd.merge(allocated, team, how="left", on="joint_team")
# fill blank ccg_ids from the joint_id column, then keep only the columns needed
ccgs = ccgs.assign(pct_id=ccgs["ccg_id"].combine_first(ccgs["joint_id"]))[
    ["joint_id", "allocation", "pct_id"]
]

# bring in the per-CCG, per-period aggregates for each allocated CCG
df2b = pd.merge(ccgs, agg_6m.reset_index(), how="left", on="pct_id")
df2b.head()


Unnamed: 0,joint_id,allocation,pct_id,measure,period,cost,items,denominator
0,01X,con,01X,coprox,baseline,9100.74863,50,196.474
1,01X,con,01X,coprox,follow-up,9740.93645,47,197.501167
2,01X,con,01X,dosulepin,baseline,6375.88864,2447,196.474
3,01X,con,01X,dosulepin,follow-up,4271.18226,2175,197.501167
4,01X,con,01X,doxazosin,baseline,11780.60775,1081,196.474


In [7]:
# group up to Joint team groups
# note: SUM both numerators and population denominator across geographies
# Only the numeric value columns are summed. The pct_id strings are excluded
# explicitly rather than relying on pandas to drop them (newer pandas would
# concatenate them and add spurious pct_id_* columns).
value_cols = ["cost", "items", "denominator"]
df2c = df2b.groupby(["joint_id","allocation","measure","period"])[value_cols].sum()
# one row per joint team and measure, with one column per (value, period) pair
df2c = df2c.unstack().reset_index()
# flatten the two-level column labels, e.g. ('cost', 'baseline') -> 'cost_baseline';
# the key columns have an empty second level and so gain a trailing underscore
df2c.columns = df2c.columns.map('_'.join)

### calculate aggregated measure values (cost only)
df2c["baseline_calc_value"] = df2c.cost_baseline / df2c.denominator_baseline
df2c["follow_up_calc_value"] = df2c["cost_follow-up"] / df2c["denominator_follow-up"]

df2c.head()


Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,coprox,26605.75613,20065.45311,163,107,256.521833,259.612833,103.717316,77.289912
1,00J,I,dosulepin,3020.08789,1937.38454,1575,1274,256.521833,259.612833,11.77322,7.462592
2,00J,I,doxazosin,10729.89017,7266.42611,1160,783,256.521833,259.612833,41.82837,27.989472
3,00J,I,fentanylir,9581.45259,19996.55911,43,82,256.521833,259.612833,37.351412,77.02454
4,00J,I,glucosamine,5973.5044,7174.23894,319,321,256.521833,259.612833,23.286534,27.634377


In [8]:
# Pick the three measures with the highest baseline cost value in each joint team
by_cost = df2c.sort_values(["joint_id_", "baseline_calc_value"], ascending=False)
# rank 1 = highest baseline value within the joint team
# NOTE(review): rank() uses its default tie method here, so two measures tied
# exactly on the third place would both get 3.5 and be left out - confirm this
# cannot occur in practice.
rank_in_team = by_cost.groupby("joint_id_")["baseline_calc_value"].rank(ascending=False)
df3 = by_cost.assign(measure_rank=rank_in_team)
df4 = df3[df3["measure_rank"] <= 3]
df4.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,measure_rank
710,99K,con,liothyronine,129145.01841,57558.58123,380,172,169.696667,170.398,761.034503,337.789066,1.0
719,99K,con,trimipramine,69674.06189,51635.34145,188,163,169.696667,170.398,410.580026,303.027861,2.0
709,99K,con,lidocaine,41409.52854,35373.67281,662,524,169.696667,170.398,244.020872,207.594413,3.0
692,99J,con,liothyronine,278653.00228,221120.63222,1081,829,485.592,492.281333,573.841831,449.175334,1.0
701,99J,con,trimipramine,204273.63041,172486.96408,531,439,485.592,492.281333,420.669266,350.382906,2.0


In [9]:
# Collapse the top-3 rows of each joint team into one row: costs are added up,
# while the population denominators are averaged across the selected rows.
top3_totals = {
    "cost_baseline": "sum",
    "cost_follow-up": "sum",
    "denominator_baseline": "mean",
    "denominator_follow-up": "mean",
}
df5 = df4.copy().groupby(["joint_id_","allocation_"]).agg(top3_totals)

### calculate aggregated measure values for combined cost for the top 3 measures
df5 = df5.assign(
    baseline_calc_value=df5["cost_baseline"].div(df5["denominator_baseline"]),
    follow_up_calc_value=df5["cost_follow-up"].div(df5["denominator_follow-up"]),
)
df5.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,cost_baseline,cost_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
joint_id_,allocation_,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00J,I,222188.91809,191214.14573,256.521833,259.612833,866.159871,736.535799
00Y,con,164808.49656,147379.1074,252.313167,255.081167,653.190235,577.773378
01F,con,168207.39028,165237.73603,130.523333,131.222333,1288.715098,1259.219615
01J,con,225708.40765,221575.57886,163.463333,164.991,1380.789215,1342.955548
01V,I,332137.37823,338439.64273,278.8905,280.012,1190.923958,1208.66121


In [10]:
# secondary outcome: Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.
# Ordinary least squares regression of the follow-up value on the baseline value
# and the allocation (intervention vs control).

import statsmodels.formula.api as smf
data = df5.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

lm = smf.ols(formula='data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention', data=data).fit()

#output regression coefficients and p-values:
params = pd.DataFrame(lm.params).reset_index().rename(columns={0: 'coefficient','index': 'factor'})
# p-values for the two covariates only (positions 1 and 2); the intercept is left
# without one. .iloc makes the positional lookup explicit, because plain [] with
# integers on this label-indexed Series is deprecated in recent pandas.
pvals = pd.DataFrame(lm.pvalues.iloc[[1,2]]).reset_index().rename(columns={0: 'p value','index': 'factor'})
params.merge(pvals, how='left',on='factor').set_index('factor')

Unnamed: 0_level_0,coefficient,p value
factor,Unnamed: 1_level_1,Unnamed: 2_level_1
Intercept,51.845539,
"data[""baseline_calc_value""]",0.787907,1.848895e-10
intervention,5.828968,0.8928908


## S2: Total items prescribed per 1000 registered patients for Co-proxamol. 

In [11]:
# filter data for coproxamol measure:
# .copy() is taken after the boolean filter so that df6 is an independent frame;
# assigning columns to a filtered slice can raise SettingWithCopyWarning.
df6 = df2c.loc[df2c.measure_=="coprox"].copy()

### calculate aggregated measure values (items per 1000 patients)
# these overwrite the cost-based values carried over from df2c
df6["baseline_calc_value"] = df6.items_baseline / df6.denominator_baseline
df6["follow_up_calc_value"] = df6["items_follow-up"] / df6["denominator_follow-up"]
df6.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,coprox,26605.75613,20065.45311,163,107,256.521833,259.612833,0.635423,0.412152
18,00Y,con,coprox,8034.09214,7397.55534,46,45,252.313167,255.081167,0.182313,0.176414
36,01F,con,coprox,8743.74849,1897.93113,60,13,130.523333,131.222333,0.459688,0.099069
54,01J,con,coprox,10694.05893,3020.67409,45,26,163.463333,164.991,0.275291,0.157584
72,01V,I,coprox,22347.01914,5283.50532,131,49,278.8905,280.012,0.469718,0.174993


In [12]:
## Secondary outcome: Total items prescribed per 1000 registered patients for Co-proxamol.
# Ordinary least squares regression of the follow-up value on the baseline value
# and the allocation (intervention vs control).
import statsmodels.formula.api as smf
data = df6.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

lm = smf.ols(formula='data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention', data=data).fit()

#output regression coefficients and p-values:
params = pd.DataFrame(lm.params).reset_index().rename(columns={0: 'coefficient','index': 'factor'})
# p-values for the two covariates only (positions 1 and 2); the intercept is left
# without one. .iloc makes the positional lookup explicit, because plain [] with
# integers on this label-indexed Series is deprecated in recent pandas.
pvals = pd.DataFrame(lm.pvalues.iloc[[1,2]]).reset_index().rename(columns={0: 'p value','index': 'factor'})
params.merge(pvals, how='left',on='factor').set_index('factor')

Unnamed: 0_level_0,coefficient,p value
factor,Unnamed: 1_level_1,Unnamed: 2_level_1
Intercept,0.015003,
"data[""baseline_calc_value""]",0.589724,5.559887e-12
intervention,-0.010632,0.7746501


## S3: Total items prescribed per 1000 registered patients for Dosulepin. 

In [13]:
# filter data for dosulepin measure:
# .copy() is taken after the boolean filter so that df7 is an independent frame;
# assigning columns to a filtered slice can raise SettingWithCopyWarning.
df7 = df2c.loc[df2c.measure_=="dosulepin"].copy()

### calculate aggregated measure values (items per 1000 patients)
# these overwrite the cost-based values carried over from df2c
df7["baseline_calc_value"] = df7.items_baseline / df7.denominator_baseline
df7["follow_up_calc_value"] = df7["items_follow-up"] / df7["denominator_follow-up"]
df7.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
1,00J,I,dosulepin,3020.08789,1937.38454,1575,1274,256.521833,259.612833,6.139828,4.907307
19,00Y,con,dosulepin,5050.51768,2644.01308,2063,1552,252.313167,255.081167,8.176347,6.084338
37,01F,con,dosulepin,3296.94696,2334.94701,1378,1275,130.523333,131.222333,10.557499,9.716334
55,01J,con,dosulepin,5132.9604,3630.15245,2276,2093,163.463333,164.991,13.923612,12.68554
73,01V,I,dosulepin,3981.79981,2834.57196,1574,1558,278.8905,280.012,5.643792,5.564047


In [14]:
## Secondary outcome: Total items prescribed per 1000 registered patients for Dosulepin.
# Ordinary least squares regression of the follow-up value on the baseline value
# and the allocation (intervention vs control).
import statsmodels.formula.api as smf
data = df7.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

lm = smf.ols(formula='data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention', data=data).fit()

#output regression coefficients and p-values:
params = pd.DataFrame(lm.params).reset_index().rename(columns={0: 'coefficient','index': 'factor'})
# p-values for the two covariates only (positions 1 and 2); the intercept is left
# without one. .iloc makes the positional lookup explicit, because plain [] with
# integers on this label-indexed Series is deprecated in recent pandas.
pvals = pd.DataFrame(lm.pvalues.iloc[[1,2]]).reset_index().rename(columns={0: 'p value','index': 'factor'})
params.merge(pvals, how='left',on='factor').set_index('factor')

Unnamed: 0_level_0,coefficient,p value
factor,Unnamed: 1_level_1,Unnamed: 2_level_1
Intercept,-0.136647,
"data[""baseline_calc_value""]",0.892998,6.414546e-34
intervention,0.025557,0.8544145
