# Secondary Outcomes
**S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.**

**S2. Total items prescribed per 1000 registered patients for Co-proxamol.**
 
**S3. Total items prescribed per 1000 registered patients for Dosulepin.**

In [1]:
import os
import requests
import pandas as pd
import numpy as np

from analysis import compute_regression

# Project identifier (presumably for Google BigQuery, judging by the name;
# none of the cells visible in this notebook reference it)
GBQ_PROJECT_ID = "620265099307"

# Study period boundaries, each the first day of a month (ISO format).
# A period runs from its own start up to, but not including, the next boundary.
baseline_start      = "2018-04-01"  # first month of the baseline period
mid_start           = "2018-10-01"  # first month after the baseline period ends
followup_start      = "2019-04-01"  # first month of the follow-up period
post_followup_start = "2019-10-01"  # first month after the follow-up period ends


In [2]:
# Read the dataset that the primary outcome notebook is expected to have
# written already: per-measure cost/items numerators together with the
# population denominators.
measure_csv = os.path.join('..', 'data', 'all_measure_data.csv')
rawdata = pd.read_csv(measure_csv)

# Convert the month strings to datetimes so they can be compared with the
# period boundaries later on
rawdata["month"] = pd.to_datetime(rawdata["month"])
rawdata.head(2)

Unnamed: 0.1,Unnamed: 0,month,pct_id,items,cost,denominator,measure
0,0,2018-10-01 00:00:00+00:00,00C,4,392.31788,108.634,lpcoprox
1,1,2018-11-01 00:00:00+00:00,00C,2,127.21812,108.594,lpcoprox


In [3]:
# Work on a copy so the frame loaded from disk stays untouched
data = rawdata.copy()

### label every month with the study period it falls in
# np.select uses the FIRST condition that is True for a row, so the boundaries
# are tested from latest to earliest: each label therefore covers
# [its own start, the next boundary).
conditions = [
    (data['month'] >= post_followup_start),
    (data['month'] >= followup_start),
    (data['month'] >= mid_start),
    (data['month'] >= baseline_start),
    (data['month'] < baseline_start)]

choices = ['after', 'follow-up', 'mid', 'baseline','before']
# '0' is only assigned to rows for which none of the comparisons is True
data['period'] = np.select(conditions, choices, default='0')

### select data only for the baseline and follow-up periods
# take columns of interest from df
df2 = data[["measure","pct_id","period", "month", "cost","items","denominator"]]
df2 = df2.loc[df2['period'].isin(["baseline", "follow-up"])].set_index(
    ["pct_id","period", "month"])
df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,measure,cost,items,denominator
pct_id,period,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00C,baseline,2018-04-01 00:00:00+00:00,lpcoprox,587.41623,8,108.273
00C,baseline,2018-05-01 00:00:00+00:00,lpcoprox,55.69691,1,108.299
00C,baseline,2018-06-01 00:00:00+00:00,lpcoprox,255.93032,3,108.356
00D,baseline,2018-04-01 00:00:00+00:00,lpcoprox,552.77494,4,292.023
00D,baseline,2018-05-01 00:00:00+00:00,lpcoprox,0.0,0,292.107


In [4]:
### sum numerators and average population denominators for each CCG for each period
# The string aliases "sum"/"mean" are used instead of the Python builtin `sum`:
# newer pandas versions emit a FutureWarning when builtins are passed to .agg().
agg_6m = df2.groupby(["measure","pct_id","period"]).agg(
    {"cost": "sum", "items": "sum", "denominator": "mean"})

### CCGs that have been allocated in the RCT
ccgs = pd.read_csv(os.path.join('..','data','randomisation_group.csv'))
# import joint team information
team = pd.read_csv(os.path.join('..','data','joint_teams.csv'))

# left join: every randomised row is kept even if it has no joint-team match
ccgs = ccgs.merge(team,on="joint_team", how="left")
# Where ccg_id is blank, fall back to joint_id so every row has a pct_id to
# join the measure data on.
# NOTE(review): blanks are presumably CCGs that are not part of a joint team -
# confirm against the two input files.
ccgs["pct_id"] = ccgs["ccg_id"].combine_first(ccgs["joint_id"])
ccgs = ccgs[["joint_id","allocation","pct_id"]]

# Combine CCG/Joint Team info with measure data
# (one row per pct_id x measure x period after the join)
rct_agg_6m = ccgs.merge(agg_6m.reset_index(), on="pct_id",how="left")
rct_agg_6m.head(3)


Unnamed: 0,joint_id,allocation,pct_id,measure,period,cost,items,denominator
0,01X,con,01X,lpcoprox,baseline,7845.14647,39,197.704333
1,01X,con,01X,lpcoprox,follow-up,1993.5624,14,198.111167
2,01X,con,01X,lpdosulepin,baseline,4167.29525,2051,197.704333


In [5]:
# group up to Joint team groups
# note: SUM both numerators and population denominator across geographies
#
# Only the numeric columns are summed. Without the explicit selection the
# string column `pct_id` is also passed to .sum(); pandas >= 2.0 no longer
# drops such columns silently and would add concatenated `pct_id_*` columns.
#
# NOTE: this cell overwrites `rct_agg_6m` with the wide (one row per joint
# team and measure) table, so the cell that builds the long table has to be
# re-run before this one can be re-run.
rct_agg_6m = (
    rct_agg_6m
    .groupby(["joint_id","allocation","measure","period"])[["cost","items","denominator"]]
    .sum()
    .unstack()      # period values become a second column level
    .reset_index()
)
# flatten the two column levels to e.g. "cost_baseline", "cost_follow-up";
# the strip removes the trailing "_" left on the former index columns
rct_agg_6m.columns = rct_agg_6m.columns.map('_'.join).map(lambda x: x.strip("_"))

### calculate aggregated measure values (cost only)
rct_agg_6m["baseline_calc_value"] = rct_agg_6m.cost_baseline / rct_agg_6m.denominator_baseline
rct_agg_6m["follow_up_calc_value"] = rct_agg_6m["cost_follow-up"] / rct_agg_6m["denominator_follow-up"]

rct_agg_6m.head(2)


Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,lpcoprox,18105.34852,10348.92981,111,47,259.248,261.553833,69.837949,39.567112
1,00J,I,lpdosulepin,1912.89594,6333.01366,1266,795,259.248,261.553833,7.378633,24.213041


## S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined. 

In [6]:
# Keep, for every joint team, the measures whose baseline cost value ranks
# 1st-3rd (rank 1 = highest baseline value within the joint team).
top_3 = (
    rct_agg_6m
    .sort_values(by=["joint_id", "baseline_calc_value"], ascending=False)
    .assign(
        measure_rank=lambda frame: frame.groupby("joint_id")["baseline_calc_value"]
                                        .rank(ascending=False)
    )
    .loc[lambda frame: frame["measure_rank"] <= 3]
)
top_3.head(2)

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,measure_rank
705,99K,con,lpfentanylir,52079.50137,24239.10571,119,70,170.713667,171.033667,305.069315,141.721254,1.0
710,99K,con,lpliothyronine,50050.901,33104.70501,155,110,170.713667,171.033667,293.186257,193.556658,2.0


In [7]:
# Sanity check: herbal medicine was not available as a measure at the time of
# the intervention, so it should not appear in any top 3 (expect no rows).
top_3.loc[top_3["measure"].eq("lpherbal")]

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,measure_rank


In [10]:
# Collapse the selected measures into one row per joint team:
# costs are added together, while the population denominator is averaged
# across the selected rows of that joint team.
combined_spec = {
    "cost_baseline": "sum",
    "cost_follow-up": "sum",
    "denominator_baseline": "mean",
    "denominator_follow-up": "mean",
}
top_3 = top_3.groupby(["joint_id", "allocation"]).agg(combined_spec)

### combined cost of the top 3 measures relative to the population denominator
top_3["baseline_calc_value"] = top_3["cost_baseline"] / top_3["denominator_baseline"]
top_3["follow_up_calc_value"] = top_3["cost_follow-up"] / top_3["denominator_follow-up"]
top_3.head(2)


Unnamed: 0_level_0,Unnamed: 1_level_0,cost_baseline,cost_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
joint_id,allocation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00J,I,179617.4137,161140.38067,259.248,261.553833,692.840113,616.088775
00Y,con,140982.1327,65189.09093,255.777833,258.976833,551.1898,251.717847


In [12]:
# secondary outcome: Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.

# joint_id/allocation are index levels of top_3; turn them back into columns
data = top_3.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation.map({'con':0, 'I':1})
# .map() turns any label missing from the dict into NaN; fail loudly instead
# of letting such rows drop out of the summary unnoticed
assert data['intervention'].notna().all(), "unexpected allocation label"

# summary data:
# (aggregations are given as lists, not sets, so the column order of the
# summary table is the same on every run)
out = data.groupby("intervention").agg({"joint_id":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]
display(out)

# Regress the follow-up value on the baseline value and the trial arm.
# NOTE(review): the formula refers to the frame as `data[...]`, so it depends
# on how compute_regression evaluates it - confirm before renaming `data`.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)


Unnamed: 0_level_0,joint_id,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,225.697333,866.845267,206.789446,664.082485,-202.762782
1,20,159.695379,756.709713,157.760994,592.244016,-164.465698


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.728
Model:,OLS,Adj. R-squared:,0.713
Method:,Least Squares,F-statistic:,49.49
Date:,"Mon, 23 Nov 2020",Prob (F-statistic):,3.49e-11
Time:,11:01:42,Log-Likelihood:,-239.07
No. Observations:,40,AIC:,484.1
Df Residuals:,37,BIC:,489.2
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-26.4092,74.691,-0.354,0.726,-177.747,124.929
"data[""baseline_calc_value""]",0.7966,0.082,9.681,0.000,0.630,0.963
intervention,15.8908,32.640,0.487,0.629,-50.245,82.026

0,1,2,3
Omnibus:,3.158,Durbin-Watson:,2.059
Prob(Omnibus):,0.206,Jarque-Bera (JB):,2.927
Skew:,-0.636,Prob(JB):,0.231
Kurtosis:,2.627,Cond. No.,4080.0


## S2: Total items prescribed per 1000 registered patients for Co-proxamol. 

In [13]:
# filter data for coproxamol measure:
# .copy() is taken AFTER filtering so that `coprox` is an independent frame;
# assigning columns to a bare .loc slice can raise SettingWithCopyWarning.
coprox = rct_agg_6m.loc[rct_agg_6m.measure=="lpcoprox"].copy()

### calculate aggregated measure values (items per 1000 patients)
# these overwrite the cost-based values carried over from rct_agg_6m
coprox["baseline_calc_value"] = coprox.items_baseline / coprox.denominator_baseline
coprox["follow_up_calc_value"] = coprox["items_follow-up"] / coprox["denominator_follow-up"]
coprox.head(2)

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,lpcoprox,18105.34852,10348.92981,111,47,259.248,261.553833,0.428161,0.179695
18,00Y,con,lpcoprox,7201.93514,6073.37376,46,37,255.777833,258.976833,0.179844,0.14287


In [14]:
## Secondary outcome: Total items prescribed per 1000 registered patients for Co-proxamol.
data = coprox.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation.map({'con':0, 'I':1})
# .map() turns any label missing from the dict into NaN; fail loudly instead
# of letting such rows drop out of the summary unnoticed
assert data['intervention'].notna().all(), "unexpected allocation label"

# summary data:
# (aggregations are given as lists, not sets, so the column order of the
# summary table is the same on every run)
out = data.groupby("intervention").agg({"joint_id":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]
display(out)

# Regress the follow-up value on the baseline value and the trial arm.
# NOTE(review): the formula refers to the frame as `data[...]`, so it depends
# on how compute_regression evaluates it - confirm before renaming `data`.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)

Unnamed: 0_level_0,joint_id,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,0.159883,0.295355,0.122855,0.19899,-0.096364
1,20,0.231633,0.262386,0.203357,0.176306,-0.086081


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.844
Model:,OLS,Adj. R-squared:,0.836
Method:,Least Squares,F-statistic:,100.4
Date:,"Mon, 23 Nov 2020",Prob (F-statistic):,1.12e-15
Time:,11:02:25,Log-Likelihood:,52.743
No. Observations:,40,AIC:,-99.49
Df Residuals:,37,BIC:,-94.42
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0300,0.022,-1.358,0.183,-0.075,0.015
"data[""baseline_calc_value""]",0.7754,0.055,14.134,0.000,0.664,0.887
intervention,0.0029,0.021,0.135,0.894,-0.040,0.046

0,1,2,3
Omnibus:,0.116,Durbin-Watson:,1.812
Prob(Omnibus):,0.944,Jarque-Bera (JB):,0.157
Skew:,-0.11,Prob(JB):,0.924
Kurtosis:,2.786,Cond. No.,6.35


## S3: Total items prescribed per 1000 registered patients for Dosulepin. 

In [15]:
# filter data for dosulepin measure:
# .copy() is taken AFTER filtering so that `dosulepin` is an independent frame;
# assigning columns to a bare .loc slice can raise SettingWithCopyWarning.
dosulepin = rct_agg_6m.loc[rct_agg_6m.measure=="lpdosulepin"].copy()

### calculate aggregated measure values (items per 1000 patients)
# these overwrite the cost-based values carried over from rct_agg_6m
dosulepin["baseline_calc_value"] = dosulepin.items_baseline / dosulepin.denominator_baseline
dosulepin["follow_up_calc_value"] = dosulepin["items_follow-up"] / dosulepin["denominator_follow-up"]
dosulepin.head(2)

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
1,00J,I,lpdosulepin,1912.89594,6333.01366,1266,795,259.248,261.553833,4.883355,3.039527
19,00Y,con,lpdosulepin,2537.48637,11897.58863,1476,1076,255.777833,258.976833,5.770633,4.154812


In [16]:
## Secondary outcome: Total items prescribed per 1000 registered patients for Dosulepin.
data = dosulepin.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation.map({'con':0, 'I':1})
# .map() turns any label missing from the dict into NaN; fail loudly instead
# of letting such rows drop out of the summary unnoticed
assert data['intervention'].notna().all(), "unexpected allocation label"

# summary data:
# (aggregations are given as lists, not sets, so the column order of the
# summary table is the same on every run)
out = data.groupby("intervention").agg({"joint_id":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]
display(out)

# Regress the follow-up value on the baseline value and the trial arm.
# NOTE(review): the formula refers to the frame as `data[...]`, so it depends
# on how compute_regression evaluates it - confirm before renaming `data`.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)

Unnamed: 0_level_0,joint_id,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,2.774081,7.113155,2.427627,6.058305,-1.05485
1,20,3.38677,6.465136,2.997525,5.475997,-0.989139


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.958
Model:,OLS,Adj. R-squared:,0.956
Method:,Least Squares,F-statistic:,424.1
Date:,"Mon, 23 Nov 2020",Prob (F-statistic):,3.09e-26
Time:,11:02:48,Log-Likelihood:,-32.605
No. Observations:,40,AIC:,71.21
Df Residuals:,37,BIC:,76.28
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0750,0.247,-0.304,0.763,-0.576,0.426
"data[""baseline_calc_value""]",0.8622,0.030,28.945,0.000,0.802,0.923
intervention,-0.0236,0.181,-0.130,0.897,-0.390,0.343

0,1,2,3
Omnibus:,8.691,Durbin-Watson:,1.902
Prob(Omnibus):,0.013,Jarque-Bera (JB):,7.562
Skew:,-0.923,Prob(JB):,0.0228
Kurtosis:,4.063,Cond. No.,22.4
