# Secondary Outcomes
**S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.**

**S2. Total items prescribed per 1,000 registered patients for Co-proxamol.**
 
**S3. Total items prescribed per 1,000 registered patients for Dosulepin.**

In [1]:
import os
import requests
import pandas as pd
import numpy as np

from analysis import compute_regression

# Project identifier constant; it is not referenced by any of the cells shown in
# this notebook. NOTE(review): presumably a Google BigQuery project id - confirm
# whether it is still needed here.
GBQ_PROJECT_ID = '620265099307'

# Set dates of baseline and follow-up periods
# Each date is the first month of a window. The period-labelling cell compares
# months against these boundaries from latest to earliest, so a window runs from
# its own start date up to (but not including) the next start date.
baseline_start = '2018-04-01'       # baseline start (first month labelled "baseline")
mid_start = '2018-10-01'            # month after end of baseline period (first month labelled "mid")
followup_start = '2019-04-01'       # follow-up start (first month labelled "follow-up")
post_followup_start = '2019-10-01'  # month after end of follow-up period (first month labelled "after")


In [2]:
# Read the monthly measure data and parse the `month` column into datetimes
# in a single chained expression.
rawdata = pd.read_csv(os.path.join('..', 'data', 'all_measure_data.csv')).assign(
    month=lambda frame: pd.to_datetime(frame["month"])
)
rawdata.head()

Unnamed: 0.1,Unnamed: 0,month,pct_id,items,cost,denominator,measure
0,0,2018-10-01 00:00:00+00:00,00C,4,392.31788,108.634,lpcoprox
1,1,2018-11-01 00:00:00+00:00,00C,2,127.21812,108.594,lpcoprox
2,2,2018-12-01 00:00:00+00:00,00C,6,551.13563,108.595,lpcoprox
3,3,2018-10-01 00:00:00+00:00,00D,1,39.54454,292.679,lpcoprox
4,4,2018-11-01 00:00:00+00:00,00D,3,345.62081,292.747,lpcoprox


## S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined. 

In [3]:
data = rawdata.copy()

### select data only for the baseline and follow-up periods
import datetime

# Each rule pairs a period label with the months it applies to. np.select uses
# the first rule that matches, so the rules run from the latest boundary to the
# earliest; '0' is only used for rows that match no rule at all.
month = data['month']
period_rules = [
    ('after', month >= post_followup_start),
    ('follow-up', month >= followup_start),
    ('mid', month >= mid_start),
    ('baseline', month >= baseline_start),
    ('before', month < baseline_start),
]
conditions = [rule for _, rule in period_rules]
choices = [label for label, _ in period_rules]
data['period'] = np.select(conditions, choices, default='0')

# take columns of interest from df, keeping only baseline and follow-up rows
columns_of_interest = ["measure", "pct_id", "period", "month", "cost", "items", "denominator"]
in_study_period = data['period'].isin(["baseline", "follow-up"])
df2 = data.loc[in_study_period, columns_of_interest].set_index(["pct_id", "period", "month"])
df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,measure,cost,items,denominator
pct_id,period,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00C,baseline,2018-04-01 00:00:00+00:00,lpcoprox,587.41623,8,108.273
00C,baseline,2018-05-01 00:00:00+00:00,lpcoprox,55.69691,1,108.299
00C,baseline,2018-06-01 00:00:00+00:00,lpcoprox,255.93032,3,108.356
00D,baseline,2018-04-01 00:00:00+00:00,lpcoprox,552.77494,4,292.023
00D,baseline,2018-05-01 00:00:00+00:00,lpcoprox,0.0,0,292.107


In [4]:
### sum numerator and average population denominators for each CCG for each period
# Costs and items are totalled over the months of each period, while the
# population denominator is averaged over those months.
# The aggregations are named by string: passing the Python builtin `sum` to
# `agg` is deprecated in recent pandas releases and emits a FutureWarning.
agg_6m = df2.groupby(["measure","pct_id","period"]).agg({"cost":"sum","items":"sum","denominator":"mean"})
agg_6m.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cost,items,denominator
measure,pct_id,period,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
lpcoprox,00C,baseline,1682.94526,23,108.418333
lpcoprox,00C,follow-up,2128.24541,30,108.9115
lpcoprox,00D,baseline,561.78717,5,292.331833
lpcoprox,00D,follow-up,0.0,0,293.096833
lpcoprox,00J,baseline,18105.34852,111,259.248


In [5]:
### import **allocated** CCGs and the joint team information
ccgs = pd.read_csv(os.path.join('..', 'data', 'randomisation_group.csv'))
team = pd.read_csv(os.path.join('..', 'data', 'joint_teams.csv'))

# Left-join the joint team table onto the allocations, then derive one `pct_id`
# per row: the `ccg_id` where the join supplied one, otherwise the `joint_id`.
# NOTE(review): presumably rows without a `ccg_id` are allocations that are a
# single CCG rather than a joint team - confirm against the input files.
ccgs = (
    ccgs.merge(team, on="joint_team", how="left")
        # fill blank ccg_ids from joint_id column
        .assign(pct_id=lambda merged: merged["ccg_id"].combine_first(merged["joint_id"]))
        [["joint_id", "allocation", "pct_id"]]
)

# attach the per-CCG, per-period aggregates to every allocated CCG
df2b = ccgs.merge(agg_6m.reset_index(), on="pct_id", how="left")
df2b.head()


Unnamed: 0,joint_id,allocation,pct_id,measure,period,cost,items,denominator
0,01X,con,01X,lpcoprox,baseline,7845.14647,39,197.704333
1,01X,con,01X,lpcoprox,follow-up,1993.5624,14,198.111167
2,01X,con,01X,lpdosulepin,baseline,4167.29525,2051,197.704333
3,01X,con,01X,lpdosulepin,follow-up,20835.211,1686,198.111167
4,01X,con,01X,lpdoxazosin,baseline,4673.51929,492,197.704333


In [6]:
# group up to Joint team groups
# note: SUM both numerators and population denominator across geographies
# Only the numeric value columns are summed. `pct_id` is a string column that is
# not a grouping key; on pandas >= 2.0 (where `numeric_only` defaults to False)
# it would otherwise be aggregated as well and carried through as spurious
# `pct_id_*` columns after unstacking.
value_columns = ["cost", "items", "denominator"]
df2c = df2b.groupby(["joint_id","allocation","measure","period"])[value_columns].sum()
# Pivot `period` into the columns, then flatten the two-level column index into
# names such as "cost_baseline". The former index columns have an empty second
# level, so they end up with a trailing underscore (e.g. "joint_id_").
df2c = df2c.unstack().reset_index()
df2c.columns = df2c.columns.map('_'.join)

### calculate aggregated measure values (cost only)
df2c["baseline_calc_value"] = df2c["cost_baseline"] / df2c["denominator_baseline"]
df2c["follow_up_calc_value"] = df2c["cost_follow-up"] / df2c["denominator_follow-up"]

df2c.head()


Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,lpcoprox,18105.34852,10348.92981,111,47,259.248,261.553833,69.837949,39.567112
1,00J,I,lpdosulepin,1912.89594,6333.01366,1266,795,259.248,261.553833,7.378633,24.213041
2,00J,I,lpdoxazosin,6478.96813,6131.91956,703,710,259.248,261.553833,24.991391,23.444197
3,00J,I,lpfentanylir,23649.01996,29199.73282,110,83,259.248,261.553833,91.22161,111.639476
4,00J,I,lpglucosamine,6726.00966,2875.0041,299,213,259.248,261.553833,25.944307,10.992017


In [7]:
# find top 3 measures per joint team by baseline cost per population denominator
# Rows are ordered by joint team and value (both descending) and each measure
# is ranked within its joint team, rank 1 being the highest baseline value.
# NOTE(review): `rank` uses its default tie handling (average), so tied values
# share a fractional rank.
df3 = (
    df2c.sort_values(by=["joint_id_", "baseline_calc_value"], ascending=False)
        .assign(
            measure_rank=lambda ordered: ordered.groupby("joint_id_")["baseline_calc_value"].rank(ascending=False)
        )
)
df4 = df3.loc[df3["measure_rank"] <= 3]
df4.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,measure_rank
705,99K,con,lpfentanylir,52079.50137,24239.10571,119,70,170.713667,171.033667,305.069315,141.721254,1.0
710,99K,con,lpliothyronine,50050.901,33104.70501,155,110,170.713667,171.033667,293.186257,193.556658,2.0
719,99K,con,lptrimipramine,48736.87774,52512.58144,164,146,170.713667,171.033667,285.489022,307.030671,3.0
692,99J,con,lpliothyronine,209433.01209,136225.25598,794,638,493.998,500.1035,423.955182,272.394126,1.0
701,99J,con,lptrimipramine,168361.17675,160905.19318,445,375,493.998,500.1035,340.813478,321.743785,2.0


In [8]:
# Collapse the selected top-3 rows to one row per joint team: costs are added
# up across the measures, while the denominators are averaged across them.
df5 = df4.groupby(["joint_id_", "allocation_"]).agg({
    "cost_baseline": "sum",
    "cost_follow-up": "sum",
    "denominator_baseline": "mean",
    "denominator_follow-up": "mean",
})

### calculate aggregated measure values for combined cost for the top 3 measures
df5 = df5.assign(**{
    "baseline_calc_value": df5["cost_baseline"] / df5["denominator_baseline"],
    "follow_up_calc_value": df5["cost_follow-up"] / df5["denominator_follow-up"],
})
df5.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,cost_baseline,cost_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
joint_id_,allocation_,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00J,I,179617.4137,161140.38067,259.248,261.553833,692.840113,616.088775
00Y,con,140982.1327,65189.09093,255.777833,258.976833,551.1898,251.717847
01F,con,163150.55569,105996.32564,131.572667,132.790333,1240.003413,798.223206
01J,con,203730.64187,158096.42026,165.419167,166.806,1231.602395,947.786172
01V,I,300578.38709,247990.96216,280.439333,281.692833,1071.812515,880.359501


In [9]:
# secondary outcome: Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.

import statsmodels.formula.api as smf
data = df5.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

# Summary statistics per arm. The aggregations are given as lists rather than
# sets so that the order of the output columns (mean, then std) is deterministic.
out = data.groupby("intervention").agg({"joint_id_":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})

# difference between the arm-level mean follow-up and mean baseline values
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]

display(out)

# Model: follow-up value regressed on baseline value and the intervention indicator.
# NOTE(review): the formula refers to `data[...]` rather than to bare column
# names, so it relies on a variable called `data` being resolvable wherever
# compute_regression evaluates the formula - confirm against analysis.py.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)


Unnamed: 0_level_0,joint_id_,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,222.271509,877.234799,207.484766,679.54347,-197.691329
1,20,156.20841,746.320181,147.509083,576.783031,-169.53715


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.726
Model:,OLS,Adj. R-squared:,0.711
Method:,Least Squares,F-statistic:,49.05
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,3.93e-11
Time:,11:44:01,Log-Likelihood:,-239.2
No. Observations:,40,AIC:,484.4
Df Residuals:,37,BIC:,489.5
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-9.5210,76.975,-0.124,0.902,-165.488,146.446
"data[""baseline_calc_value""]",0.7855,0.084,9.351,0.000,0.615,0.956
intervention,0.0725,33.324,0.002,0.998,-67.449,67.594

0,1,2,3
Omnibus:,3.168,Durbin-Watson:,2.039
Prob(Omnibus):,0.205,Jarque-Bera (JB):,2.948
Skew:,-0.634,Prob(JB):,0.229
Kurtosis:,2.599,Cond. No.,4200.0


## S2. Total items prescribed per 1,000 registered patients for Co-proxamol.

In [10]:
# filter data for coproxamol measure:
# Take an explicit copy of the filtered rows so that the columns assigned below
# are written to an independent frame (avoids pandas' SettingWithCopyWarning).
df6 = df2c.loc[df2c.measure_=="lpcoprox"].copy()

### calculate aggregated measure values (items per 1000 patients)
# these replace the cost-based calc values carried over from df2c
df6["baseline_calc_value"] = df6.items_baseline / df6.denominator_baseline
df6["follow_up_calc_value"] = df6["items_follow-up"] / df6["denominator_follow-up"]
df6.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,lpcoprox,18105.34852,10348.92981,111,47,259.248,261.553833,0.428161,0.179695
18,00Y,con,lpcoprox,7201.93514,6073.37376,46,37,255.777833,258.976833,0.179844,0.14287
36,01F,con,lpcoprox,2144.7135,2289.76199,12,9,131.572667,132.790333,0.091204,0.067776
54,01J,con,lpcoprox,2932.11798,1812.5043,24,18,165.419167,166.806,0.145086,0.10791
72,01V,I,lpcoprox,4636.03187,2604.61799,42,19,280.439333,281.692833,0.149765,0.067449


In [11]:
## Secondary outcome: Total items prescribed per 1000 registered patients for Co-proxamol.
import statsmodels.formula.api as smf
data = df6.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})


# Summary statistics per arm. The aggregations are given as lists rather than
# sets so that the order of the output columns (mean, then std) is deterministic.
out = data.groupby("intervention").agg({"joint_id_":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})

# difference between the arm-level mean follow-up and mean baseline values
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]

display(out)

# Model: follow-up value regressed on baseline value and the intervention indicator.
# NOTE(review): the formula refers to `data[...]` rather than to bare column
# names, so it relies on a variable called `data` being resolvable wherever
# compute_regression evaluates the formula - confirm against analysis.py.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)

Unnamed: 0_level_0,joint_id_,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,0.17604,0.257955,0.133597,0.184193,-0.073762
1,20,0.218809,0.299786,0.197092,0.191103,-0.108683


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.85
Model:,OLS,Adj. R-squared:,0.842
Method:,Least Squares,F-statistic:,105.2
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,5.38e-16
Time:,11:44:01,Log-Likelihood:,53.533
No. Observations:,40,AIC:,-101.1
Df Residuals:,37,BIC:,-96.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0175,0.020,-0.863,0.394,-0.059,0.024
"data[""baseline_calc_value""]",0.7819,0.054,14.504,0.000,0.673,0.891
intervention,-0.0258,0.021,-1.229,0.227,-0.068,0.017

0,1,2,3
Omnibus:,0.077,Durbin-Watson:,1.865
Prob(Omnibus):,0.962,Jarque-Bera (JB):,0.263
Skew:,-0.069,Prob(JB):,0.877
Kurtosis:,2.627,Cond. No.,6.3


## S3. Total items prescribed per 1,000 registered patients for Dosulepin.

In [12]:
# filter data for dosulepin measure:
# Take an explicit copy of the filtered rows so that the columns assigned below
# are written to an independent frame (avoids pandas' SettingWithCopyWarning).
df7 = df2c.loc[df2c.measure_=="lpdosulepin"].copy()

### calculate aggregated measure values (items per 1000 patients)
# these replace the cost-based calc values carried over from df2c
df7["baseline_calc_value"] = df7.items_baseline / df7.denominator_baseline
df7["follow_up_calc_value"] = df7["items_follow-up"] / df7["denominator_follow-up"]
df7.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
1,00J,I,lpdosulepin,1912.89594,6333.01366,1266,795,259.248,261.553833,4.883355,3.039527
19,00Y,con,lpdosulepin,2537.48637,11897.58863,1476,1076,255.777833,258.976833,5.770633,4.154812
37,01F,con,lpdosulepin,2231.53336,10893.21717,1244,930,131.572667,132.790333,9.454851,7.003522
55,01J,con,lpdosulepin,3464.89964,20279.28536,2062,1774,165.419167,166.806,12.465303,10.635109
73,01V,I,lpdosulepin,3022.75316,17020.62873,1535,1453,280.439333,281.692833,5.473555,5.158101


In [13]:
## Secondary outcome: Total items prescribed per 1000 registered patients for Dosulepin.
import statsmodels.formula.api as smf
data = df7.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

# Summary statistics per arm. The aggregations are given as lists rather than
# sets so that the order of the output columns (mean, then std) is deterministic.
out = data.groupby("intervention").agg({"joint_id_":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})

# difference between the arm-level mean follow-up and mean baseline values
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]

display(out)

# Model: follow-up value regressed on baseline value and the intervention indicator.
# NOTE(review): the formula refers to `data[...]` rather than to bare column
# names, so it relies on a variable called `data` being resolvable wherever
# compute_regression evaluates the formula - confirm against analysis.py.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)

Unnamed: 0_level_0,joint_id_,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,2.589369,6.992991,2.220098,5.912151,-1.08084
1,20,3.548865,6.5853,3.175514,5.622151,-0.963149


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.958
Model:,OLS,Adj. R-squared:,0.956
Method:,Least Squares,F-statistic:,425.4
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,2.94e-26
Time:,11:44:01,Log-Likelihood:,-32.551
No. Observations:,40,AIC:,71.1
Df Residuals:,37,BIC:,76.17
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.1252,0.243,-0.515,0.609,-0.618,0.367
"data[""baseline_calc_value""]",0.8634,0.030,29.123,0.000,0.803,0.923
intervention,0.0620,0.180,0.344,0.732,-0.303,0.427

0,1,2,3
Omnibus:,10.205,Durbin-Watson:,1.902
Prob(Omnibus):,0.006,Jarque-Bera (JB):,9.469
Skew:,-1.009,Prob(JB):,0.00879
Kurtosis:,4.268,Cond. No.,22.0


# Additional analyses (not pre-specified)

**1. Change in the top 3 measures per CCG by cost, *excluding the herbal measure*, which was not included at the time of the interventions.**

In [14]:
# find top 3 measures per joint team by baseline cost - excluding herbal which was not included at the time of the interventions.
# Same ranking as for the pre-specified outcome, applied after dropping the
# "lpherbal" rows: rank 1 is the highest baseline value within a joint team.
not_herbal = df2c["measure_"].ne("lpherbal")
df3 = (
    df2c[not_herbal]
        .sort_values(by=["joint_id_", "baseline_calc_value"], ascending=False)
        .assign(
            measure_rank=lambda ordered: ordered.groupby("joint_id_")["baseline_calc_value"].rank(ascending=False)
        )
)
df4 = df3.loc[df3["measure_rank"] <= 3]
df4.head()

Unnamed: 0,joint_id_,allocation_,measure_,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,measure_rank
705,99K,con,lpfentanylir,52079.50137,24239.10571,119,70,170.713667,171.033667,305.069315,141.721254,1.0
710,99K,con,lpliothyronine,50050.901,33104.70501,155,110,170.713667,171.033667,293.186257,193.556658,2.0
719,99K,con,lptrimipramine,48736.87774,52512.58144,164,146,170.713667,171.033667,285.489022,307.030671,3.0
692,99J,con,lpliothyronine,209433.01209,136225.25598,794,638,493.998,500.1035,423.955182,272.394126,1.0
701,99J,con,lptrimipramine,168361.17675,160905.19318,445,375,493.998,500.1035,340.813478,321.743785,2.0


In [15]:
# Collapse the selected top-3 rows (herbal excluded) to one row per joint team:
# costs are added up across the measures, while the denominators are averaged.
df5 = df4.groupby(["joint_id_", "allocation_"]).agg({
    "cost_baseline": "sum",
    "cost_follow-up": "sum",
    "denominator_baseline": "mean",
    "denominator_follow-up": "mean",
})

### calculate aggregated measure values for combined cost for the top 3 measures
df5 = df5.assign(**{
    "baseline_calc_value": df5["cost_baseline"] / df5["denominator_baseline"],
    "follow_up_calc_value": df5["cost_follow-up"] / df5["denominator_follow-up"],
})
df5.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,cost_baseline,cost_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
joint_id_,allocation_,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00J,I,179617.4137,161140.38067,259.248,261.553833,692.840113,616.088775
00Y,con,140982.1327,65189.09093,255.777833,258.976833,551.1898,251.717847
01F,con,163150.55569,105996.32564,131.572667,132.790333,1240.003413,798.223206
01J,con,203730.64187,158096.42026,165.419167,166.806,1231.602395,947.786172
01V,I,300578.38709,247990.96216,280.439333,281.692833,1071.812515,880.359501


In [16]:
# additional analysis (not pre-specified): Cost per 1,000 patients for the top 3
# “low-priority” treatments combined, with the herbal measure excluded from the ranking.

import statsmodels.formula.api as smf
data = df5.copy().reset_index()
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

# Summary statistics per arm. The aggregations are given as lists rather than
# sets so that the order of the output columns (mean, then std) is deterministic.
out = data.groupby("intervention").agg({"joint_id_":"nunique",
                                 "baseline_calc_value":["mean","std"],
                                 "follow_up_calc_value":["mean","std"]})

# difference between the arm-level mean follow-up and mean baseline values
out["change"] = out[("follow_up_calc_value","mean")] - out[("baseline_calc_value","mean")]

display(out)

# Model: follow-up value regressed on baseline value and the intervention indicator.
# NOTE(review): the formula refers to `data[...]` rather than to bare column
# names, so it relies on a variable called `data` being resolvable wherever
# compute_regression evaluates the formula - confirm against analysis.py.
formula = ('data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention')
compute_regression(data, formula=formula)

Unnamed: 0_level_0,joint_id_,baseline_calc_value,baseline_calc_value,follow_up_calc_value,follow_up_calc_value,change
Unnamed: 0_level_1,nunique,std,mean,std,mean,Unnamed: 6_level_1
intervention,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,20,222.271509,877.234799,207.484766,679.54347,-197.691329
1,20,156.20841,746.320181,147.509083,576.783031,-169.53715


0,1,2,3
Dep. Variable:,"data[""follow_up_calc_value""]",R-squared:,0.726
Model:,OLS,Adj. R-squared:,0.711
Method:,Least Squares,F-statistic:,49.05
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,3.93e-11
Time:,11:44:01,Log-Likelihood:,-239.2
No. Observations:,40,AIC:,484.4
Df Residuals:,37,BIC:,489.5
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-9.5210,76.975,-0.124,0.902,-165.488,146.446
"data[""baseline_calc_value""]",0.7855,0.084,9.351,0.000,0.615,0.956
intervention,0.0725,33.324,0.002,0.998,-67.449,67.594

0,1,2,3
Omnibus:,3.168,Durbin-Watson:,2.039
Prob(Omnibus):,0.205,Jarque-Bera (JB):,2.948
Skew:,-0.634,Prob(JB):,0.229
Kurtosis:,2.599,Cond. No.,4200.0
