# Primary outcomes
**P1. Cost per 1,000 patients for all 18 pre-specified “low-priority” treatments combined.**

**P2. Total items per 1,000 patients for all 18 pre-specified “low-priority” treatments combined.**


In [1]:
# Boundaries of the baseline and follow-up periods, in chronological order
d1 = '2017-01-01'  # first month of the baseline period
d2 = '2017-07-01'  # first month after the baseline period ends
d3 = '2018-01-01'  # first month of the follow-up period
d4 = '2018-07-01'  # first month after the follow-up period ends

# Load the cost and item data from BigQuery
import pandas as pd
import numpy as np
GBQ_PROJECT_ID = '620265099307'

# P1 input: costs for the combined measure (totals, not split into individual
# measures), restricted to months in 2017 or later
q = '''SELECT * FROM ebmdatalab.measures.ccg_data_lpzomnibus
WHERE EXTRACT (YEAR from month)  >= 2017
'''

# P2 input: items summed over the individual low-priority measure tables.
# The two aggregate tables ('all_low_priority', 'zomnibus') are excluded by suffix.
q3 = '''SELECT pct_id, month, sum(numerator) AS items FROM
  `ebmdatalab.alex.items_*`
  WHERE _TABLE_SUFFIX <> 'all_low_priority'
  AND _TABLE_SUFFIX <> 'zomnibus'
  AND month >= '2017-01-01'
  GROUP BY pct_id, month'''

# run each query, then convert its month column to datetime
df1 = pd.read_gbq(q, project_id=GBQ_PROJECT_ID, dialect='standard', verbose=False)
df1["month"] = pd.to_datetime(df1["month"])

lpitems = pd.read_gbq(q3, project_id=GBQ_PROJECT_ID, dialect='standard', verbose=False)
lpitems["month"] = pd.to_datetime(lpitems["month"])

# preview the first few rows of the cost data
df1.head()

Unnamed: 0,pct_id,month,numerator,denominator,calc_value,percentile
0,00C,2017-04-01,20770.54861,107.776,192.719609,0.731959
1,00C,2017-02-01,18554.53583,107.615,172.415888,0.489691
2,00C,2018-04-01,16142.79865,108.273,149.093483,0.639175
3,00C,2018-03-01,17915.92145,108.235,165.527985,0.639175
4,00C,2017-10-01,20805.13246,108.136,192.397837,0.721649


In [2]:
# Join the item counts (lpitems) onto the cost data (df1) by CCG and month.
# The outer join keeps pct_id/month pairs that appear in only one of the two frames.
df1a = df1.merge(lpitems, how="outer", on=["pct_id", "month"])
# order rows by CCG, then chronologically
df1a = df1a.sort_values(by=["pct_id", "month"])
df1a.head()

Unnamed: 0,pct_id,month,numerator,denominator,calc_value,percentile,items
18,00C,2017-01-01,20288.48891,107.615,188.528448,0.592784,928
1,00C,2017-02-01,18554.53583,107.615,172.415888,0.489691,797
17,00C,2017-03-01,22876.98454,107.615,212.581745,0.680412,958
0,00C,2017-04-01,20770.54861,107.776,192.719609,0.731959,867
5,00C,2017-05-01,21191.8976,107.886,196.428615,0.634021,945


In [3]:
### Label each month with its study period, then keep baseline and follow-up only
import datetime

# np.select assigns the label of the FIRST condition that holds, so the
# boundaries are tested from the latest (d4) back to the earliest (d1).
conditions = [df1a['month'] >= boundary for boundary in (d4, d3, d2, d1)]
conditions.append(df1a['month'] < d1)  # anything earlier than the baseline start

choices = ['after', 'follow-up', 'mid', 'baseline', 'before']
# rows that satisfy none of the conditions are labelled '0'
df1a['period'] = np.select(conditions, choices, default='0')

# restrict to the columns of interest
df2 = df1a[["pct_id", "period", "month", "numerator", "denominator", "items"]]
# keep only the two analysis periods and index by CCG, period and month
df2 = df2.loc[df2['period'].isin(["baseline", "follow-up"])]
df2 = df2.set_index(["pct_id", "period", "month"])
df2.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,numerator,denominator,items
pct_id,period,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
00C,baseline,2017-01-01,20288.48891,107.615,928
00C,baseline,2017-02-01,18554.53583,107.615,797
00C,baseline,2017-03-01,22876.98454,107.615,958
00C,baseline,2017-04-01,20770.54861,107.776,867
00C,baseline,2017-05-01,21191.8976,107.886,945
00C,baseline,2017-06-01,20629.7883,107.888,892
00C,follow-up,2018-01-01,16798.32888,108.219,667
00C,follow-up,2018-02-01,17319.07538,108.293,621
00C,follow-up,2018-03-01,17915.92145,108.235,710
00C,follow-up,2018-04-01,16142.79865,108.273,682


In [4]:
### Aggregate the monthly rows to one row per CCG per period:
### numerator (cost) and items are summed; the denominator is averaged over the months.
# The aggregations are named by string rather than by passing the Python builtin
# `sum`: pandas maps the builtin to its own groupby sum today, but newer releases
# warn that the builtin will eventually be called directly, which would change how
# missing values are handled. "sum" pins the current behaviour.
agg_6m = df2.groupby(["pct_id", "period"]).agg(
    {"numerator": "sum", "items": "sum", "denominator": "mean"}
)
agg_6m.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,numerator,items,denominator
pct_id,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
00C,baseline,124312.24379,5387,107.7325
00C,follow-up,102119.89023,4008,108.279167
00D,baseline,315054.76387,15292,290.8515
00D,follow-up,237929.15765,10113,292.0395
00J,baseline,355985.80688,13461,256.521833


In [5]:
### Load the **allocated** CCGs and attach the aggregated data to them
ccgs = pd.read_csv('randomisation_group.csv')
# joint team information, matched to the allocations on "joint_team"
team = pd.read_csv('joint_teams.csv')
ccgs = ccgs.merge(team, how="left", on="joint_team")

# pct_id is the ccg_id where one is present; blank ccg_ids are filled from joint_id
ccgs["pct_id"] = ccgs["ccg_id"].combine_first(ccgs["joint_id"])
ccgs = ccgs[["joint_id", "allocation", "pct_id"]]

# attach the per-period aggregates; the left join keeps every allocated row
# even when no data matches its pct_id
df2b = agg_6m.reset_index()
df2b = ccgs.merge(df2b, how="left", on="pct_id")
df2b.head()


Unnamed: 0,joint_id,allocation,pct_id,period,numerator,items,denominator
0,01X,con,01X,baseline,402230.98221,14600,196.474
1,01X,con,01X,follow-up,347631.36917,12006,197.501167
2,99K,con,99K,baseline,397320.37086,8691,169.696667
3,99K,con,99K,follow-up,289294.82914,7659,170.398
4,01J,con,01J,baseline,287300.90109,14283,163.463333


In [6]:
# Group up to joint team level.
# note: SUM both numerator and population denominator across geographies
# The value columns are selected explicitly before summing. Without the selection
# the result depends on the pandas version: older releases silently drop the
# string column pct_id, newer ones try to sum it as well, which would add
# unwanted pct_id_* columns after the unstack below.
df2c = df2b.groupby(["joint_id", "allocation", "period"])[
    ["numerator", "items", "denominator"]
].sum()
# pivot "period" into the columns, giving one row per joint team
df2c = df2c.unstack().reset_index()
# flatten the two-level column index to "<column>_<period>"; the group keys have an
# empty second level, so they become "joint_id_" and "allocation_"
df2c.columns = df2c.columns.map('_'.join)
df2c.head()

Unnamed: 0,joint_id_,allocation_,numerator_baseline,numerator_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up
0,00J,I,355985.80688,300047.52211,13461,9824,256.521833,259.612833
1,00Y,con,321697.29913,263617.28942,17915,15048,252.313167,255.081167
2,01F,con,222717.46981,204807.24274,9052,7963,130.523333,131.222333
3,01J,con,287300.90109,264583.05225,14283,12886,163.463333,164.991
4,01V,I,473560.42907,441433.56403,20018,18694,278.8905,280.012


In [7]:
# Derive the per-period rates: each summed value divided by that period's denominator

# cost measure (numerator / denominator)
df2c["baseline_calc_value"] = df2c["numerator_baseline"] / df2c["denominator_baseline"]
df2c["follow_up_calc_value"] = df2c["numerator_follow-up"] / df2c["denominator_follow-up"]

# items measure (items / denominator)
df2c["baseline_items_thou"] = df2c["items_baseline"] / df2c["denominator_baseline"]
df2c["follow_up_items_thou"] = df2c["items_follow-up"] / df2c["denominator_follow-up"]

df2c.head()

Unnamed: 0,joint_id_,allocation_,numerator_baseline,numerator_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,baseline_items_thou,follow_up_items_thou
0,00J,I,355985.80688,300047.52211,13461,9824,256.521833,259.612833,1387.740771,1155.749961,52.475065,37.840964
1,00Y,con,321697.29913,263617.28942,17915,15048,252.313167,255.081167,1274.992119,1033.464339,71.003033,58.992987
2,01F,con,222717.46981,204807.24274,9052,7963,130.523333,131.222333,1706.342185,1560.765135,69.351585,60.683268
3,01J,con,287300.90109,264583.05225,14283,12886,163.463333,164.991,1757.586214,1603.62112,87.377394,78.10123
4,01V,I,473560.42907,441433.56403,20018,18694,278.8905,280.012,1698.015634,1576.480879,71.777275,66.761425


In [8]:
# plot time series chart for intervention versus control
#
# NOTE: the plotting code below is disabled by wrapping it in a triple-quoted
# string, so none of it runs. Because the string is the last expression in the
# cell, the notebook echoes it back as the cell output.
# NOTE(review): the disabled code refers to `prac`, `practice_id` and `month_no`,
# none of which are defined in the cells visible here - it looks like it was
# carried over from a practice-level analysis; confirm before re-enabling.
# NOTE(review): `sns.tsplot` may not exist in current seaborn releases - TODO confirm.

'''# merge MONTHLY data with practice allocations 
dfp = df1.loc[df1.month_no>0]
dfp = dfp.loc[~pd.isnull(dfp.calc_value)]
dfp = prac.merge(dfp, how='left', on='practice_id')#.set_index('allocation')
dfp = dfp[['practice_id','month_no','allocation','calc_value']]

dfp2 = dfp.groupby(['month_no','allocation']).count()
#dfp = pd.DataFrame(dfp.to_records())#.set_index('month_no')
dfp2

import seaborn as sns#; sns.set(color_codes=True)
import matplotlib.pyplot as plt
sns.set(style="darkgrid")

g = sns.tsplot(data=dfp, time="month_no",  value="calc_value", unit="practice_id",condition="allocation")
plt.ylim((0, 0.17))
plt.show()'''


'# merge MONTHLY data with practice allocations \ndfp = df1.loc[df1.month_no>0]\ndfp = dfp.loc[~pd.isnull(dfp.calc_value)]\ndfp = prac.merge(dfp, how=\'left\', on=\'practice_id\')#.set_index(\'allocation\')\ndfp = dfp[[\'practice_id\',\'month_no\',\'allocation\',\'calc_value\']]\n\ndfp2 = dfp.groupby([\'month_no\',\'allocation\']).count()\n#dfp = pd.DataFrame(dfp.to_records())#.set_index(\'month_no\')\ndfp2\n\nimport seaborn as sns#; sns.set(color_codes=True)\nimport matplotlib.pyplot as plt\nsns.set(style="darkgrid")\n\ng = sns.tsplot(data=dfp, time="month_no",  value="calc_value", unit="practice_id",condition="allocation")\nplt.ylim((0, 0.17))\nplt.show()'

In [9]:
### Primary Outcome P1 ########################
# Cost per 1,000 patients for all 18 pre-specified “low-priority” treatments combined,
# between intervention and control groups, assessed by applying a multivariable linear regression model.

import statsmodels.formula.api as smf
# NOTE: `data` is another name for df2c, not a copy, so the column added below
# also appears on df2c.
data = df2c
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

# Follow-up value regressed on the baseline value plus the intervention indicator.
# The formula indexes the `data` variable directly, which is why the baseline term
# is labelled data["baseline_calc_value"] in the results table.
lm = smf.ols(formula='data["follow_up_calc_value"] ~ data["baseline_calc_value"] +intervention', data=data).fit()

# output regression coefficients and p-values:
params = pd.DataFrame(lm.params).reset_index().rename(columns={0: 'coefficient','index': 'factor'})
# Rows 1 and 2 are the two predictors; the intercept (row 0) is left out, so its
# p value shows as NaN after the merge. `.iloc` makes the positional selection
# explicit: lm.pvalues is indexed by term name, and plain [[1, 2]] relies on a
# positional fallback that newer pandas releases deprecate.
pvals = pd.DataFrame(lm.pvalues.iloc[[1, 2]]).reset_index().rename(columns={0: 'p value','index': 'factor'})
params.merge(pvals, how='left',on='factor').set_index('factor')


Unnamed: 0_level_0,coefficient,p value
factor,Unnamed: 1_level_1,Unnamed: 2_level_1
Intercept,228.020097,
"data[""baseline_calc_value""]",0.689449,2.317439e-11
intervention,-11.040542,0.7664716


In [10]:
### Primary Outcome P2 ########################
# ITEMS per 1,000 patients for all 18 pre-specified “low-priority” treatments combined,
# between intervention and control groups, assessed by applying a multivariable linear regression model.

import statsmodels.formula.api as smf
# NOTE: `data` is another name for df2c, not a copy, so the column added below
# also appears on df2c.
data = df2c
# create a new Series called "intervention" to convert intervention/control to numerical values
data['intervention'] = data.allocation_.map({'con':0, 'I':1})

# Follow-up items rate regressed on the baseline items rate plus the intervention
# indicator. The formula indexes the `data` variable directly, which is why the
# baseline term is labelled data["baseline_items_thou"] in the results table.
lm = smf.ols(formula='data["follow_up_items_thou"] ~ data["baseline_items_thou"] +intervention', data=data).fit()

# output regression coefficients and p-values:
params = pd.DataFrame(lm.params).reset_index().rename(columns={0: 'coefficient','index': 'factor'})
# Rows 1 and 2 are the two predictors; the intercept (row 0) is left out, so its
# p value shows as NaN after the merge. `.iloc` makes the positional selection
# explicit: lm.pvalues is indexed by term name, and plain [[1, 2]] relies on a
# positional fallback that newer pandas releases deprecate.
pvals = pd.DataFrame(lm.pvalues.iloc[[1, 2]]).reset_index().rename(columns={0: 'p value','index': 'factor'})
params.merge(pvals, how='left',on='factor').set_index('factor')


Unnamed: 0_level_0,coefficient,p value
factor,Unnamed: 1_level_1,Unnamed: 2_level_1
Intercept,-4.303765,
"data[""baseline_items_thou""]",0.902239,5.122012e-18
intervention,-0.959128,0.6034362


# Remaining work
## TODO: add confidence intervals for the regression coefficients