# Secondary Outcomes
**S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.**

**S2. Total items prescribed per 1,000 registered patients for Co-proxamol.**

**S3. Total items prescribed per 1,000 registered patients for Dosulepin.**

In [43]:
import pandas as pd
import numpy as np
from lp_measure_conditions import tables

from analysis import compute_regression

import logging
# Only let ERROR-level (and above) messages from pandas_gbq through
logger = logging.getLogger('pandas_gbq')
logger.setLevel(logging.ERROR)

GBQ_PROJECT_ID = '620265099307'

# Period boundaries (first day of the month each period starts in).
#   baseline_start      -> baseline period begins
#   mid_start           -> month after the end of the baseline period
#   followup_start      -> follow-up period begins
#   post_followup_start -> month after the end of the follow-up period
baseline_start, mid_start = '2017-01-01', '2017-07-01'
followup_start, post_followup_start = '2018-01-01', '2018-07-01'

## Prepare data

In [44]:
# Read the per-measure data (cost/items numerators and population
# denominators). The CSV is expected to exist already: it is generated by
# running the primary outcome notebook.
data = pd.read_csv("../data/all_measure_data.csv")
# Discard the column pandas labels 'Unnamed: 0'
data = data.drop(columns=['Unnamed: 0'])

data.head(2)

Unnamed: 0,month,pct_id,items,cost,denominator,measure
0,2017-10-01,RXA,0,0.0,4.35,lpcoprox
1,2017-10-01,06M,0,0.0,239.455,lpcoprox


In [45]:
### Label each row with the study period that its month falls in
### (the rows are not filtered here, only labelled)

# Months are compared against the period boundaries as strings; values such
# as '2017-10-01' sort chronologically under plain string comparison.
# np.select uses the first condition that matches, so the boundaries are
# listed latest first.
period_from = [
    ('after', post_followup_start),
    ('follow-up', followup_start),
    ('mid', mid_start),
    ('baseline', baseline_start),
]

conditions = [data['month'] >= start for _, start in period_from]
conditions.append(data['month'] < baseline_start)

choices = [label for label, _ in period_from] + ['before']

# Rows matching none of the conditions get the placeholder '0'
data['period'] = np.select(conditions, choices, default='0')
data.head(2)

Unnamed: 0,month,pct_id,items,cost,denominator,measure,period
0,2017-10-01,RXA,0,0.0,4.35,lpcoprox,mid
1,2017-10-01,06M,0,0.0,239.455,lpcoprox,mid


In [46]:
# Keep only the columns of interest, restrict the rows to the baseline and
# follow-up periods, and index the result by pct_id, period and month
columns_of_interest = [
    "measure",
    "pct_id",
    "period",
    "month",
    "cost",
    "items",
    "denominator",
]
in_study_period = data['period'].isin(["baseline", "follow-up"])
data = (
    data
    .loc[in_study_period, columns_of_interest]
    .set_index(["pct_id", "period", "month"])
)
data.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,measure,cost,items,denominator
pct_id,period,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
05C,follow-up,2018-06-01,lpcoprox,0.0,0,319.624
03X,follow-up,2018-06-01,lpcoprox,0.0,0,97.518


In [47]:
### Sum the numerators (cost, items) and average the population denominator
### for each measure, CCG and period.
# `data` is indexed by pct_id/period/month, so grouping on pct_id and period
# collapses the monthly rows of each period into a single row.
agg_6m = (
    data
    .groupby(["measure", "pct_id", "period"])
    .agg({"cost": "sum", "items": "sum", "denominator": "mean"})
)
agg_6m.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cost,items,denominator
measure,pct_id,period,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
lpcoprox,00C,baseline,2948.99296,31,107.7325
lpcoprox,00C,follow-up,1874.83241,24,108.279167


In [48]:
### Attach RCT allocation and Joint Team membership to the measure data

# Joint Team information (which CCGs work together in Joint Teams)
team = pd.read_csv('../data/joint_teams.csv')

# CCGs that have been allocated to the RCT, mapped to their Joint Teams.
# pct_id is taken from ccg_id where the Joint Team lookup supplied one and
# falls back to joint_id otherwise, so every row ends up with a pct_id.
rct_ccgs = (
    pd.read_csv('../data/randomisation_group.csv')
    .merge(team, on="joint_team", how="left")
    .assign(pct_id=lambda df: df["ccg_id"].combine_first(df["joint_id"]))
    [["joint_id", "allocation", "pct_id"]]
)

# Combine CCG/Joint Team info with the per-CCG measure data; the left join
# keeps every allocated CCG even when it has no measure rows
rct_agg_6m = pd.merge(rct_ccgs, agg_6m.reset_index(), on="pct_id", how="left")
rct_agg_6m.head(3)


Unnamed: 0,joint_id,allocation,pct_id,measure,period,cost,items,denominator
0,01X,con,01X,lpcoprox,baseline,9100.74863,50,196.474
1,01X,con,01X,lpcoprox,follow-up,9740.93645,47,197.501167
2,01X,con,01X,lpdosulepin,baseline,6375.88864,2447,196.474


In [49]:
# Aggregate up to Joint Team groups, producing one row per
# joint team / allocation / measure with a column per value and period.
# XXX: SUM both numerators and population denominator across geographies - is this right?
#
# Only the numeric value columns are summed. Selecting them explicitly keeps
# the string pct_id column out of the result on every pandas version
# (pandas >= 2.0 no longer drops non-numeric columns from groupby sums).
rct_agg_6m = (
    rct_agg_6m
    .groupby(["joint_id", "allocation", "measure", "period"])[
        ["cost", "items", "denominator"]]
    .sum()
    .unstack()
    .reset_index()
)
# Flatten the two-level column labels left by unstack into single names such
# as "cost_baseline"; the strip removes the trailing "_" on the key columns,
# whose second level is empty.
rct_agg_6m.columns = rct_agg_6m.columns.map('_'.join).map(lambda x: x.strip("_"))
rct_agg_6m.head(2)


Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up
0,00J,I,lpcoprox,26605.75613,20065.45311,163,107,256.521833,259.612833
1,00J,I,lpdosulepin,3020.08789,1937.38454,1575,1274,256.521833,259.612833


In [34]:
### Aggregated measure values (cost only): cost divided by the population
### denominator, separately for the baseline and follow-up periods
rct_agg_6m["baseline_calc_value"] = (
    rct_agg_6m["cost_baseline"].div(rct_agg_6m["denominator_baseline"])
)
rct_agg_6m["follow_up_calc_value"] = (
    rct_agg_6m["cost_follow-up"].div(rct_agg_6m["denominator_follow-up"])
)
rct_agg_6m.head(2)

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,lpcoprox,26605.75613,20065.45311,163,107,256.521833,259.612833,103.717316,77.289912
1,00J,I,lpdosulepin,3020.08789,1937.38454,1575,1274,256.521833,259.612833,11.77322,7.462592


## S1. Cost per 1,000 patients for top 3 pre-specified “low-priority” treatments combined.

In [35]:
# Find the top 3 measures per joint team by baseline_calc_value
# (baseline cost divided by the baseline denominator).
# The sort puts the highest values first within each joint team, which also
# fixes the order in which tied values are ranked below.
top_3 = rct_agg_6m.sort_values(
    by=["joint_id", "baseline_calc_value"], ascending=False)
# method="first" gives every measure a distinct integer rank. With the
# default method ("average"), tied values share a fractional rank, so the
# `<= 3` filter could keep fewer than three measures for a joint team.
top_3["measure_rank"] = (
    top_3
    .groupby("joint_id")["baseline_calc_value"]
    .rank(method="first", ascending=False)
)
top_3 = top_3.loc[top_3.measure_rank <= 3]
top_3.head()

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value,measure_rank
670,99K,con,lpliothyronine,129145.01841,57558.58123,380,172,169.696667,170.398,761.034503,337.789066,1.0
679,99K,con,lptrimipramine,69674.06189,51635.34145,188,163,169.696667,170.398,410.580026,303.027861,2.0
669,99K,con,lplidocaine,41409.52854,35373.67281,662,524,169.696667,170.398,244.020872,207.594413,3.0
653,99J,con,lpliothyronine,278653.00228,221120.63222,1081,829,485.592,492.281333,573.841831,449.175334,1.0
662,99J,con,lptrimipramine,204273.63041,172486.96408,531,439,485.592,492.281333,420.669266,350.382906,2.0


In [52]:
# Collapse the selected measures into one row per joint team: costs are
# added together, while the population denominators are averaged across
# the selected measures.
top_3_aggregations = {
    **dict.fromkeys(["cost_baseline", "cost_follow-up"], "sum"),
    **dict.fromkeys(["denominator_baseline", "denominator_follow-up"], "mean"),
}
top_3 = top_3.groupby(["joint_id", "allocation"]).agg(top_3_aggregations)

### Aggregated measure values for the combined cost of the top 3 measures
top_3["baseline_calc_value"] = (
    top_3["cost_baseline"].div(top_3["denominator_baseline"])
)
top_3["follow_up_calc_value"] = (
    top_3["cost_follow-up"].div(top_3["denominator_follow-up"])
)
top_3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cost_baseline,cost_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
joint_id,allocation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00J,I,222188.91809,191214.14573,256.521833,259.612833,866.159871,736.535799
00Y,con,164808.49656,147379.1074,252.313167,255.081167,653.190235,577.773378
01F,con,161020.77953,160500.90134,130.523333,131.222333,1233.655128,1223.121837
01J,con,209530.46147,204609.46985,163.463333,164.991,1281.819336,1240.125036
01V,I,298253.13031,304254.86656,278.8905,280.012,1069.427357,1086.577956


In [53]:
# S1 regression: follow-up value against baseline value and allocation.
# reset_index returns a new frame, so top_3 itself is left untouched.
data = top_3.reset_index()
# Encode the allocation as a 0/1 indicator (control = 0, intervention = 1)
data['intervention'] = data['allocation'].map({'con': 0, 'I': 1})
formula = 'data["follow_up_calc_value"] ~ data["baseline_calc_value"] + intervention'
compute_regression(data, formula=formula)


Unnamed: 0_level_0,coefficient,p value,conf_int_low,conf_int_high
factor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,69.897256,,-133.26129,273.055802
"data[""baseline_calc_value""]",0.774676,2.456348e-10,0.59188,0.957472
intervention,11.73469,0.7811877,-73.242297,96.711677


## S2. Total items prescribed per 1,000 registered patients for Co-proxamol.

In [75]:
# Filter data for the co-proxamol measure.
# Take an explicit copy so the column assignments below write to an
# independent frame rather than to a slice of rct_agg_6m (which raises
# SettingWithCopyWarning).
coprox = rct_agg_6m.loc[rct_agg_6m.measure == "lpcoprox"].copy()

### Calculate aggregated measure values (items divided by the population
### denominator). These replace the cost-based values carried over from
### rct_agg_6m in the same two columns.
coprox["baseline_calc_value"] = coprox["items_baseline"] / coprox["denominator_baseline"]
coprox["follow_up_calc_value"] = coprox["items_follow-up"] / coprox["denominator_follow-up"]
coprox.head()

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
0,00J,I,lpcoprox,26605.75613,20065.45311,163,107,256.521833,259.612833,0.635423,0.412152
17,00Y,con,lpcoprox,8034.09214,7397.55534,46,45,252.313167,255.081167,0.182313,0.176414
34,01F,con,lpcoprox,8743.74849,1897.93113,60,13,130.523333,131.222333,0.459688,0.099069
51,01J,con,lpcoprox,10694.05893,3020.67409,45,26,163.463333,164.991,0.275291,0.157584
68,01V,I,lpcoprox,22347.01914,5283.50532,131,49,278.8905,280.012,0.469718,0.174993


In [71]:
## Secondary outcome S2: total items prescribed per 1000 registered
## patients for Co-proxamol.
# reset_index returns a new frame, so coprox itself is left untouched.
data = coprox.reset_index()
# Add an "intervention" column encoding the allocation numerically
# (control = 0, intervention = 1)
data['intervention'] = data['allocation'].map({'con': 0, 'I': 1})
formula = 'data["follow_up_calc_value"] ~ data["baseline_calc_value"] + intervention'
compute_regression(data, formula=formula)

Unnamed: 0_level_0,coefficient,p value,conf_int_low,conf_int_high
factor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,0.015109,,-0.062606,0.092825
"data[""baseline_calc_value""]",0.589392,5.59931e-12,0.469092,0.709692
intervention,-0.010726,0.7727669,-0.08544,0.063988


## S3. Total items prescribed per 1,000 registered patients for Dosulepin.

In [76]:
# Pull out the rows for the dosulepin measure into an independent frame
is_dosulepin = rct_agg_6m["measure"] == "lpdosulepin"
dosulepin = rct_agg_6m.loc[is_dosulepin].copy()

### Aggregated measure values: items divided by the population denominator.
### These overwrite the cost-based values in the same two columns.
dosulepin = dosulepin.assign(
    baseline_calc_value=lambda df: df["items_baseline"] / df["denominator_baseline"],
    follow_up_calc_value=lambda df: df["items_follow-up"] / df["denominator_follow-up"],
)
dosulepin.head(2)

Unnamed: 0,joint_id,allocation,measure,cost_baseline,cost_follow-up,items_baseline,items_follow-up,denominator_baseline,denominator_follow-up,baseline_calc_value,follow_up_calc_value
1,00J,I,lpdosulepin,3020.08789,1937.38454,1575,1274,256.521833,259.612833,6.139828,4.907307
18,00Y,con,lpdosulepin,5050.51768,2644.01308,2063,1552,252.313167,255.081167,8.176347,6.084338


In [77]:
## Secondary outcome S3: total items prescribed per 1000 registered
## patients for Dosulepin.
# reset_index returns a new frame, so dosulepin itself is left untouched.
data = dosulepin.reset_index()
# Add an "intervention" column encoding the allocation numerically
# (control = 0, intervention = 1)
data['intervention'] = data['allocation'].map({'con': 0, 'I': 1})
formula = 'data["follow_up_calc_value"] ~ data["baseline_calc_value"] + intervention'
compute_regression(data, formula=formula)

Unnamed: 0_level_0,coefficient,p value,conf_int_low,conf_int_high
factor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,-0.134987,,-0.519292,0.249319
"data[""baseline_calc_value""]",0.892695,5.383593e-34,0.852716,0.932674
intervention,0.02354,0.8651594,-0.255397,0.302478
