# Prescribing outcomes
### 1. Change from baseline in the rate of nimodipine prescribing per 1,000 registered patients compared to the change in an earlier control period. 

**Follow-up period**: Three months following intervention, not including month of sending.

**Baseline period:** latest available three months of data at start of study.

**Control period:** baseline period minus one year, follow-up period minus one year. 


In [None]:
###  update dates as appropriate
import datetime

d7 = '2018-10-01' # month after end of follow-up period
d6 = '2018-07-01' # start of 3-month follow-up period

d5 = '2018-10-01' # month after end of "follow-up" period
d4 = '2018-07-01' # start of 3-month "follow-up" period
d3 = '2018-04-01' # month after end of baseline period
d2 = '2018-01-01' # start of 3-month baseline period
d1 = '2017-04-01' # start of 12-month baseline period

In [1]:
import pandas as pd

In [44]:
# load csv of practices contacted 
nim = pd.read_csv('nimodipine_practices.csv')
nim = nim.rename(columns={"practice_":"practice"})
nim.head()

Unnamed: 0.1,Unnamed: 0,practice,12-mo_items,12-mo_quantity,12-mo_total_list_size,3-mo_items,3-mo_quantity,3-mo_total_list_size
0,0,A82036,11,594,10106.75,2,90,10191.0
1,1,A83047,13,2812,14751.9,5,1120,14805.333333
2,2,B82005,1,100,58595.0,1,100,58595.0
3,3,B86081,11,924,14308.909091,3,252,14355.666667
4,4,C81110,6,1344,10537.833333,1,224,10623.0


In [40]:
#  import all nimodipine data, excluding practices not meeting the eligibility criteria. 

# table to query set up as follows
'''SELECT * FROM 
`ebmdatalab.hscic.normalised_prescribing_standard` 
WHERE SUBSTR(bnf_code,1,9) = '0206020M0'
AND EXTRACT(year FROM month) > 2016 '''

#### save as helen.nimodipine_20190513


# import data for all practices prescribing nimodipine, 
# excluding those which are dormant/closed,
# excluding practices which are not standard general practices
# excluding practices in Devon CCG (or one of its predecessor organisations NEW Devon CCG)

# Import dataset from BigQuery
import pandas as pd
import numpy as np
GBQ_PROJECT_ID = '620265099307'

q = '''SELECT  
    a.practice,
    a.pct,
    setting,
    a.month,
    total_list_size,
    SUM(a.items) AS items,
    SUM(a.quantity) AS quantity
FROM helen.nimodipine_20190513 a
   INNER JOIN hscic.practices b ON a.practice = b.code AND setting=4 
   INNER JOIN `hscic.practice_statistics_all_years` pop  ON a.practice = pop.practice AND a.month = pop.month and status_code = "A"
WHERE pct NOT IN ('99P','15N')
    AND pop.total_list_size > 1000 
    AND (male_25_34 + male_35_44 + male_45_54 + male_55_64 + female_25_34 + female_35_44 + female_45_54 + female_55_64)
    / total_list_size BETWEEN 0.1 and 0.85
GROUP BY practice, pct, setting, month, total_list_size
'''
df1 = pd.read_gbq(q, GBQ_PROJECT_ID, dialect='standard',verbose=False)

        ## note: parsing dates is quite memory-intensive, make sure not too many programmes running

df1["month"] = pd.to_datetime(df1.month)

df1.head() # this gives the first few rows of data

  credentials=credentials, verbose=verbose, private_key=private_key)


Unnamed: 0,practice,pct,setting,month,total_list_size,items,quantity
0,G82032,09J,4,2018-03-01 00:00:00+00:00,13884,1,22
1,C81110,04R,4,2017-05-01 00:00:00+00:00,10403,1,224
2,C84018,04K,4,2017-08-01 00:00:00+00:00,9453,1,30
3,H81029,09N,4,2018-03-01 00:00:00+00:00,11800,1,100
4,M83128,05W,4,2017-04-01 00:00:00+00:00,4273,1,72


In [42]:
# classify dates as part of follow-up period 

df1["follow-up"] = False
df1.loc[(df1['month']>=d6)&(df1['month']<d7), "follow-up"] = True

fu = df1.loc[df1["follow-up"]==True].groupby(["practice"])["quantity","total_list_size"].agg({"quantity":"sum","total_list_size":"mean"}).reset_index()

fu.head()

Unnamed: 0,practice,quantity,total_list_size
0,A82029,200,3488.5
1,A82036,112,10122.0
2,A83029,100,4928.0
3,A83047,572,14743.5
4,A84011,400,7526.5


### Merge baseline and follow-up data

In [51]:

nim2 = nim[["practice","3-mo_quantity","3-mo_total_list_size"]].merge(fu, on="practice",how="left")
nim2["baseline_rate"] = 1000*nim2["3-mo_quantity"]/nim2["3-mo_total_list_size"]
nim2["follow-up_rate"] = 1000*nim2["quantity"]/nim2["total_list_size"]
nim2 = nim2.fillna(0)
nim2.head()

Unnamed: 0,practice,3-mo_quantity,3-mo_total_list_size,quantity,total_list_size,baseline_rate,follow-up_rate
0,A82036,90,10191.0,112.0,10122.0,8.831322,11.065007
1,A83047,1120,14805.333333,572.0,14743.5,75.648415,38.796758
2,B82005,100,58595.0,0.0,0.0,1.70663,0.0
3,B86081,252,14355.666667,252.0,14303.333333,17.554044,17.618271
4,C81110,224,10623.0,224.0,10522.0,21.086322,21.288728


In [52]:
from scipy import stats

print(stats.ttest_rel(nim2["baseline_rate"],nim2["follow-up_rate"]))

nim3 = nim2.copy()

nim3 = nim3.drop("practice",axis=1).mean()

print(nim3)

Ttest_relResult(statistic=1.171121009095174, pvalue=0.24633908522948783)
3-mo_quantity             225.661017
3-mo_total_list_size    12791.403955
quantity                  187.796610
total_list_size          8262.161017
baseline_rate              25.958306
follow-up_rate             22.897705
dtype: float64


### identify control practices based on 12-month and 3-month prescribing data

In [9]:
### classify data by baseline and follow-up period

df1["follow-up"] = False
df1["12-mo-baseline"] = False
df1["3-mo-baseline"] = False
df1.loc[(df1['month']>=d1)&(df1['month']<d3), "12-mo-baseline"] = True
df1.loc[(df1['month']>=d2)&(df1['month']<d3), "3-mo-baseline"] = True
df1.loc[(df1['month']>=d4)&(df1['month']<d5), "follow-up"] = True

df1.head()

Unnamed: 0,practice,pct,setting,month,total_list_size,items,quantity,12-mo-baseline,3-mo-baseline,follow-up
0,G82032,09J,4,2018-03-01 00:00:00+00:00,13884,1,22,True,True,False
1,C81110,04R,4,2017-05-01 00:00:00+00:00,10403,1,224,True,False,False
2,C84018,04K,4,2017-08-01 00:00:00+00:00,9453,1,30,True,False,False
3,H81029,09N,4,2018-03-01 00:00:00+00:00,11800,1,100,True,True,False
4,M83128,05W,4,2017-04-01 00:00:00+00:00,4273,1,72,True,False,False


In [17]:
### aggregate data over 6-month periods ( we will want to calculate the change between each)

# Perform groupby aggregation
agg = df1.loc[df1["12-mo-baseline"]==True].groupby(["practice","3-mo-baseline"])["items","quantity","total_list_size"].agg({"items":"sum","quantity":"sum","total_list_size":"mean"}).reset_index()

# any prescribing in last 3 months
filter1 = agg.loc[agg["3-mo-baseline"]==True][["practice"]]

agg = agg.merge(filter1, on="practice", how="inner").groupby(["practice","3-mo-baseline"]).sum()
agg = pd.DataFrame(agg.stack()).unstack(1)
agg.columns = agg.columns.droplevel()

agg["12-mo"] = agg[True] + agg[False]
agg = agg.reset_index().rename(columns={True:"3-mo","level_1":"prescribing"}).drop(False, axis=1).fillna(0).set_index(["practice","prescribing"]).unstack()

# More than one item of nimodipine prescribed OR more than 56 tablets prescribed in the latest 12 months#
agg = agg.loc[(agg["12-mo","items"]>1) | (agg["12-mo","quantity"]>56)]#.sort_values(by="quantity")
agg.head()

3-mo-baseline,3-mo,3-mo,3-mo,12-mo,12-mo,12-mo
prescribing,items,quantity,total_list_size,items,quantity,total_list_size
practice,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A82029,1.0,100.0,3484.0,7.0,700.0,6973.333333
A82036,2.0,112.0,10029.0,13.0,728.0,20040.444444
A82045,2.0,112.0,8644.5,12.0,672.0,17256.277778
A82654,1.0,112.0,7631.0,6.0,672.0,15233.6
A83047,2.0,448.0,14652.0,12.0,2688.0,29205.0


In [30]:
# follow-up data for comparison group ("control")

con = agg.drop("12-mo",axis=1).reset_index()
con.columns = con.columns.map('_'.join)
con = con.rename(columns={"practice_":"practice"}).drop("3-mo_items",axis=1)
con["baseline_rate"] = 1000*con["3-mo_quantity"]/con["3-mo_total_list_size"]

con_fu = df1.loc[df1["follow-up"]==True].groupby(["practice"])["quantity","total_list_size"].agg({"quantity":"sum","total_list_size":"mean"}).reset_index()


con = con.merge(con_fu, on="practice",how="left")

# fill missing population figures with the earlier population, and fill blank quantities with zero
con["total_list_size"] = con["total_list_size"].fillna(con["3-mo_total_list_size"])
con = con.fillna(0)
con["follow-up_rate"] = 1000*con["quantity"]/con["total_list_size"]
con.head()

Unnamed: 0,practice,3-mo_quantity,3-mo_total_list_size,baseline_rate,quantity,total_list_size,follow-up_rate
0,A82029,100.0,3484.0,28.702641,200.0,3488.5,57.331231
1,A82036,112.0,10029.0,11.167614,112.0,10122.0,11.065007
2,A82045,112.0,8644.5,12.956215,0.0,8644.5,0.0
3,A82654,112.0,7631.0,14.676975,0.0,7631.0,0.0
4,A83047,448.0,14652.0,30.576031,572.0,14743.5,38.796758


In [29]:
con2 = con.copy()

con2 = pd.DataFrame(con2.drop("practice",axis=1).sum()).transpose()

con2["follow-up_rate"] = 1000*con2["quantity"]/con2["total_list_size"]

con2

Unnamed: 0,3-mo_quantity,3-mo_total_list_size,quantity,total_list_size,follow-up_rate,baseline_rate
0,14560.0,582892.833333,11168.0,630478.833333,17.713521,24.978863


In [32]:
from scipy import stats

print(stats.ttest_rel(con["baseline_rate"],con["follow-up_rate"]))

con2 = con.copy()

con2 = con2.drop("practice",axis=1).mean()

#con2["follow-up_rate"] = 1000*con2["quantity"]/con2["total_list_size"]

print(con2)

Ttest_relResult(statistic=2.1604069715054197, pvalue=0.035114618683496884)
3-mo_quantity             260.000000
3-mo_total_list_size    10408.800595
baseline_rate              30.556550
quantity                  199.428571
total_list_size         11258.550595
follow-up_rate             22.653988
dtype: float64
