This notebook identifies practices that meet the criteria for inclusion in an RCT on feedback about methotrexate prescribing.

In [1]:
##import libraries needed (might not need them all)
import pandas as pd
import numpy as np
from ebmdatalab import bq, maps, charts
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [2]:
## show every float with two decimal places (consistent pounds-and-pence style)
pd.set_option('display.float_format', '{:.2f}'.format)

In [3]:
## reload the already-imported bq helper module
## (presumably to pick up local changes to the library without a kernel restart -- confirm)
import importlib
importlib.reload(bq)
## NOTE(review): `measures` is not referenced by any of the visible cells; get_data is
## called below with the literal 'methotrexate' -- confirm whether it is still needed
measures = ['methotrexate']

def get_data(measure_id, csv_path="df_rct_mtx.csv"):
    """Fetch monthly practice-level data for one measure.

    Builds a SQL statement that reads ``measures.practice_data_<measure_id>``
    joined to the vendors, practices and practice_statistics tables, keeps
    rows where ``practices.setting = 4`` and ``total_list_size > 1000``,
    orders them by month, and hands the statement to ``bq.cached_read``.

    Parameters
    ----------
    measure_id : str
        Measure identifier. It is interpolated into the table name and also
        returned as a literal in the ``measure_id`` column. The value is
        inserted with ``str.format``, so only pass trusted identifiers.
    csv_path : str, optional
        CSV path passed to ``bq.cached_read``. Defaults to the file name that
        was previously hard-coded, so existing callers are unaffected.
        NOTE(review): presumably ``cached_read`` reuses an existing file at
        this path instead of re-running the query -- if so, pass a distinct
        path for every measure other than methotrexate.

    Returns
    -------
    The object returned by ``bq.cached_read`` (used by the cells below as a
    pandas DataFrame).
    """
    sql = """
SELECT
  TRIM(Principal_Supplier) AS supplier,
  m.practice_id,
  practices.name,
  m.pct_id,
  Date as month,
  '{measure_id}' AS measure_id,
  numerator,
  denominator,
  calc_value  
FROM
  measures.practice_data_{measure_id} as m
JOIN
  ebmdatalab.alex.vendors as software #this is where the up to date vendors table is held
ON
  software.ODS = practice_id
  AND DATE(Date) = m.month
JOIN
  hscic.practices 
ON
  practices.code = software.ODS
JOIN
  hscic.practice_statistics
ON
  practice_statistics.practice = practices.code
  AND Date = practice_statistics.month
WHERE
  practices.setting =4
  AND total_list_size > 1000
ORDER BY
  m.month
""".format(measure_id=measure_id)
    return bq.cached_read(sql, csv_path=csv_path)


## load the methotrexate measure and tidy it for the later cells
df_rct = (
    get_data('methotrexate')
    .assign(month=lambda frame: pd.to_datetime(frame['month']))
    .replace(['TPP'], 'SystmOne')        ## relabels 'TPP' wherever it appears as a whole value
    .rename(columns={'pct_id': 'pct'})   ## prep for maps library, where pct must be the column name
    .fillna(0)                           ## applies to every column, not only the numeric ones
)
df_rct.head(5)

  credentials=credentials, verbose=verbose, private_key=private_key)


Unnamed: 0,supplier,practice_id,name,pct,month,measure_id,numerator,denominator,calc_value
0,SystmOne,A81634,THE ARRIVAL PRACTICE,00K,2016-01-01,methotrexate,0,0,0.0
1,SystmOne,A83638,SHILDON HEALTH CLINIC,00D,2016-01-01,methotrexate,0,0,0.0
2,SystmOne,A87600,PARK PARADE PRACTICE,99C,2016-01-01,methotrexate,0,0,0.0
3,EMIS,A88020,FLAGG COURT HEALTH CENTRE,00N,2016-01-01,methotrexate,0,0,0.0
4,EMIS,A88614,FLAGG COURT (DR N WIN),00N,2016-01-01,methotrexate,0,0,0.0


In [4]:
## number of distinct practices in the extract
df_rct["practice_id"].nunique()

7642

In [5]:
## here we isolate last quarter data (months from 2019-03-01 onwards)
df_rct_qtr = df_rct.loc[df_rct["month"] >= "2019-03-01"]
## sum only the numeric measure columns per practice: a bare .sum() would also try to
## aggregate the datetime `month` and text `measure_id` columns, which pandas >= 2.0
## no longer drops silently (summing a datetime column raises TypeError)
df_rct_qtr = (
    df_rct_qtr
    .groupby(['practice_id', 'name', 'pct', 'supplier'])[['numerator', 'denominator', 'calc_value']]
    .sum()
)
df_rct_qtr.reset_index().head(5)

Unnamed: 0,practice_id,name,pct,supplier,numerator,denominator,calc_value
0,A81001,THE DENSHAM SURGERY,00K,SystmOne,0,27,0.0
1,A81002,QUEENS PARK MEDICAL CENTRE,00K,SystmOne,0,145,0.0
2,A81004,BLUEBELL MEDICAL CENTRE,00M,SystmOne,0,65,0.0
3,A81005,SPRINGWOOD SURGERY,00M,SystmOne,0,105,0.0
4,A81006,TENNANT STREET MEDICAL PRACTICE,00K,SystmOne,0,96,0.0


In [6]:
## drop calc_value so the quarter value can be recalculated from the summed counts;
## errors="ignore" keeps the cell safe to re-run once the column has already gone
df_rct_qtr = df_rct_qtr.drop(columns="calc_value", errors="ignore")

In [7]:
## quarter value for the measure: summed numerator divided by summed denominator;
## missing results (e.g. from 0 / 0) are then set to 0
df_rct_qtr = df_rct_qtr.assign(
    measure_value=lambda frame: frame['numerator'] / frame['denominator']
).fillna(0)
df_rct_qtr.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,numerator,denominator,measure_value
practice_id,name,pct,supplier,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A81001,THE DENSHAM SURGERY,00K,SystmOne,0,27,0.0
A81002,QUEENS PARK MEDICAL CENTRE,00K,SystmOne,0,145,0.0
A81004,BLUEBELL MEDICAL CENTRE,00M,SystmOne,0,65,0.0
A81005,SPRINGWOOD SURGERY,00M,SystmOne,0,105,0.0
A81006,TENNANT STREET MEDICAL PRACTICE,00K,SystmOne,0,96,0.0


In [8]:
## five practices with the highest quarterly measure value
df_rct_qtr.sort_values("measure_value", ascending=False).iloc[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,numerator,denominator,measure_value
practice_id,name,pct,supplier,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
E87048,ROSARY GARDEN SURGERY,08Y,SystmOne,1,1,1.0
K83081,SUMMERLEE MEDICAL CENTRE,04G,SystmOne,3,3,1.0
E85659,"DR RK KUKAR, PARKVIEW CTR FOR H&W",08C,SystmOne,2,2,1.0
E87746,BROMPTON MEDICAL CENTRE,08Y,SystmOne,16,16,1.0
Y00902,THE WESTBOURNE GREEN SURGERY,09A,SystmOne,6,7,0.86


In [16]:
## eligible practices: quarterly measure value above 0.1 and numerator above 2
df_rct_sample = df_rct_qtr.query("measure_value > 0.1 and numerator > 2")
df_rct_sample.reset_index().head(5)

Unnamed: 0,practice_id,name,pct,supplier,numerator,denominator,measure_value
0,A82032,MARYPORT HEALTH SERVICES,01H,EMIS,28,176,0.16
1,A82038,TEMPLE SOWERBY MEDICAL PRACTICE,01H,EMIS,17,65,0.26
2,A89002,DR BHATE SURGERY,00P,EMIS,15,40,0.38
3,B82008,NORTH HOUSE SURGERY,03E,EMIS,15,106,0.14
4,B82014,KINGSWOOD SURGERY,03E,SystmOne,6,57,0.11


In [17]:
## data columns of the sample; the practice identifiers sit in the index, not in the columns
df_rct_sample.columns

Index(['numerator', 'denominator', 'measure_value'], dtype='object')

In [18]:
## summary of the sample frame; its entry count is the number of eligible practices
df_rct_sample.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 661 entries, (A82032, MARYPORT HEALTH SERVICES, 01H, EMIS) to (Y04965, MALLING HEALTH AT FOLESHILL, 05A, EMIS)
Data columns (total 3 columns):
numerator        661 non-null int64
denominator      661 non-null int64
measure_value    661 non-null float64
dtypes: float64(1), int64(2)
memory usage: 126.2+ KB


661 practices meet the eligibility criteria: over the months from March 2019 onwards, a measure value above 0.1 and a numerator above 2.