This notebook sets out to analyse the total spend and volume in the last 12 months of a subset of measures

In [1]:
import importlib
import pandas as pd
import numpy as np
from ebmdatalab import bq, maps, charts

In [2]:
## Render every float with two decimal places so amounts read as pounds and pence.
def _two_decimal_places(value):
    """Format a number as a string with exactly two decimal places."""
    return '%.2f' % value


pd.set_option('display.float_format', _two_decimal_places)

#### Quality -> The following subset of measures are those where there are no direct cost savings, but there may be theoretical savings that can be modelled. For example, treating more people with high-intensity statins should prevent some CVD events. NB there are a couple where cost savings have not been worked out.

In [3]:
import importlib

# Reload the bq helper module before it is used for the queries below.
importlib.reload(bq)

# Measure ids (as used on OpenPrescribing) analysed in the quality section.
measures = [
    'statinintensity',
    'fungal',
    'glaucoma',
    'methotrexate',
    'ktt13_nsaids_ibuprofen',
    'other_lipid_modifying_drugs',
    'tramadol',
    'silver',
]

def get_data(measure_id):
    """Fetch 12-month, per-CCG totals for one quality measure.

    The query reads ``measures.ccg_data_<measure_id>``, keeps the months from
    2018-07-01 to 2019-06-01 inclusive, sums the numerator and denominator
    per ``pct_id`` and recomputes the measure value from those totals.

    Args:
        measure_id: measure id. It is interpolated directly into the table
            name and into the cache file name, so pass trusted values only.

    Returns:
        The result of ``bq.cached_read`` for the query; the query selects
        pct_id, numerator, denominator, calc_value and measure_id.
    """
    sql = """
SELECT
pct_id, ##this is CCG ODS code
SUM(numerator) AS numerator, ## consult measure to see if it is items or quantity
SUM(denominator) AS denominator,
IEEE_DIVIDE(SUM(numerator), SUM(denominator)) AS calc_value, ##we calculate the measure here for 12 months
  '{measure_id}' AS measure_id
FROM
  measures.ccg_data_{measure_id} AS m
WHERE
month >= '2018-07-01' AND month <= '2019-06-01'
GROUP BY
  pct_id,
  measure_id
""".format(measure_id=measure_id)
    # One cache file per measure.
    # NOTE(review): presumably cached_read reuses an existing CSV at csv_path - confirm.
    df_quality = bq.cached_read(sql, csv_path="measures_quality_{}.csv".format(measure_id))
    return df_quality

def get_dataframe(measures):
    """Stack the per-measure query results into one DataFrame.

    Args:
        measures: iterable of measure ids; each is passed to ``get_data``.

    Returns:
        A DataFrame holding the rows of every measure in input order. Each
        measure's rows keep their own index (no re-indexing). An empty
        DataFrame is returned when ``measures`` is empty.
    """
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside a
    # loop copies it on every iteration; gather the pieces and concatenate once.
    frames = [get_data(measure) for measure in measures]
    if not frames:
        # pd.concat raises on an empty list; preserve the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames)
# Query every id in `measures` and combine the results into one frame.
df_quality = get_dataframe(measures)

In [4]:
# Preview the first 10 rows of the combined quality-measure data.
df_quality.head(10)

Unnamed: 0,pct_id,numerator,denominator,calc_value,measure_id
0,00Q,119673.0,224275.0,0.53,statinintensity
1,00R,153486.0,316996.0,0.48,statinintensity
2,00X,123528.0,236992.0,0.52,statinintensity
3,01A,270886.0,554967.0,0.49,statinintensity
4,01E,138397.0,263712.0,0.52,statinintensity
5,01K,242760.0,471549.0,0.51,statinintensity
6,02G,70773.0,148732.0,0.48,statinintensity
7,02M,164649.0,327792.0,0.5,statinintensity
8,02N,77660.0,207008.0,0.38,statinintensity
9,02R,96267.0,439766.0,0.22,statinintensity


#### Cost - these measures could release cash now

In [5]:
import importlib

# Reload the bq helper module before it is used for the queries below.
importlib.reload(bq)

# Measure ids for the cost section; this rebinds `measures`, replacing the
# quality list assigned in the earlier cell.
measures = [
    'lyrica',
    'desogestrel',
    'keppra',
]

def get_data(measure_id):
    """Fetch 12-month, per-CCG item, cost and savings totals for one measure.

    Note: this definition shadows the ``get_data`` defined in the earlier
    quality cell; after this cell runs, the quality version is no longer
    reachable by that name.

    The query reads ``measures.ccg_data_<measure_id>``, keeps the months from
    2018-07-01 to 2019-06-01 inclusive and, per ``pct_id``, sums items, cost
    and the ``cost_savings_10`` .. ``cost_savings_90`` columns.

    Args:
        measure_id: measure id. It is interpolated directly into the table
            name and into the cache file name, so pass trusted values only.

    Returns:
        The result of ``bq.cached_read`` for the query; the query selects
        pct_id, numerator_items, denominator_items, calc_value_items,
        numerator_cost, denominator_cost, savings_10 .. savings_90 and
        measure_id.
    """
    # calc_value_items is derived from num_items / denom_items so that it
    # matches numerator_items / denominator_items. The generic
    # numerator / denominator columns may be items or quantity depending on
    # the measure, which would make an "items" label wrong.
    sql = """
SELECT
pct_id,
SUM(num_items) AS numerator_items,
SUM(denom_items) AS denominator_items,
IEEE_DIVIDE(SUM(num_items), SUM(denom_items)) AS calc_value_items,
SUM(num_cost) AS numerator_cost,
SUM(denom_cost) AS denominator_cost,
SUM(cost_savings_10) AS savings_10, ##this is savings that would have been made if they prescribed as well as best decile
SUM(cost_savings_20) AS savings_20,
SUM(cost_savings_30) AS savings_30,
SUM(cost_savings_40) AS savings_40,
SUM(cost_savings_50) AS savings_50,
SUM(cost_savings_60) AS savings_60,
SUM(cost_savings_70) AS savings_70,
SUM(cost_savings_80) AS savings_80,
SUM(cost_savings_90) AS savings_90,
  '{measure_id}' AS measure_id
FROM
  measures.ccg_data_{measure_id} AS m
WHERE
month >= '2018-07-01' AND month <= '2019-06-01'
GROUP BY
  pct_id,
  measure_id
""".format(measure_id=measure_id)
    # One cache file per measure.
    # NOTE(review): presumably cached_read reuses an existing CSV at csv_path,
    # so a stale measures_cost_*.csv may need deleting for the corrected
    # query to take effect - confirm.
    df_cost = bq.cached_read(sql, csv_path="measures_cost_{}.csv".format(measure_id))
    return df_cost

def get_dataframe(measures):
    """Stack the per-measure cost query results into one DataFrame.

    Note: this definition shadows the ``get_dataframe`` defined in the
    earlier quality cell.

    Args:
        measures: iterable of measure ids; each is passed to ``get_data``.

    Returns:
        A DataFrame holding the rows of every measure in input order. Each
        measure's rows keep their own index (no re-indexing). An empty
        DataFrame is returned when ``measures`` is empty.
    """
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside a
    # loop copies it on every iteration; gather the pieces and concatenate once.
    frames = [get_data(measure) for measure in measures]
    if not frames:
        # pd.concat raises on an empty list; preserve the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames)
# Query every id in `measures` and combine the results into one frame.
df_cost = get_dataframe(measures)

In [6]:
# Preview the first 10 rows of the combined cost-measure data.
df_cost.head(10)

Unnamed: 0,pct_id,numerator_items,denominator_items,calc_value_items,numerator_cost,denominator_cost,savings_10,savings_20,savings_30,savings_40,savings_50,savings_60,savings_70,savings_80,savings_90,measure_id
0,00D,909,74315,0.01,38589.87,226617.43,25977.92,21534.0,17901.47,14542.61,10554.15,6113.15,1394.89,-6307.67,-23427.38,lyrica
1,00J,772,54069,0.01,31507.54,170206.28,21271.12,17561.1,14547.76,11750.66,8440.46,4754.62,795.82,-5625.69,-19692.45,lyrica
2,00K,338,52514,0.01,17952.24,180845.75,7272.61,2951.05,-610.05,-3887.28,-7757.99,-12046.66,-16697.02,-24162.53,-40510.79,lyrica
3,00L,653,52406,0.01,30661.34,178729.4,19935.0,15959.7,12716.7,9705.27,6161.36,2212.02,-2013.01,-8902.36,-24182.45,lyrica
4,00N,960,29441,0.03,50642.39,127955.33,42537.76,40282.93,38460.01,36767.83,34762.16,32519.44,30130.71,26270.52,17611.11,lyrica
5,00P,537,71156,0.01,20099.02,186532.91,8928.66,4395.87,711.93,-2715.66,-6773.53,-11268.88,-16059.19,-23917.77,-41412.47,lyrica
6,00T,474,53288,0.01,20619.99,184441.63,9674.69,5236.21,1646.39,-1684.59,-5643.0,-10041.29,-14766.08,-22333.92,-39467.76,lyrica
7,00V,269,28985,0.01,18220.51,118113.27,11945.59,9762.27,7955.46,6288.68,4335.91,2140.78,-187.91,-3974.88,-12346.37,lyrica
8,00Y,370,43202,0.01,27125.3,183970.59,17146.6,13427.32,10436.79,7666.85,4364.64,681.98,-3246.48,-9517.95,-23822.68,lyrica
9,01C,689,26996,0.03,35362.49,111877.22,28090.77,25689.13,23729.36,21916.58,19773.57,17385.35,14828.82,10688.47,1518.18,lyrica
