This notebook sets out to analyse the total spend and volume over a 12-month period (July 2018 to June 2019) for a subset of measures.

In [1]:
import importlib
import pandas as pd
import numpy as np
from ebmdatalab import bq, maps, charts

In [2]:
# Render every float with exactly two decimal places so that monetary values
# are displayed consistently as pounds and pence.
def _format_two_decimals(value):
    """Return ``value`` formatted with two digits after the decimal point."""
    return '%.2f' % value


pd.set_option('display.float_format', _format_two_decimals)

In [3]:
import importlib
# Re-execute the already-imported `bq` module in place; presumably so that
# local edits to ebmdatalab.bq are picked up without restarting the kernel.
importlib.reload(bq)
# Measure identifiers to analyse; each one is queried from the BigQuery table
# measures.ccg_data_<measure_id>.  Every id must appear only once: a repeated
# id would have its rows fetched and stacked twice in the combined frame.
measures = [
    'statinintensity', 'lyrica', 'desogestrel',
    'glaucoma', 'fungal', 'methotrexate', 'keppra', 'ktt13_nsaids_ibuprofen',
    'other_lipid_modifying_drugs', 'tramadol', 'silver',
]

def get_data(measure_id):
    """Fetch the 12-month totals of one measure, one row per ``pct_id``.

    The query sums the monthly ``numerator`` and ``denominator`` columns of
    ``measures.ccg_data_<measure_id>`` for months from 2018-07-01 to
    2019-06-01 (inclusive) and derives ``calc_value`` from those two sums.

    Args:
        measure_id: Measure identifier.  It is interpolated into the table
            name and also emitted as a literal ``measure_id`` column, so it
            must come from a trusted source (a table identifier cannot be
            passed as a bound query parameter).

    Returns:
        The result of ``bq.cached_read`` for the query, with columns
        ``pct_id``, ``numerator_items``, ``denominator_items``,
        ``calc_value`` and ``measure_id``.
    """
    sql = """
SELECT
pct_id,
SUM(numerator) AS numerator_items,
SUM(denominator) AS denominator_items,
IEEE_DIVIDE(SUM(numerator), SUM(denominator)) AS calc_value, 
  '{measure_id}' AS measure_id
FROM
  measures.ccg_data_{measure_id} AS m
WHERE
month >= '2018-07-01' AND month <= '2019-06-01'
GROUP BY
  pct_id,
  measure_id
""".format(measure_id=measure_id)
    # Each measure needs its own cache file.  The previous path had no
    # placeholder, so `.format` was a no-op and every measure shared the
    # single file "measures_annual_total.csv".
    csv_path = "measures_annual_total_{}.csv".format(measure_id)
    return bq.cached_read(sql, csv_path=csv_path)

def get_dataframe(measures):
    """Stack the annual totals of several measures into one DataFrame.

    Args:
        measures: Iterable of measure ids; each is passed to ``get_data``.

    Returns:
        The per-measure frames concatenated row-wise in the order given, each
        keeping its own row index.  An empty DataFrame when ``measures`` is
        empty.
    """
    frames = [get_data(measure) for measure in measures]
    if not frames:
        # pd.concat raises ValueError when given nothing to concatenate.
        return pd.DataFrame()
    # Concatenate once: DataFrame.append was removed in pandas 2.0 and copied
    # the whole accumulated frame on every iteration.
    return pd.concat(frames)
# Fetch and combine the annual totals for every id listed in `measures`.
df = get_dataframe(measures)

In [4]:
# Preview the first ten rows of the combined frame.
df.head(10)

Unnamed: 0,pct_id,numerator_items,denominator_items,calc_value,measure_id
0,00Q,119673.0,224275.0,0.53,statinintensity
1,00R,153486.0,316996.0,0.48,statinintensity
2,00X,123528.0,236992.0,0.52,statinintensity
3,01A,270886.0,554967.0,0.49,statinintensity
4,01E,138397.0,263712.0,0.52,statinintensity
5,01K,242760.0,471549.0,0.51,statinintensity
6,02G,70773.0,148732.0,0.48,statinintensity
7,02M,164649.0,327792.0,0.5,statinintensity
8,02N,77660.0,207008.0,0.38,statinintensity
9,02R,96267.0,439766.0,0.22,statinintensity


In [5]:
# Plain-text dump of the combined frame.
# NOTE(review): print() bypasses the notebook's rich table rendering; a bare
# `df` (or `df.head()`) as the last expression would display it as a table.
print(df)

    pct_id  numerator_items  denominator_items  calc_value       measure_id
0      00Q        119673.00          224275.00        0.53  statinintensity
1      00R        153486.00          316996.00        0.48  statinintensity
2      00X        123528.00          236992.00        0.52  statinintensity
3      01A        270886.00          554967.00        0.49  statinintensity
4      01E        138397.00          263712.00        0.52  statinintensity
5      01K        242760.00          471549.00        0.51  statinintensity
6      02G         70773.00          148732.00        0.48  statinintensity
7      02M        164649.00          327792.00        0.50  statinintensity
8      02N         77660.00          207008.00        0.38  statinintensity
9      02R         96267.00          439766.00        0.22  statinintensity
10     02T        120036.00          309199.00        0.39  statinintensity
11     02W         46961.00          180613.00        0.26  statinintensity
12     03A  

In [10]:
# Write the combined frame to disk.  The row index is written as the first,
# unnamed column (pandas default).
# NOTE(review): pass index=False if that column is not wanted downstream.
df.to_csv("test_measure.csv")