In [2]:
import pandas as pd
from io import StringIO

# Inline sample of practice-level prescribing data: one row per
# (practice, drug) pair with an item count and a cost.
sample_csv = """
practice,drug_name,items,cost
A81001,Omeprazole 20mg capsules,320,410.32
A81001,Ramipril 5mg capsules,210,52.10
A81001,Salbutamol inhaler,190,288.44
A81002,Omeprazole 20mg capsules,120,155.20
A81002,Ramipril 5mg capsules,340,84.30
A81002,Salbutamol inhaler,60,92.00
A81003,Omeprazole 20mg capsules,540,700.10
A81003,Ramipril 5mg capsules,80,19.20
A81003,Salbutamol inhaler,400,602.10
A81004,Omeprazole 20mg capsules,90,120.00
A81004,Ramipril 5mg capsules,410,100.10
A81004,Salbutamol inhaler,75,112.00
"""

# Wrap the text in a file-like buffer so read_csv can parse it as if it
# were a file on disk.
data = StringIO(sample_csv)
gp = pd.read_csv(data)
gp

Unnamed: 0,practice,drug_name,items,cost
0,A81001,Omeprazole 20mg capsules,320,410.32
1,A81001,Ramipril 5mg capsules,210,52.1
2,A81001,Salbutamol inhaler,190,288.44
3,A81002,Omeprazole 20mg capsules,120,155.2
4,A81002,Ramipril 5mg capsules,340,84.3
5,A81002,Salbutamol inhaler,60,92.0
6,A81003,Omeprazole 20mg capsules,540,700.1
7,A81003,Ramipril 5mg capsules,80,19.2
8,A81003,Salbutamol inhaler,400,602.1
9,A81004,Omeprazole 20mg capsules,90,120.0


In [3]:
# Total items for each drug, summed over every practice.
gp["items"].groupby(gp["drug_name"]).sum()

drug_name
Omeprazole 20mg capsules    1070
Ramipril 5mg capsules       1040
Salbutamol inhaler           725
Name: items, dtype: int64

In [4]:
# Practice x drug matrix of item counts.
# pivot_table averages by default; item counts must be summed so that repeated
# rows for the same practice/drug add up, and a pair with no rows counts as 0.
gp.pivot_table(
    values="items",
    index="practice",
    columns="drug_name",
    aggfunc="sum",
    fill_value=0,
)

drug_name,Omeprazole 20mg capsules,Ramipril 5mg capsules,Salbutamol inhaler
practice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A81001,320,210,190
A81002,120,340,60
A81003,540,80,400
A81004,90,410,75


In [5]:
# Items per practice and drug. Sum (not the pivot_table default of mean) so
# repeated rows for one practice/drug accumulate, and treat an absent
# practice/drug pair as zero items rather than NaN.
table = gp.pivot_table(
    values="items",
    index="practice",
    columns="drug_name",
    aggfunc="sum",
    fill_value=0,
)

# Divide each row by its own total so each practice's shares add up to 1.
proportion = table.div(table.sum(axis=1), axis=0)

proportion

drug_name,Omeprazole 20mg capsules,Ramipril 5mg capsules,Salbutamol inhaler
practice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A81001,0.444444,0.291667,0.263889
A81002,0.230769,0.653846,0.115385
A81003,0.529412,0.078431,0.392157
A81004,0.156522,0.713043,0.130435


In [6]:
# Peer average share for each drug (mean taken down the practice axis).
proportion.mean(axis=0)

drug_name
Omeprazole 20mg capsules    0.340287
Ramipril 5mg capsules       0.434247
Salbutamol inhaler          0.225466
dtype: float64

In [7]:
# Standardise each drug column: distance from the peer mean measured in
# standard deviations of that drug's share across practices.
z = proportion.sub(proportion.mean(), axis="columns").div(proportion.std(), axis="columns")


In [8]:
# Inspect the z-scores of a single drug across all practices.
z.loc[:, "Ramipril 5mg capsules"]

practice
A81001   -0.472748
A81002    0.728118
A81003   -1.179765
A81004    0.924396
Name: Ramipril 5mg capsules, dtype: float64

In [9]:
# Keep only z-scores more than one standard deviation from the peer mean
# (everything else becomes NaN), then drop practices with nothing flagged.
alerts = z.where(z.abs().gt(1))
alerts.dropna(how="all")

drug_name,Omeprazole 20mg capsules,Ramipril 5mg capsules,Salbutamol inhaler
practice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A81003,1.077752,-1.179765,1.285905
A81004,-1.047208,,


In [10]:
def prescribing_alerts(df, threshold=1):
    """Flag practice/drug pairs whose prescribing share is unusual among peers.

    The steps are: total the items per practice and drug, convert each
    practice's row to shares of its own total, z-score every drug column
    across practices, and keep the z-scores whose absolute value exceeds
    ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``practice``, ``drug_name`` and ``items``.
    threshold : float, default 1
        A z-score is kept only when its absolute value is strictly greater
        than this.

    Returns
    -------
    pandas.DataFrame
        Indexed by practice with one column per drug. Cells hold the z-score
        where it exceeds the threshold and NaN otherwise; practices with no
        flagged drug are omitted.
    """
    # Item counts are additive, so aggregate with "sum": the pivot_table
    # default ("mean") would under-count any practice that has several rows
    # for the same drug. fill_value=0 makes a drug a practice never
    # prescribed count as a 0 share instead of dropping out as NaN.
    table = df.pivot_table(
        values="items",
        index="practice",
        columns="drug_name",
        aggfunc="sum",
        fill_value=0,
    )
    # Row-wise normalisation: each practice's shares add up to 1.
    proportion = table.div(table.sum(axis=1), axis=0)
    # Column-wise standardisation against the other practices.
    z = (proportion - proportion.mean()) / proportion.std()
    alerts = z[z.abs() > threshold]
    return alerts.dropna(how="all")

# Run the packaged pipeline on the sample data with the default threshold;
# the result should match the alerts table built step by step in the cells above.
prescribing_alerts(gp)

drug_name,Omeprazole 20mg capsules,Ramipril 5mg capsules,Salbutamol inhaler
practice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A81003,1.077752,-1.179765,1.285905
A81004,-1.047208,,


Interpreting outliers

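This notebook uses z-scores to flag practices whose share of prescribing for a drug differs from the peer average by more than a chosen threshold (|z| > 1 by default). With only a handful of practices this is a screening signal, not a test of statistical significance.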

Important:
	•	This does NOT indicate poor practice
	•	Signals require clinical interpretation
	•	Differences may reflect demographics, deprivation, or disease prevalence

Example:
A81003 shows a lower share of ACE inhibitor (Ramipril) prescribing (z = -1.18) and a higher share of bronchodilator (Salbutamol) prescribing (z = 1.29) than its peers — possibly reflecting a respiratory-heavy population rather than inappropriate care.

In [11]:
# Average absolute z-score across drugs for each practice, largest first:
# a rough ranking of how far each practice sits from its peers overall.
mean_abs_z = z.abs().mean(axis=1)
mean_abs_z.sort_values(ascending=False)

practice
A81003    1.181141
A81004    0.901569
A81002    0.733808
A81001    0.454236
dtype: float64

In [12]:
def flag_outliers(z_scores, threshold=1):
    """Keep only the z-scores that lie beyond ``threshold`` in either direction.

    Parameters
    ----------
    z_scores : pandas.DataFrame
        Numeric z-scores, one row per practice and one column per drug.
    threshold : float, default 1
        Scores are kept when strictly above ``threshold`` or strictly below
        ``-threshold``.

    Returns
    -------
    pandas.DataFrame
        Same columns as the input; non-flagged cells are NaN and rows with no
        flagged cell are removed.
    """
    # One magnitude test covers both the high and the low tail.
    beyond_threshold = z_scores.abs().gt(threshold)
    outliers_only = z_scores[beyond_threshold]
    return outliers_only.dropna(how="all")

In [13]:
# Apply the helper to the z-scores computed earlier, using the default threshold of 1.
flag_outliers(z)

drug_name,Omeprazole 20mg capsules,Ramipril 5mg capsules,Salbutamol inhaler
practice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A81003,1.077752,-1.179765,1.285905
A81004,-1.047208,,


In [14]:
def clinical_interpretation(z_scores, threshold=1):
    """Print a plain-language summary of each practice's flagged drugs.

    A header line is printed for every practice in ``z_scores``. Under it,
    one line is printed for each drug whose z-score is strictly above
    ``threshold`` ("Higher prescribing") or strictly below ``-threshold``
    ("Lower prescribing"). NaN scores are skipped. Nothing is returned.

    Parameters
    ----------
    z_scores : pandas.DataFrame
        One row per practice, one column per drug.
    threshold : float, default 1
        Magnitude a z-score must exceed to be reported.
    """
    upper_bound = threshold
    lower_bound = -threshold

    for practice, scores in z_scores.iterrows():
        print(f"\nPractice {practice}")

        observed = scores.dropna()
        for drug, value in observed.items():
            if value > upper_bound:
                print(f" - Higher prescribing of {drug} compared to peers (z={value:.2f})")
                continue

            if value < lower_bound:
                print(f" - Lower prescribing of {drug} compared to peers (z={value:.2f})")

In [15]:
# Print the narrative summary for every practice; practices with no score
# beyond the default threshold get a header with nothing listed under it.
clinical_interpretation(z)


Practice A81001

Practice A81002

Practice A81003
 - Higher prescribing of Omeprazole 20mg capsules compared to peers (z=1.08)
 - Lower prescribing of Ramipril 5mg capsules compared to peers (z=-1.18)
 - Higher prescribing of Salbutamol inhaler compared to peers (z=1.29)

Practice A81004
 - Lower prescribing of Omeprazole 20mg capsules compared to peers (z=-1.05)
