In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
pio.templates.default = "simple_white"
from scipy.stats import pearsonr

In [None]:
# Read the four source CSV extracts (paths are relative to the working
# directory). The order of the file names must match the order of the
# target names on the left-hand side.
eRD_data, prescribing_data, medicines_data, list_size_data = (
    pd.read_csv(csv_name)
    for csv_name in (
        "eRD Rates in NENC.csv",
        "All prescribing in NENC.csv",
        "BNF Medicine Chapters in NENC.csv",
        "List Sizes by Practice NENC.csv",
    )
)

In [3]:
def _drop_excluded_rows(frame):
    """Return ``frame`` without dummy-PCN rows and bracket-tagged practices.

    A row is removed when its ``PCN`` equals ``'DUMMY'``, or when its
    ``Practice`` text contains an opening bracket followed by ``C`` or ``D``
    and then a digit (a single optional space is allowed on either side of
    the letter). Rows whose ``Practice`` value is missing are kept.
    """
    is_dummy_pcn = frame['PCN'] == 'DUMMY'
    is_tagged_practice = frame['Practice'].str.contains(r'\( ?[CD] ?\d', na=False)
    return frame[~(is_dummy_pcn | is_tagged_practice)]


# Apply the same exclusions to both prescribing extracts.
prescribing_data = _drop_excluded_rows(prescribing_data)
medicines_data = _drop_excluded_rows(medicines_data)

In [4]:
# Attach the eRD figures (de-duplicated to one row per practice code) to the
# all-prescribing totals. The inner join keeps only practices present in both
# tables; `validate` raises if either side repeats a practice code.
merged_df = pd.merge(
    left=prescribing_data[["Practice Code", "Items", "Actual Cost"]],
    right=eRD_data.drop_duplicates("Practice Code"),
    how="inner",
    on="Practice Code",
    validate="one_to_one",
)

# Bring in the list size. The left join keeps every practice from the step
# above, leaving "List Size" missing where no list-size row exists.
merged_df = pd.merge(
    left=merged_df,
    right=list_size_data[["Practice Code", "List Size"]].drop_duplicates("Practice Code"),
    how="left",
    on="Practice Code",
    validate="one_to_one",
)

In [5]:
# Preview the first rows of the merged all-prescribing table.
merged_df.head()

Unnamed: 0,Practice Code,Items,Actual Cost,ICB,GP Practice,eRD Items,EPS Items,Proportion eRD,Dispensing,List Size
0,A84014,34888.0,265618.1648,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,MARINE MEDICAL GROUP,11190,34644,32.299965,No,13867
1,A84009,80972.0,547616.7379,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,RAILWAY MEDICAL GROUP,32597,80128,40.68116,No,26985
2,A84038,20854.0,152080.8347,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,FORUM FAMILY PRACTICE,7972,20857,38.22218,No,9102
3,A84037,14943.0,114977.1829,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,NETHERFIELD HOUSE,4298,14781,29.07787,No,6019
4,A84030,25345.0,199723.947,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,VILLAGE MEDICAL GROUP,6785,24885,27.265421,No,11439


In [6]:
# Attach the eRD figures (de-duplicated to one row per practice code) to the
# medicines-only totals. The inner join keeps only practices present in both
# tables; `validate` raises if either side repeats a practice code.
medicines_merged_df = pd.merge(
    left=medicines_data[["Practice Code", "Items", "Actual Cost"]],
    right=eRD_data.drop_duplicates("Practice Code"),
    how="inner",
    on="Practice Code",
    validate="one_to_one",
)

# Bring in the list size. The left join keeps every practice from the step
# above, leaving "List Size" missing where no list-size row exists.
medicines_merged_df = pd.merge(
    left=medicines_merged_df,
    right=list_size_data[["Practice Code", "List Size"]].drop_duplicates("Practice Code"),
    how="left",
    on="Practice Code",
    validate="one_to_one",
)

In [7]:
# Preview the first rows of the merged medicines-only table.
medicines_merged_df.head()

Unnamed: 0,Practice Code,Items,Actual Cost,ICB,GP Practice,eRD Items,EPS Items,Proportion eRD,Dispensing,List Size
0,A84014,33694,231546.7402,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,MARINE MEDICAL GROUP,11190,34644,32.299965,No,13867
1,A84009,78655,463127.0102,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,RAILWAY MEDICAL GROUP,32597,80128,40.68116,No,26985
2,A84038,20228,129879.6824,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,FORUM FAMILY PRACTICE,7972,20857,38.22218,No,9102
3,A84037,14501,98891.80325,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,NETHERFIELD HOUSE,4298,14781,29.07787,No,6019
4,A84030,24493,167506.1087,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,VILLAGE MEDICAL GROUP,6785,24885,27.265421,No,11439


In [8]:
# Normalise item volume and cost by list size, expressed per 1,000 patients.
# Division happens before the scaling, so rows with a missing list size
# simply come out as missing.
merged_df["Items per 1000 Patients"] = (
    merged_df["Items"].div(merged_df["List Size"]).mul(1000)
)
merged_df["Spend per 1000 Patients"] = (
    merged_df["Actual Cost"].div(merged_df["List Size"]).mul(1000)
)

In [9]:
# Normalise medicines-only item volume and cost by list size, expressed per
# 1,000 patients.
#
# The denominator must be this frame's own "List Size" column. pandas aligns
# Series on index labels, and `merged_df` is produced by a separate merge, so
# dividing by `merged_df["List Size"]` would pair each practice with whichever
# practice sits at the same row position in the other table (or with NaN if
# the tables differ in length).
medicines_merged_df["Items per 1000 Patients"] = (
    medicines_merged_df["Items"] / medicines_merged_df["List Size"] * 1000
)

medicines_merged_df["Spend per 1000 Patients"] = (
    medicines_merged_df["Actual Cost"] / medicines_merged_df["List Size"] * 1000
)

In [10]:
# Re-inspect the all-prescribing table now that the per-1000 columns exist.
merged_df.head()

Unnamed: 0,Practice Code,Items,Actual Cost,ICB,GP Practice,eRD Items,EPS Items,Proportion eRD,Dispensing,List Size,Items per 1000 Patients,Spend per 1000 Patients
0,A84014,34888.0,265618.1648,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,MARINE MEDICAL GROUP,11190,34644,32.299965,No,13867,2515.90106,19154.695666
1,A84009,80972.0,547616.7379,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,RAILWAY MEDICAL GROUP,32597,80128,40.68116,No,26985,3000.62998,20293.375501
2,A84038,20854.0,152080.8347,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,FORUM FAMILY PRACTICE,7972,20857,38.22218,No,9102,2291.144803,16708.507438
3,A84037,14943.0,114977.1829,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,NETHERFIELD HOUSE,4298,14781,29.07787,No,6019,2482.638312,19102.372969
4,A84030,25345.0,199723.947,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,VILLAGE MEDICAL GROUP,6785,24885,27.265421,No,11439,2215.665705,17459.913192


In [11]:
# Re-inspect the medicines-only table now that the per-1000 columns exist.
medicines_merged_df.head()

Unnamed: 0,Practice Code,Items,Actual Cost,ICB,GP Practice,eRD Items,EPS Items,Proportion eRD,Dispensing,List Size,Items per 1000 Patients,Spend per 1000 Patients
0,A84014,33694,231546.7402,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,MARINE MEDICAL GROUP,11190,34644,32.299965,No,13867,2429.797361,16697.680839
1,A84009,78655,463127.0102,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,RAILWAY MEDICAL GROUP,32597,80128,40.68116,No,26985,2914.767463,17162.386889
2,A84038,20228,129879.6824,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,FORUM FAMILY PRACTICE,7972,20857,38.22218,No,9102,2222.36871,14269.356449
3,A84037,14501,98891.80325,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,NETHERFIELD HOUSE,4298,14781,29.07787,No,6019,2409.204187,16429.939068
4,A84030,24493,167506.1087,NHS NORTH EAST AND NORTH CUMBRIA INTEGRATED CA...,VILLAGE MEDICAL GROUP,6785,24885,27.265421,No,11439,2141.18367,14643.422388


In [12]:
# Spend per 1,000 patients (all products) against the eRD proportion,
# coloured by dispensing status and overlaid with OLS trendlines.
fig = px.scatter(
    data_frame=merged_df,
    x="Proportion eRD",
    y="Spend per 1000 Patients",
    color="Dispensing",
    trendline="ols",
    title="Spend (all products) per 1000 Patients vs Proportion on eRD",
    labels={
        "Proportion eRD": "Proportion on eRD",
        "Spend per 1000 Patients": "Spend per 1000 Patients (£)",
    },
    hover_data=["Practice Code", "Items", "List Size"],
).update_layout(width=1000, height=700)  # fixed 1000x700 px canvas (landscape, not square)

fig.show()

In [13]:
# Items per 1,000 patients (all products) against the eRD proportion,
# coloured by dispensing status and overlaid with OLS trendlines.
fig = px.scatter(
    data_frame=merged_df,
    x="Proportion eRD",
    y="Items per 1000 Patients",
    color="Dispensing",
    trendline="ols",
    title="Items (all products) per 1000 Patients vs Proportion on eRD",
    labels={
        "Proportion eRD": "Proportion on eRD",
        "Items per 1000 Patients": "Items per 1000 Patients",
    },
    hover_data=["Practice Code", "Items", "List Size"],
).update_layout(width=1000, height=700)  # fixed 1000x700 px canvas (landscape, not square)

fig.show()

In [14]:
# Spend per 1,000 patients (medicines only) against the eRD proportion,
# coloured by dispensing status and overlaid with OLS trendlines.
fig = px.scatter(
    data_frame=medicines_merged_df,
    x="Proportion eRD",
    y="Spend per 1000 Patients",
    color="Dispensing",
    trendline="ols",
    title="Spend (medicines only) per 1000 Patients vs Proportion on eRD",
    labels={
        "Proportion eRD": "Proportion on eRD",
        "Spend per 1000 Patients": "Spend per 1000 Patients (£)",
    },
    hover_data=["Practice Code", "Items", "List Size"],
).update_layout(width=1000, height=700)  # fixed 1000x700 px canvas (landscape, not square)

fig.show()

In [15]:
# Scatter plot: medicines-only items per 1,000 patients vs proportion on eRD,
# coloured by dispensing status and overlaid with OLS trendlines.
fig = px.scatter(
    medicines_merged_df,
    x="Proportion eRD",
    y="Items per 1000 Patients",
    color="Dispensing",
    hover_data=["Practice Code", "Items", "List Size"],
    labels={
        "Proportion eRD": "Proportion on eRD",
        # Keys must be real column names; the previous key
        # ("Items for Medicines per 1000 Patients") matched no column, so the
        # entry was silently ignored.
        "Items per 1000 Patients": "Items per 1000 Patients"
    },
    title="Items (medicines only) per 1000 Patients vs Proportion on eRD",
    trendline="ols",
)

# Fix the canvas at 1000x700 px (landscape, not square).
fig.update_layout(
    width=1000,
    height=700
)

fig.show()