In [None]:
import pandas as pd
import numpy as np

from pathlib import Path

from config import DATA_DIR, OUTPUT_DIR

In [None]:
# Secondary-care andexanet and DOAC extracts are downloaded by hand from
# OpenPrescribing Hospitals and placed in DATA_DIR.
andexanet_df = pd.read_csv(Path(DATA_DIR, "andexanet_oph.csv"), parse_dates=["Date"])
doacs_df = pd.read_csv(Path(DATA_DIR, "doacs_oph.csv"), parse_dates=["Date"])

all_ods_codes = pd.read_csv(Path(DATA_DIR) / "all_ods_codes.csv")

# ODS codes of every organisation whose has_ae_last_six_months flag is True.
ae_flag = all_ods_codes["has_ae_last_six_months"] == True
orgs_with_ae = all_ods_codes.loc[ae_flag, "ods_code"].unique().tolist()

# Keep only trusts that appear in that list.
andexanet_ae_df = andexanet_df.loc[andexanet_df["Trust Code"].isin(orgs_with_ae)]
doacs_ae_df = doacs_df.loc[doacs_df["Trust Code"].isin(orgs_with_ae)]

# Study window: May 2021 - June 2025 (lower bound inclusive, upper bound exclusive).
andexanet_in_window = (andexanet_ae_df["Date"] >= "2021-05-01") & (andexanet_ae_df["Date"] < "2025-07-01")
andexanet_ae_df_time_filtered = andexanet_ae_df[andexanet_in_window]

doacs_in_window = (doacs_ae_df["Date"] >= "2021-05-01") & (doacs_ae_df["Date"] < "2025-07-01")
doacs_ae_df_time_filtered = doacs_ae_df[doacs_in_window]

# Trust codes excluded as paediatric.
paed_orgs = ["RBS", "RQ3", "RCU", "RP4"]

# Andexanet: drop the paediatric trusts, keep only the columns needed
# downstream, and save the trust-level monthly table.
andexanet_not_paed = ~andexanet_ae_df_time_filtered["Trust Code"].isin(paed_orgs)
andexanet_ae_df_time_filtered_no_paed = andexanet_ae_df_time_filtered.loc[
    andexanet_not_paed,
    ["Date", "Value", "Trust Code", "Trust Name", "Region"],
]
andexanet_ae_df_time_filtered_no_paed.to_csv(
    Path(DATA_DIR) / "monthly_doses_andexanet_by_trust.csv",
    index=False,
)

# DOACs: drop the paediatric trusts, then total Value per month and region.
doacs_not_paed = ~doacs_ae_df_time_filtered["Trust Code"].isin(paed_orgs)
doacs_ae_df_time_filtered_no_paed = doacs_ae_df_time_filtered[doacs_not_paed]

monthly_sum_doacs = (
    doacs_ae_df_time_filtered_no_paed
    .groupby(["Date", "Region"])["Value"]
    .sum()
)
monthly_sum_doacs.to_csv(Path(DATA_DIR) / "monthly_ddds_doacs_by_region.csv")

In [110]:
# Total andexanet Value per month across all retained trusts, saved as "Doses".
doses_by_month = andexanet_ae_df_time_filtered_no_paed.groupby("Date")["Value"]
monthly_doses_andexanet = doses_by_month.sum().rename("Doses")
monthly_doses_andexanet.to_csv(Path(DATA_DIR) / "monthly_doses_andexanet.csv")

# Number of distinct trusts with an andexanet row in each month.
trusts_by_month = andexanet_ae_df_time_filtered_no_paed.groupby("Date")["Trust Code"]
orgs_using_andexanet_monthly = trusts_by_month.nunique().rename("Trust Count")
orgs_using_andexanet_monthly.to_csv(Path(DATA_DIR) / "orgs_using_andexanet_monthly.csv")

# Wide table: one row per month, one column per region; region/month
# combinations with no rows are filled with 0.
doses_by_region_month = (
    andexanet_ae_df_time_filtered_no_paed
    .groupby(["Region", "Date"])["Value"]
    .sum()
)
monthly_doses_andexanet_by_region = doses_by_region_month.unstack("Region").fillna(0)
monthly_doses_andexanet_by_region.to_csv(Path(DATA_DIR) / "monthly_doses_andexanet_by_region.csv")

Now handle the primary care prescribing data for DOACs

In [22]:
# Primary-care DOAC prescribing extract; "month" is parsed as a datetime.
doacs_pc = pd.read_csv(
    Path(DATA_DIR, "doacs_pc.csv"),
    parse_dates=["month"],
)
doacs_pc.head()

Unnamed: 0,month,regional_team,nm,strnt_nmrtr_val,strnt_nmrtr_uom,strnt_dnmtr_val,strnt_dnmtr_uom,sum_items,sum_quantity,DDD
0,2019-01-01,Y56,Apixaban 2.5mg tablets,2.5,258684004,,,21696,778047.0,10
1,2019-01-01,Y56,Apixaban 5mg tablets,5.0,258684004,,,28366,1376559.0,10
2,2019-01-01,Y56,Rivaroxaban 10mg tablets,10.0,258684004,,,1241,38242.0,20
3,2019-01-01,Y56,Rivaroxaban 15mg tablets,15.0,258684004,,,10879,211945.0,20
4,2019-01-01,Y56,Rivaroxaban 2.5mg tablets,2.5,258684004,,,157,6077.0,20


In [23]:
# Distinct product names present in the primary-care extract.
doacs_pc.loc[:, "nm"].unique()

array(['Apixaban 2.5mg tablets', 'Apixaban 5mg tablets',
       'Rivaroxaban 10mg tablets', 'Rivaroxaban 15mg tablets',
       'Rivaroxaban 2.5mg tablets', 'Rivaroxaban 20mg tablets',
       'Rivaroxaban 1mg/ml granules for oral suspension sugar free',
       'Rivaroxaban 10mg capsules', 'Rivaroxaban 15mg capsules',
       'Rivaroxaban 20mg capsules',
       'Rivaroxaban 5mg/ml oral suspension sugar free'], dtype=object)

In [24]:
# One row per distinct product/strength combination, to see which products
# carry a strength denominator.
strength_columns = [
    "nm",
    "strnt_nmrtr_val",
    "strnt_nmrtr_uom",
    "strnt_dnmtr_val",
    "strnt_dnmtr_uom",
]
doacs_pc[strength_columns].drop_duplicates()

Unnamed: 0,nm,strnt_nmrtr_val,strnt_nmrtr_uom,strnt_dnmtr_val,strnt_dnmtr_uom
0,Apixaban 2.5mg tablets,2.5,258684004,,
1,Apixaban 5mg tablets,5.0,258684004,,
2,Rivaroxaban 10mg tablets,10.0,258684004,,
3,Rivaroxaban 15mg tablets,15.0,258684004,,
4,Rivaroxaban 2.5mg tablets,2.5,258684004,,
5,Rivaroxaban 20mg tablets,20.0,258684004,,
1270,Rivaroxaban 1mg/ml granules for oral suspensio...,1.0,258684004,1.0,258773002.0
3062,Rivaroxaban 10mg capsules,10.0,258684004,,
3064,Rivaroxaban 15mg capsules,15.0,258684004,,
3068,Rivaroxaban 20mg capsules,20.0,258684004,,


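A couple of rivaroxaban products (the liquid formulations, e.g. 1mg/ml) give their strength as a numerator-denominator pair. Ingredient quantity is calculated differently for these: (strength numerator / strength denominator) × quantity, rather than strength numerator × quantity.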

In [25]:
def get_quantity_grams(row):
    """Return the total ingredient quantity for one prescribing row.

    The strength per unit is ``strnt_nmrtr_val`` when the row has no strength
    denominator, and ``strnt_nmrtr_val / strnt_dnmtr_val`` when it does. That
    strength is multiplied by ``sum_quantity``.

    NOTE(review): despite the name, the result is in whatever unit the
    strength numerator uses. The sample rows suggest milligrams
    (e.g. "Apixaban 2.5mg tablets" has strnt_nmrtr_val 2.5) - confirm.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "strnt_nmrtr_val", "strnt_dnmtr_val" and "sum_quantity".

    Returns
    -------
    float
        Strength per unit multiplied by the quantity.
    """
    strength_per_unit = row["strnt_nmrtr_val"]
    denominator = row["strnt_dnmtr_val"]

    # Only products with a strength denominator need the ratio.
    if pd.notna(denominator):
        strength_per_unit = strength_per_unit / denominator

    return strength_per_unit * row["sum_quantity"]

# Row-wise ingredient quantity, then divide by each row's DDD value to get
# the number of DDDs.
doacs_pc["quantity_grams"] = doacs_pc.apply(get_quantity_grams, axis="columns")
doacs_pc["quantity_ddds"] = doacs_pc["quantity_grams"].div(doacs_pc["DDD"])

In [26]:
# Total primary-care DDDs per month and regional team, as a flat frame.
doacs_ddds_by_region_pc = (
    doacs_pc
    .groupby(["month", "regional_team"])[["quantity_ddds"]]
    .sum()
    .reset_index()
)

In [27]:
# regional_team code -> region name. The names are later used as a merge key
# against the secondary-care "Region" column, so they must match it exactly.
region_mapping = {
    "Y56": "London",
    "Y58": "South West",
    "Y59": "South East",
    "Y60": "Midlands",
    "Y61": "East Of England",
    "Y62": "North West",
    "Y63": "North East And Yorkshire",
}

In [28]:
# Translate regional_team codes (e.g. "Y56") into region names.
# `replace` is used instead of `map` because `map` turns every value that is
# not a key of region_mapping into NaN - including the already-translated
# names if this cell is executed a second time. `replace` leaves such values
# untouched, so the cell is safe to re-run.
doacs_ddds_by_region_pc["regional_team"] = (
    doacs_ddds_by_region_pc["regional_team"].replace(region_mapping)
)

Merge the primary and secondary care DOACs data.

In [31]:
# Attach primary-care DDDs to each secondary-care month/region row.
# Left join: a secondary-care row with no primary-care match is kept, with
# NaN in the primary-care columns.
doacs_all = pd.merge(
    monthly_sum_doacs.reset_index(),
    doacs_ddds_by_region_pc,
    how="left",
    left_on=["Date", "Region"],
    right_on=["month", "regional_team"],
)

In [112]:
# Combined DOAC volume: secondary-care Value plus primary-care DDDs.
# NaN on either side propagates to the total.
doacs_all["quantity_pc_sc"] = doacs_all["Value"].add(doacs_all["quantity_ddds"])

In [43]:
# Andexanet Value summed per month and region, as a flat frame.
andexanet_ae_df_time_filtered_no_paed_by_region = (
    andexanet_ae_df_time_filtered_no_paed
    .groupby(["Date", "Region"])[["Value"]]
    .sum()
    .reset_index()
)

In [41]:
# Keep only the merge keys and the combined DOAC total.
# NOTE: this rebinds doacs_all without its "Value"/"quantity_ddds" columns,
# so the cell that computes "quantity_pc_sc" cannot be re-run after this one
# without first re-running the merge.
doacs_all = doacs_all.loc[:, ["Date", "Region", "quantity_pc_sc"]]

In [121]:
# Pair each month/region's andexanet total with its combined DOAC total.
# Default (inner) join: only month/region pairs present in both frames remain.
measure = pd.merge(
    andexanet_ae_df_time_filtered_no_paed_by_region,
    doacs_all,
    on=["Date", "Region"],
)

Calculate the measure as the number of andexanet vials (secondary care) divided by the number of DOAC DDDs, expressed in units of 100,000 DDDs, across primary and secondary care combined.

In [49]:
# Express the combined DOAC total in units of 100,000 DDDs.
measure["quantity_pc_sc_100_000"] = measure["quantity_pc_sc"].div(100_000)

In [51]:
# Rate: andexanet Value per 100,000 DOAC DDDs.
measure["andexanet_vials_per_100_000_doac_ddds"] = (
    measure["Value"].div(measure["quantity_pc_sc_100_000"])
)

In [129]:
# Save the long-format measure table (one row per month/region).
measure.to_csv(
    Path(DATA_DIR) / "monthly_measure_by_region.csv",
    index=False,
)

In [145]:
# Save the wide-format measure: one row per month, one column per region.
# Missing month/region cells (and any NaN rates) are written as 0.
measure_wide = measure.pivot(
    index='Date',
    columns='Region',
    values='andexanet_vials_per_100_000_doac_ddds',
)
measure_wide.reset_index().fillna(0).to_csv(
    Path(DATA_DIR) / "measure_by_region.csv",
    index=False,
)