In [None]:
import pandas as pd

from pathlib import Path

from config import DATA_DIR

In [None]:
# Read the andexanet/DOAC CSV from DATA_DIR with explicit numeric dtypes and
# `year_month` parsed as datetimes.
df = pd.read_csv(
    Path(DATA_DIR, "andexanet_doacs.csv"),
    dtype={
        "udfs": float,
        "strnt_nmrtr_val": float,
        "strnt_dnmtr_val": float,
        "vtm": int,
    },
    parse_dates=["year_month"],
)

In [None]:
# Quantity divided by `udfs` (presumably the unit dose form size -- confirm).
df["quantity_doses"] = df["quantity"] / df["udfs"]

# A missing strength denominator is stored as 0; below, 0 means "no denominator".
df["strnt_dnmtr_val"] = df["strnt_dnmtr_val"].fillna(0)

# quantity * (numerator / denominator), or quantity * numerator when the
# denominator is 0. Swapping 0 denominators for 1 gives exactly that rule in a
# single vectorised expression instead of a Python-level lambda per row.
df["quantity_mgs"] = df["quantity"] * (
    df["strnt_nmrtr_val"] / df["strnt_dnmtr_val"].replace(0, 1)
)


In [None]:
# List the distinct VMP names present in the data.
vmp_names = df["vmp_name"].unique()
print(f"Unique VMPs: {vmp_names}")

In [None]:
# List the distinct units of measure recorded for the strength numerator.
strength_uoms = df["strnt_nmrtr_uom_name"].unique()
print(f"Unique strength numerators: {strength_uoms}")

In [None]:
# The combined 15mg/20mg product has no single strength, so the midpoint of the
# two (17.5, recorded in mg) is used for it.
is_rivaroxaban_combo = df["vmp_name"] == "Rivaroxaban 15mg tablets and Rivaroxaban 20mg tablets"
df.loc[is_rivaroxaban_combo, "strnt_nmrtr_val"] = 17.5
df.loc[is_rivaroxaban_combo, "strnt_nmrtr_uom_name"] = "mg"

In [None]:
# List the distinct units of measure recorded for the strength numerator.
strength_uoms = df["strnt_nmrtr_uom_name"].unique()
print(f"Unique strength numerators: {strength_uoms}")

In [None]:
# quantity * (numerator / denominator), or quantity * numerator when the
# denominator is 0. Swapping 0 denominators for 1 gives exactly that rule in a
# single vectorised expression instead of a Python-level lambda per row.
df["quantity_mgs"] = df["quantity"] * (
    df["strnt_nmrtr_val"] / df["strnt_dnmtr_val"].replace(0, 1)
)

In [None]:
# Write the processed frame to DATA_DIR without the index column.
processed_path = Path(DATA_DIR) / "andexanet_doacs_processed.csv"
df.to_csv(processed_path, index=False)

In [None]:
# Drug name -> identifier that is matched against the `vtm` column.
vtms = dict(
    Andexanet=783692006,
    Apixaban=774624002,
    Rivaroxaban=777455008,
)

In [None]:
# Keep only the rows whose `vtm` is the andexanet identifier and save them as their own CSV.
is_andexanet = df["vtm"].eq(vtms["Andexanet"])
df_andexanet = df.loc[is_andexanet]
df_andexanet.to_csv(Path(DATA_DIR) / "andexanet_processed.csv", index=False)

In [None]:
# Keep only the rows whose `vtm` is apixaban or rivaroxaban.
doac_vtms = [vtms[name] for name in ("Apixaban", "Rivaroxaban")]
df_doacs = df.loc[df["vtm"].isin(doac_vtms)]

In [None]:
# One entry per month start between the first and last `year_month` in the full
# data set, so months with no andexanet rows still appear in the outputs below.
min_date = df["year_month"].min()
max_date = df["year_month"].max()
dates = pd.date_range(start=min_date, end=max_date, freq='MS')

# Total andexanet doses per month; months without any rows are reported as 0.
# Uses `df_andexanet` (the frame defined earlier in the notebook): the bare name
# `andexanet` is never assigned, so it fails on a fresh kernel.
monthly_doses_andexanet = (
    df_andexanet
    .groupby("year_month")["quantity_doses"]
    .sum()
    .reindex(index=dates, fill_value=0)
)
monthly_doses_andexanet.to_csv(Path(DATA_DIR, "monthly_doses_andexanet.csv"))

# Number of distinct `ods_code` values with andexanet rows in each month.
orgs_using_andexanet_monthly = (
    df_andexanet
    .groupby("year_month")["ods_code"]
    .nunique()
    .reindex(index=dates, fill_value=0)
)
orgs_using_andexanet_monthly.to_csv(Path(DATA_DIR, "orgs_using_andexanet_monthly.csv"))

# Monthly doses with one column per region; region/month gaps and months with
# no rows at all are both filled with 0.
monthly_doses_andexanet_by_region = (
    df_andexanet
    .groupby(["region", "year_month"])["quantity_doses"]
    .sum()
    .unstack(level=0)
    .reindex(index=dates)
    .fillna(0)
)
monthly_doses_andexanet_by_region.to_csv(Path(DATA_DIR, "monthly_doses_andexanet_by_region.csv"))

In [None]:
def _monthly_mgs_by_region(frame, months, value_name):
    """Sum `quantity_mgs` per month and region on a complete month x region grid.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows with `year_month`, `region` and `quantity_mgs` columns.
    months : pandas.DatetimeIndex
        Every month that must appear in the result.
    value_name : str
        Name given to the summed `quantity_mgs` column in the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by (`year_month`, `region`) with the single column `value_name`.
        Any month/region pair without rows is 0.
    """
    monthly = (
        frame
        .groupby(by=["year_month", "region"])[["quantity_mgs"]]
        .sum()
        .unstack(level="region")
        # `reindex(..., fill_value=0)` would only fill months that are missing
        # entirely; region gaps created by `unstack` would stay NaN. Filling
        # after the reindex zeroes both kinds of gap.
        .reindex(months)
        .fillna(0)
        .stack(future_stack=True)
        .rename(columns={"quantity_mgs": value_name})
    )
    # Reindexing against an unnamed DatetimeIndex drops the level name.
    monthly.index.names = ["year_month", "region"]
    return monthly


monthly_mgs_andexanet_by_region = _monthly_mgs_by_region(
    df_andexanet, dates, "quantity_andexanet_mgs"
)
monthly_mgs_andexanet_by_region.to_csv(Path(DATA_DIR, "monthly_mgs_andexanet_by_region.csv"))

monthly_mgs_doacs_by_region = _monthly_mgs_by_region(
    df_doacs, dates, "quantity_doacs_mgs"
)
monthly_mgs_doacs_by_region.to_csv(Path(DATA_DIR, "monthly_mgs_doacs_by_region.csv"))
