In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from pathlib import Path
from scipy.signal import savgol_filter
from IPython.display import display

from config import DATA_DIR, OUTPUT_DIR

# Create the output directory (and any missing parents) up front so that the
# later to_csv / savefig calls have somewhere to write; a no-op if it exists.
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
# Processed andexanet records; "year_month" is parsed to datetimes on load.
andexanet = pd.read_csv(
    Path(DATA_DIR) / "andexanet_processed.csv",
    parse_dates=["year_month"],
)

In [None]:
# Column totals for the dose and milligram quantities, labelled and rendered
# as thousands-separated strings for both the CSV and the displayed table.
total_doses_andexanet = (
    andexanet[["quantity_doses", "quantity_mgs"]]
    .sum()
    .astype(int)
    .rename("Total")
    .rename_axis("Unit")
    .map('{:,}'.format)
)
total_doses_andexanet.to_csv(Path(OUTPUT_DIR) / "total_doses_andexanet.csv")
display(total_doses_andexanet)

In [None]:
# Monthly dose series; the first CSV column becomes a parsed date index.
monthly_doses_andexanet = pd.read_csv(
    Path(DATA_DIR) / "monthly_doses_andexanet.csv",
    index_col=0,
    parse_dates=True,
)

In [None]:
def setup_plot(ax, ylabel):
    """Apply the notebook's shared axis styling to ``ax``.

    Sets the y-axis label, enlarges the ticks, puts one major x tick on every
    January labelled with the four-digit year, draws the bottom and left
    spines in thick black, removes horizontal padding and enables a solid grid.

    Parameters
    ----------
    ax : matplotlib Axes to style (its x-axis is formatted as dates).
    ylabel : str
        Text for the y-axis label; may be an empty string.
    """
    ax.set_ylabel(ylabel, fontsize=28)
    ax.tick_params(axis='both', labelsize=22, length=10, width=2)

    # Month 1 only, i.e. a single major tick per year.
    ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1]))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

    for side in ('bottom', 'left'):
        spine = ax.spines[side]
        spine.set_color('black')
        spine.set_linewidth(2)

    ax.margins(x=0)
    ax.grid(True, linestyle='-', linewidth=2)

In [None]:
def smooth_and_plot(data, output_file_name, ylim=100, ylab=""):
    """Plot a monthly series with its smoothed trend and save the figure.

    The raw values are drawn as a semi-transparent line with point markers,
    overlaid with a Savitzky-Golay smoothed curve (window 11, polynomial
    order 3). A dashed red vertical line marks 2021-05-01, the same date this
    notebook uses to split pre/post periods. The figure is written to
    ``OUTPUT_DIR / output_file_name``, shown, and then closed.

    Parameters
    ----------
    data : pandas DataFrame or Series indexed by date.
        Values are flattened and paired with ``data.index``, so a DataFrame is
        assumed to hold a single column.
    output_file_name : str
        File name of the saved image, relative to ``OUTPUT_DIR``.
    ylim : number, default 100
        Upper y-axis limit; the axis runs from 0 to ``ylim``.
    ylab : str, default ""
        Y-axis label.

    Raises
    ------
    ValueError
        Propagated from ``savgol_filter`` when the series is shorter than the
        11-point smoothing window.
    """
    smoothed_doses = pd.Series(
        savgol_filter(data.to_numpy().ravel(), window_length=11, polyorder=3),
        index=data.index,
    )

    fig, ax = plt.subplots(figsize=(14, 10))
    ax.plot(data.index, data, linewidth=4, alpha=0.5, color='#005AB5', marker='.', markersize=10)
    ax.plot(smoothed_doses.index, smoothed_doses, linewidth=7, alpha=0.9, color='#005AB5')

    # Tie the marker line's height to the requested axis limit rather than a
    # fixed 600, so it spans the full plot for any ylim.
    ax.vlines(x=pd.to_datetime('2021-05-01'), ymin=0, ymax=ylim, color='#DC3220', linestyle='--', linewidth=4, alpha=0.7)

    setup_plot(ax, ylab)
    # Act on this figure/axes explicitly instead of pyplot's "current" state.
    ax.set_ylim(0, ylim)
    fig.savefig(Path(OUTPUT_DIR, output_file_name), dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)
        
# Monthly andexanet quantity with its smoothed trend, on a 0-600 axis.
smooth_and_plot(
    data=monthly_doses_andexanet,
    output_file_name="monthly_doses_andexanet.png",
    ylim=600,
    ylab="Quantity (vials)",
)

In [None]:
# Monthly organisation counts; the first CSV column becomes a parsed date index.
orgs_using_andexanet_monthly = pd.read_csv(
    Path(DATA_DIR) / "orgs_using_andexanet_monthly.csv",
    index_col=0,
    parse_dates=True,
)

In [None]:
# Split the records at 2021-05-01; rows dated exactly 2021-05-01 fall in the
# "post" subset because that comparison is inclusive.
andexanet_pre_may_21 = andexanet.loc[andexanet["year_month"] < pd.Timestamp('2021-05-01')]
andexanet_post_may_21 = andexanet.loc[andexanet["year_month"] >= pd.Timestamp('2021-05-01')]

# Number of distinct ods_code values in each period and across all records.
total_orgs_andexanet_pre_may_21 = andexanet_pre_may_21[["ods_code"]].nunique().astype(int)
total_orgs_andexanet_post_may_21 = andexanet_post_may_21[["ods_code"]].nunique().astype(int)
total_orgs_andexanet_all_time = andexanet[["ods_code"]].nunique().astype(int)

# One-row summary table: a column per period, a single "Total" row.
total_orgs_andexanet = pd.concat(
    [
        total_orgs_andexanet_pre_may_21,
        total_orgs_andexanet_post_may_21,
        total_orgs_andexanet_all_time,
    ],
    axis=1,
    keys=["Pre May 2021", "Post May 2021", "All time"],
)
total_orgs_andexanet.index = ["Total"]

total_orgs_andexanet.to_csv(Path(OUTPUT_DIR) / "total_orgs_andexanet.csv")
display(total_orgs_andexanet)

In [None]:
# Monthly count of organisations with its smoothed trend, on a 0-60 axis.
smooth_and_plot(
    data=orgs_using_andexanet_monthly,
    output_file_name="orgs_using_andexanet_monthly.png",
    ylim=60,
    ylab="Number of organisations",
)

In [None]:
# Monthly doses with one column per region; the first CSV column becomes a parsed date index.
monthly_doses_andexanet_by_region = pd.read_csv(
    Path(DATA_DIR) / "monthly_doses_andexanet_by_region.csv",
    index_col=0,
    parse_dates=True,
)

In [None]:
def plot_regional_volumes(df, output_file_name, ylim=100, ylab=""):
    """Draw a 3x3 grid of per-region panels and save it as one figure.

    Each panel is drawn by ``plot_single_region`` for one of nine fixed
    regions, in the order listed below (filled row by row). All panels share
    x and y axes; a single rotated y-label is placed on the figure's left
    edge. The figure is written to ``OUTPUT_DIR / output_file_name``, shown,
    and closed.

    Parameters
    ----------
    df : pandas DataFrame
        Passed through to ``plot_single_region``; must contain a column for
        every region name listed below.
    output_file_name : str
        File name of the saved image, relative to ``OUTPUT_DIR``.
    ylim : number, default 100
        Upper y-axis limit forwarded to every panel.
    ylab : str, default ""
        Shared y-axis label text.
    """
    regions_ordered = (
        "North West",
        "Yorkshire and The Humber",
        "North East",
        "West Midlands",
        "East Midlands",
        "East of England",
        "South West",
        "London",
        "South East",
    )

    fig, axes = plt.subplots(3, 3, figsize=(20, 15), sharex=True, sharey=True)
    axes = axes.flatten()

    for panel, main_region in zip(axes, regions_ordered):
        plot_single_region(panel, df, main_region, ylim)

    fig.text(0.02, 0.5, ylab, va='center', rotation='vertical', fontsize=32)

    # Drop any panels left over after the regions; with nine regions on a
    # 3x3 grid there are none, so this is a no-op today.
    for spare_panel in axes[len(regions_ordered):]:
        fig.delaxes(spare_panel)

    # Reserve a strip on the left/bottom/top so the shared label is not overlapped.
    plt.tight_layout(rect=[0.05, 0.05, 1, 0.95])
    plt.savefig(Path(OUTPUT_DIR) / output_file_name, dpi=300, bbox_inches='tight')
    plt.show()
    plt.close()

def _smooth_region_series(series):
    """Return ``series`` Savitzky-Golay smoothed (window 11, order 3) on its original index."""
    return pd.Series(savgol_filter(series, window_length=11, polyorder=3), index=series.index)


def plot_single_region(ax, single_volumes_by_region, main_region, ylim=100):
    """Draw one region's panel: that region highlighted against all the others.

    Every column other than ``main_region`` is drawn as a faint grey smoothed
    curve. The main region is then drawn on top in blue, both smoothed and as
    raw values with point markers. A dashed red vertical line marks
    2021-05-01, and the panel is titled with the region name.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    single_volumes_by_region : pandas DataFrame
        One column per region; must contain ``main_region``. Its index is
        used as the x-axis.
    main_region : str
        Column to highlight; also used as the panel title.
    ylim : number, default 100
        Upper y-axis limit; the axis runs from 0 to ``ylim``.

    Raises
    ------
    KeyError
        If ``main_region`` is not a column of ``single_volumes_by_region``.
    ValueError
        Propagated from ``savgol_filter`` when a series is shorter than the
        11-point smoothing window.
    """
    # Background context: every other region, smoothed only.
    for region in single_volumes_by_region.columns:
        if region == main_region:
            continue
        smoothed_other = _smooth_region_series(single_volumes_by_region[region])
        ax.plot(smoothed_other.index, smoothed_other, color='grey', linewidth=3, alpha=0.3)

    # Foreground: the highlighted region, smoothed curve first, raw points over it.
    main_region_data = single_volumes_by_region[main_region]
    smoothed_main = _smooth_region_series(main_region_data)
    ax.plot(smoothed_main.index, smoothed_main, color='#005AB5', linewidth=5, alpha=0.9)
    ax.plot(main_region_data.index, main_region_data, color='#005AB5', linewidth=3, alpha=0.5, marker='.', markersize=10)

    # Tie the marker line's height to the requested axis limit rather than a
    # fixed 150, so it spans the full panel for any ylim.
    ax.vlines(x=pd.to_datetime('2021-05-01'), ymin=0, ymax=ylim, color='#DC3220', linestyle='--', linewidth=3, alpha=0.7)

    ax.set_title(main_region, fontsize=26)
    setup_plot(ax, '')
    ax.set_ylim(0, ylim)

In [None]:
# Regional grid of monthly andexanet quantity, each panel on a 0-150 axis.
plot_regional_volumes(
    df=monthly_doses_andexanet_by_region,
    output_file_name="monthly_doses_andexanet_by_region.png",
    ylim=150,
    ylab="Quantity (vials)",
)

In [None]:
# Regional monthly milligram totals for andexanet and for DOACs. Both files
# are indexed by their first two columns; dates are not parsed at load time.
monthly_mgs_andexanet_by_region = pd.read_csv(
    Path(DATA_DIR) / "monthly_mgs_andexanet_by_region.csv",
    index_col=[0, 1],
)
monthly_mgs_doacs_by_region = pd.read_csv(
    Path(DATA_DIR) / "monthly_mgs_doacs_by_region.csv",
    index_col=[0, 1],
)

In [None]:
# Put the andexanet and DOAC quantities side by side, aligned on the shared two-level index.
combined = pd.concat(
    [monthly_mgs_andexanet_by_region, monthly_mgs_doacs_by_region],
    axis="columns",
)

In [None]:
# Ratio of andexanet mg to DOAC mg; missing results (e.g. a quantity absent
# for that row) are reported as 0.
# NOTE(review): a zero DOAC quantity with non-zero andexanet would give inf,
# which fillna does not replace — confirm this cannot occur in the data.
combined["measure"] = (
    combined["quantity_andexanet_mgs"]
    .div(combined["quantity_doacs_mgs"])
    .fillna(0)
)

In [None]:
# Reshape to wide form: one row per remaining index value, one column per
# region (column index named 'region'), holding the "measure" ratio.
combined = combined["measure"].unstack(level='region')
# The index was read as plain values from CSV; convert it to datetimes for plotting.
combined.index = pd.to_datetime(combined.index)

In [None]:
# Regional grid of the andexanet-to-DOAC mg ratio, each panel on a 0-0.02 axis.
plot_regional_volumes(
    df=combined,
    output_file_name="monthly_relative_andexanet_by_region.png",
    ylim=0.02,
    ylab="Andexanet mg / DOAC mg",
)