<a href="https://colab.research.google.com/github/Yizhi-Liang/VOI-QBA/blob/main/VOI_QBA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the figure data and figure output paths
# used later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np

!pip install -q lets-plot
from lets_plot import *
LetsPlot.setup_html()

In [None]:
# 1. Parameters and Setups

# simulation settings
n_sim = 15000          # number of Monte Carlo draws per call to calculate_nested_benefit
np.random.seed(2026)   # seeds NumPy's global RNG once; later draws therefore depend on cell execution order

# economic and utility
wtp = 150000          # Lambda ($/QALY); costs are divided by this to express them in QALYs
qaly_no_event = 0.8   # QALYs credited when the event does not occur
qaly_event = 0.2      # QALYs credited when the event occurs
cost_sc = 50000       # cost passed to calculate_net_benefit for the SC option
cost_nt = 60000       # cost passed to calculate_net_benefit for the NT option

# true values
p_sc = 0.3            # event probability under SC; multiplied by exp(theta) to get the NT event probability

# Variance of theta function
def get_var_theta(theta, n_sc, n_nt, p_sc=p_sc):
    """Variance of a log relative-risk estimate for a two-arm comparison.

    Parameters
    ----------
    theta : float or array-like
        Log relative risk; the NT event probability is ``p_sc * exp(theta)``.
    n_sc, n_nt : float
        Sample sizes of the SC and NT arms.
    p_sc : float
        Event probability in the SC arm.

    Returns
    -------
    float or ndarray
        ``(1/events_nt - 1/n_nt) + (1/events_sc - 1/n_sc)`` where the event
        counts are the expected counts ``n * p`` in each arm.
    """
    p_nt = p_sc * np.exp(theta)

    # Expected number of events in each arm
    events_sc = n_sc * p_sc
    events_nt = n_nt * p_nt

    var_sc = 1/events_sc - 1/n_sc
    var_nt = 1/events_nt - 1/n_nt

    return var_nt + var_sc

# study designs
## policy 1: trial
trial_cost=5*1e6            # trial cost in dollars (converted to QALYs via wtp in get_policy_nb)
n_trial = 400               # total trial enrolment
n_trial_sc = 0.5*n_trial    # equal allocation; note the result is a float (200.0)
n_trial_nt = 0.5*n_trial
tau_trial = 4 # years until trial results are available

## policy 2: RWE
bias_b = 0.3                # default multiplicative bias factor; its log is added on the theta scale
n_rwe=4000                  # total RWE sample size
n_rwe_sc=0.5*n_rwe          # equal split; floats (2000.0)
n_rwe_nt=0.5*n_rwe
delta_tau_rwe=2 # RWE results arrive this many years before the trial results (tau_trial - delta_tau_rwe)

# population
I_t = 10000 # annual cohort size
total_yrs = 10 # time horizon; yearly sums run over t = 0 .. total_yrs - 1
discount_rate = 0.03        # annual discount rate applied to each cohort
update_rate = 0.3           # weight on the evidence-informed net benefit; (1 - update_rate) stays on nb_sc

In [None]:
# 2. Helper Functions

# Posterior update (normal-normal)
def get_posterior_mean_variance(prior_mean, prior_var, data_mean, data_var):
    """Combine a normal prior with a normal data summary (conjugate update).

    Precisions (inverse variances) add, and the posterior mean is the
    precision-weighted average of the prior mean and the data mean.
    Scalars and NumPy arrays are both accepted and broadcast together.

    Returns
    -------
    (post_mean, post_var)
    """
    precision_prior = 1.0 / prior_var
    precision_data = 1.0 / data_var

    post_var = 1.0 / (precision_prior + precision_data)
    weighted_sum = prior_mean * precision_prior + data_mean * precision_data
    post_mean = post_var * weighted_sum

    return post_mean, post_var

  # Net benefit
def calculate_net_benefit(theta, cost, qaly_no_event=qaly_no_event, qaly_event=qaly_event, p_sc=p_sc):
    """Net health benefit at log relative risk ``theta``: E[QALYs] - cost / wtp.

    The event probability is ``exp(theta) * p_sc``; expected QALYs mix
    ``qaly_no_event`` and ``qaly_event`` by that probability. ``wtp`` is read
    from module scope. Works element-wise on arrays of ``theta``.
    """
    event_risk = np.exp(theta) * p_sc
    qaly_without_event = (1 - event_risk) * qaly_no_event
    qaly_with_event = event_risk * qaly_event
    return (qaly_without_event + qaly_with_event) - cost/wtp

In [None]:
# Nested benefit
def calculate_nested_benefit(rr_prior_mean, bias_b=bias_b, n_sim=n_sim):
    """Pre-posterior expected net benefits for the trial and RWE strategies.

    Outer loop (Monte Carlo, ``n_sim`` draws from NumPy's global RNG):
    draw a log-RR ``theta`` from the prior, simulate the study estimate, and
    update the prior with the normal-normal conjugate rule.

    Inner expectation (closed form): ``calculate_net_benefit`` is affine in
    ``exp(theta)``, and for ``theta ~ N(m, v)`` we have
    ``E[exp(theta)] = exp(m + v / 2)``. The posterior expected net benefit is
    therefore ``calculate_net_benefit(m + v / 2, cost_nt)`` exactly. This
    replaces the former ``(n_sim, n_sim)`` inner sampling matrices (three
    arrays of n_sim**2 floats per call), removing inner Monte Carlo noise and
    the multi-gigabyte allocations.

    Parameters
    ----------
    rr_prior_mean : float
        Prior mean of the relative risk (its log is the prior mean of theta).
    bias_b : float
        Multiplicative bias of the RWE estimate; ``log(bias_b)`` is added to
        the unbiased estimate on the theta scale.
    n_sim : int
        Number of outer Monte Carlo draws.

    Returns
    -------
    tuple
        ``(nb_sc, nb_nt_trial, nb_nt_rwe, nb_trial, nb_rwe)`` where
        ``nb_sc`` is the net benefit of SC, ``nb_nt_*`` the prior-expected
        net benefit of NT before any evidence, and ``nb_trial`` / ``nb_rwe``
        the expected payoff after deciding on trial / RWE evidence.
    """
    def _expected_nb_nt(post_mean, post_var):
        # Exact E[NB(theta)] for theta ~ N(post_mean, post_var): shift the
        # mean by half the variance (log-normal mean), then evaluate NB once.
        return calculate_net_benefit(post_mean + 0.5 * post_var, cost_nt)

    theta_prior_mean = np.log(rr_prior_mean)
    # Prior variance corresponds to a hypothetical 100-vs-100 study
    theta_prior_var = get_var_theta(
        p_sc = p_sc,
        theta = theta_prior_mean,
        n_sc = 100,
        n_nt = 100)

    # Net Benefit of SC (constant; theta = 0 means RR = 1)
    nb_sc = calculate_net_benefit(theta=0, cost=cost_sc)

    # Prior Only World
    theta_prior = np.random.normal(theta_prior_mean, np.sqrt(theta_prior_var), n_sim)

    # --- Policy 1: Trial Strategy ---
    # 1. Generate Unbiased Trial Data (X_bar)
    var_trial = get_var_theta(theta_prior, n_trial_sc, n_trial_nt)
    X_bar = np.random.normal(theta_prior, np.sqrt(var_trial))

    # 2. Decision Making (Posterior Belief based on X_bar)
    post_mean_trial, post_var_trial = get_posterior_mean_variance(
        prior_mean=theta_prior_mean,
        prior_var=theta_prior_var,
        data_mean=X_bar,
        data_var=var_trial
    )
    expected_nb_nt_trial = _expected_nb_nt(post_mean_trial, post_var_trial)

    # 3. Decision Rule: Choose NT if Expected NB > NB_SC
    choose_nt_trial = expected_nb_nt_trial > nb_sc

    # 4. Payoff: the trial is unbiased, so the posterior expected NB is itself
    #    the expected payoff of the chosen option.
    payoff_trial = np.where(choose_nt_trial, expected_nb_nt_trial, nb_sc)
    nb_trial = np.mean(payoff_trial)

    # 5. Before trial disclosure and during the trial (prior-expected NB of NT)
    nb_nt_trial = np.mean(calculate_net_benefit(theta_prior, cost_nt))

    # --- Policy 2: RWE Strategy ---
    # 1. Hypothetical unbiased study with the RWE sample sizes
    hypo_var_trial = get_var_theta(theta_prior, n_rwe_sc, n_rwe_nt)
    hypo_X_bar = np.random.normal(theta_prior, np.sqrt(hypo_var_trial))

    # 2. Generate Biased RWE Data (R_bar): shift by log(bias_b) on the theta scale
    biased_mean_true = hypo_X_bar + np.log(bias_b)
    var_rwe = get_var_theta(biased_mean_true, n_rwe_sc, n_rwe_nt)
    R_bar = np.random.normal(biased_mean_true, np.sqrt(var_rwe))

    # 3. Decision maker's (biased) posterior, based on R_bar ...
    post_mean_rwe, post_var_rwe = get_posterior_mean_variance(
        prior_mean=theta_prior_mean,
        prior_var=theta_prior_var,
        data_mean=R_bar,
        data_var=var_rwe
    )
    # ... and the posterior the same data would give without the bias
    post_mean_true_rwe, post_var_true_rwe = get_posterior_mean_variance(
        prior_mean=theta_prior_mean,
        prior_var=theta_prior_var,
        data_mean=hypo_X_bar,
        data_var=hypo_var_trial
    )

    # Perceived (biased) and unbiased expected NB of NT
    expected_nb_nt_rwe = _expected_nb_nt(post_mean_rwe, post_var_rwe)
    expected_nb_nt_true_rwe = _expected_nb_nt(post_mean_true_rwe, post_var_true_rwe)

    # 4. Decision Rule: the decision uses the biased belief ...
    choose_nt_rwe = expected_nb_nt_rwe > nb_sc

    # 5. ... but the payoff is scored with the unbiased expected NB
    payoff_rwe = np.where(choose_nt_rwe, expected_nb_nt_true_rwe, nb_sc)
    nb_rwe = np.mean(payoff_rwe)

    # 6. Before disclosure of RWE: same prior-expected NB of NT as for the trial
    nb_nt_rwe = nb_nt_trial

    return nb_sc, nb_nt_trial, nb_nt_rwe, nb_trial, nb_rwe

In [None]:
# Policy Net Benefit
def get_policy_nb(rr_prior_mean, bias_b=bias_b, delta_tau_rwe=delta_tau_rwe, trial_cost=trial_cost, n_sim=n_sim):
    """Population-level net benefit of the trial policy and the RWE policy.

    Per-person net benefits come from ``calculate_nested_benefit``; they are
    scaled by annual cohorts of ``I_t`` people, discounted at
    ``discount_rate`` over years ``0 .. total_yrs - 1``.

    Returns
    -------
    (policy_1_total, policy_2_total)
    """
    def discounted_cohorts(first_year, stop_year):
        # Discounted head-count of the annual cohorts in years first_year .. stop_year - 1
        return np.sum([I_t / ((1 + discount_rate)**t) for t in range(first_year, stop_year)])

    def blended_nb(nb_informed):
        # update_rate follows the evidence-informed payoff; the remainder stays on SC
        return update_rate * nb_informed + (1 - update_rate) * nb_sc

    # NBs
    nb_sc, nb_nt_trial, nb_nt_rwe, nb_trial, nb_rwe = calculate_nested_benefit(
        rr_prior_mean=rr_prior_mean, bias_b=bias_b, n_sim=n_sim)

    # --- Policy 1: Trial ---
    # Trial participants, net of the trial cost expressed in QALYs
    trial_pop_nb = n_trial_sc * nb_sc + n_trial_nt * nb_nt_trial - trial_cost/wtp

    # Everyone else up to and including year tau_trial receives SC;
    # the year-0 cohort is undiscounted and excludes the trial participants.
    discounted_pop_waiting = discounted_cohorts(1, tau_trial + 1)
    before_trial_pop_nb = (discounted_pop_waiting+I_t-n_trial) * nb_sc

    # Cohorts after the trial reports (t > tau_trial)
    discounted_pop_after = discounted_cohorts(tau_trial + 1, total_yrs)
    expected_nb_after = blended_nb(nb_trial)
    after_trial_pop_nb = discounted_pop_after * expected_nb_after

    policy_1_total = trial_pop_nb + before_trial_pop_nb + after_trial_pop_nb

    # --- Policy 2: RWE ---
    cutoff_rwe = tau_trial - delta_tau_rwe

    # 1. Cohorts up to and including the RWE read-out year
    discounted_pop_before_rwe = discounted_cohorts(0, cutoff_rwe + 1)
    expected_nb_before_rwe = blended_nb(nb_nt_rwe)
    policy_2_before = discounted_pop_before_rwe * expected_nb_before_rwe

    # 2. Cohorts after the RWE results (t > tau - delta_tau)
    discounted_pop_after_rwe = discounted_cohorts(cutoff_rwe + 1, total_yrs)
    expected_nb_after_rwe = blended_nb(nb_rwe)
    policy_2_after = discounted_pop_after_rwe * expected_nb_after_rwe

    policy_2_total = policy_2_before + policy_2_after

    return policy_1_total, policy_2_total

# Figure 1: Population-level expected net benefit under alternative CED policies as a function of RWE bias

In [None]:
scaler = 1.3  # common multiplier for the axis/legend font sizes below

# Line and fill colours, keyed by the policy labels used in every figure
colors = dict([
    ('Policy 1: RCT-based CED', '#E41A1B'),
    ('Policy 2: RWE-based CED', '#377EB8'),
])


def _times(size, **extra):
    """Text element in Times New Roman at ``size``; extra kwargs are passed through."""
    return element_text(size=size, family="Times New Roman", **extra)


# Shared theme: Times New Roman everywhere, horizontal legend below the plot
times_new_roman_theme = theme(
    axis_text_x=_times(10*scaler, angle=0),
    axis_text_y=_times(10*scaler),
    legend_text=_times(11*scaler),
    legend_position="bottom",
    legend_direction="horizontal",
    legend_title=_times(12*scaler),
    plot_title=_times(16, face = "bold"),
    plot_subtitle=_times(14),
    axis_title_x=_times(12*scaler),
    axis_title_y=_times(12*scaler),
    text=_times(8)
)

In [None]:
# Grid of RWE bias factors: 30 points on [0.1, 1] followed by 60 points on (1, 3].
range1 = np.linspace(0.1, 1, 30)
# The first element (exactly 1.0) is dropped so it is not duplicated after concatenation.
range2 = np.linspace(1, 3, 61)[1:]
bias_b_range = np.concatenate((range1, range2))

## Base True RR = 0.63

In [None]:
# Figure 1: Expected Net Benefits under RWE bias
# Data Preparation

rr_true = 0.63

policy_cols = ['Policy 1: RCT-based CED', 'Policy 2: RWE-based CED']

# Evaluate both policies once per bias factor
results_p1 = []
for b in bias_b_range:
    nb_pair = get_policy_nb(rr_prior_mean=rr_true, bias_b=b, n_sim=n_sim)
    results_p1.append({'bias_b': b, **dict(zip(policy_cols, nb_pair))})

df_res_p1 = pd.DataFrame(results_p1)

# Long format for plotting: one row per (bias, policy)
df_p1 = df_res_p1.melt(
    id_vars=['bias_b'],
    value_vars=policy_cols,
    var_name='Policy Scenario',
    value_name='Net Benefit',
)

# Crossings of the two curves: sign changes of (Policy 1 - Policy 2),
# located by linear interpolation between neighbouring grid points
diffs_p1 = df_res_p1[policy_cols[0]] - df_res_p1[policy_cols[1]]
idx_p1 = np.flatnonzero(np.diff(np.sign(diffs_p1)))
all_intersections_p1 = []

for left, right in zip(idx_p1, idx_p1 + 1):
    x1, x2 = df_res_p1.bias_b.iloc[left], df_res_p1.bias_b.iloc[right]
    y1, y2 = diffs_p1.iloc[left], diffs_p1.iloc[right]

    rise = y2 - y1
    if np.isclose(rise, 0.0):
        continue  # flat segment: no well-defined crossing
    all_intersections_p1.append(x1 + (x2 - x1) * (0 - y1) / rise)

# Keep only the outermost crossings (smallest and largest), de-duplicated and sorted
if all_intersections_p1:
    pib_p1 = sorted({min(all_intersections_p1), max(all_intersections_p1)})
else:
    pib_p1 = []

In [None]:
# Figure 1: Expected Net Benefits under RWE bias
# Save data

fig_dat_path = "/content/drive/MyDrive/Research/02_VOI_RWE_QBA/03_output/stats"

# Long-format curves
df_p1.to_csv(f"{fig_dat_path}/df_p1.csv", index=False)

# Save intersections as a one-column table
pib_p1_table = pd.DataFrame({'pib_p1': pib_p1})
pib_p1_table.to_csv(f"{fig_dat_path}/pib_p1.csv", index=False)

In [None]:
# Figure 1: Expected Net Benefits under RWE bias
# Plotting

# Reload the saved curves and rescale the net benefit by 1/1000
df_p1 = pd.read_csv(f"{fig_dat_path}/df_p1.csv").assign(
    **{'Net Benefit': lambda frame: frame['Net Benefit'] / 1000}
)

# Reload the saved crossings as a plain list
pib_p1_df = pd.read_csv(f"{fig_dat_path}/pib_p1.csv")
pib_p1 = pib_p1_df['pib_p1'].tolist()

fig1 = ggplot(df_p1, aes(x="bias_b", y="Net Benefit", color="Policy Scenario"))
fig1 += geom_line(size=1.2)
fig1 += scale_color_manual(values=colors)
fig1 += scale_x_continuous(name="RWE Bias Factor (B)", format=".1f")
fig1 += scale_y_continuous(name="Population Expected Net Benefit (1K QALYs)", format=".2f", limits=(25.00, 25.36))
fig1 += theme_bw()
fig1 += times_new_roman_theme
fig1 += theme(legend_title=element_blank())

# Labels sit at the lowest plotted value
y_min = df_p1['Net Benefit'].min()

# One dashed vertical marker plus a numeric label per crossing
for x_val in pib_p1:
    fig1 += geom_vline(xintercept=x_val, linetype='dashed', color='black')
    fig1 += geom_label(x=x_val, y=y_min, label=f"{x_val:.2f}", color='black', hjust=-0.1, vjust=0, fill='white')

fig1

# Figure 2: Structural interpretation of the policy indifference bias (PIB) threshold using quantitative bias analysis

In [None]:
# Figure 2: PIB using QBA
# Data Preparation

# Reload the Figure 1 crossings from disk and keep them as a plain list
pib_p1_df = pd.read_csv(f"{fig_dat_path}/pib_p1.csv")
pib_p1_vals = pib_p1_df['pib_p1'].to_list()

# Define the bias function
def get_bias(p_u_sc, rr_ux, rr_uy):
    """Bias factor ``(1/p_u_sc + rr_ux*(rr_uy - 1)) / (1/p_u_sc + (rr_uy - 1))``.

    Parameters
    ----------
    p_u_sc : float
        Must be strictly positive (presumably the confounder prevalence
        in the SC group -- confirm against the manuscript).
    rr_ux, rr_uy : float or ndarray
        Association parameters; arrays are handled element-wise.

    Raises
    ------
    ValueError
        If ``p_u_sc <= 0``.
    """
    if p_u_sc <= 0:
        raise ValueError("p_u_sc must be > 0.")

    inv_p = 1.0 / p_u_sc
    excess_uy = rr_uy - 1.0
    return (inv_p + rr_ux * excess_uy) / (inv_p + excess_uy)

# Grid settings for the (rr_ux, rr_uy) plane
p_u_sc_const = 0.2
rr_ux_min, rr_ux_max = 0.01, 5.3
rr_uy_min, rr_uy_max = 0.01, 5.3
step = 0.02 # Adjusted step size for better performance

# The tiny upper offset keeps the end point in the grid despite float rounding
rr_ux_vals = np.round(np.arange(rr_ux_min, rr_ux_max + 1e-12, step), 6)
rr_uy_vals = np.round(np.arange(rr_uy_min, rr_uy_max + 1e-12, step), 6)

# Full Cartesian grid: every rr_ux value paired with every rr_uy value
df_plot2 = pd.DataFrame({
    "rr_ux": np.repeat(rr_ux_vals, len(rr_uy_vals)),
    "rr_uy": np.tile(rr_uy_vals, len(rr_ux_vals)),
})
df_plot2["bias"] = get_bias(p_u_sc_const, df_plot2["rr_ux"].to_numpy(), df_plot2["rr_uy"].to_numpy())

# Determine Policy Zones
if len(pib_p1_vals) >= 2:
    # Two thresholds: Policy 2 is assigned strictly between them
    pib_min = min(pib_p1_vals)
    pib_max = max(pib_p1_vals)

    df_plot2['Policy_Zone'] = np.where(
        (df_plot2['bias'] > pib_min) & (df_plot2['bias'] < pib_max),
        'Policy 2: RWE-based CED',
        'Policy 1: RCT-based CED'
    )
else:
    # np.nan (the bare name `NaN` is undefined): with no threshold every
    # comparison below is False, so the whole grid falls to Policy 1.
    pib_val = pib_p1_vals[0] if pib_p1_vals else np.nan
    if pib_val > 1.0:
         df_plot2['Policy_Zone'] = np.where(df_plot2['bias'] < pib_val, 'Policy 2: RWE-based CED', 'Policy 1: RCT-based CED')
    else:
         df_plot2['Policy_Zone'] = np.where(df_plot2['bias'] > pib_val, 'Policy 2: RWE-based CED', 'Policy 1: RCT-based CED')


# Generate curves for each PIB
curves_data = []
a = 1.0 / p_u_sc_const

for val in pib_p1_vals:
    # Solving get_bias(...) == val for rr_uy gives
    #   rr_uy = 1 + a * (val - 1) / (rr_ux - val),
    # which has a vertical asymptote at rr_ux = val. The curve is therefore
    # drawn as two segments that stop 0.05 short of the asymptote on each side.
    x_left = np.linspace(rr_ux_min, val - 0.05, 100)
    y_left = 1.0 + a * (val - 1.0) / (x_left - val)

    x_right = np.linspace(val + 0.05, rr_ux_max, 100)
    y_right = 1.0 + a * (val - 1.0) / (x_right - val)

    # Line-type label; these strings are the keys of the linetype mapping
    # used when plotting ("dotted" below 1, "dashed" otherwise).
    lt_label = "PIB ≈ 0.82" if val < 1.0 else "PIB ≈ 1.34"

    df_left = pd.DataFrame({"rr_ux": x_left, "rr_uy": y_left, "PIB_Val": val, "Segment": "Left", "LineType": lt_label})
    df_right = pd.DataFrame({"rr_ux": x_right, "rr_uy": y_right, "PIB_Val": val, "Segment": "Right", "LineType": lt_label})

    tmp_df = pd.concat([df_left, df_right])

    # Keep only the part of the curve inside the plotted rr_uy range
    tmp_df = tmp_df[(tmp_df["rr_uy"] >= rr_uy_min) & (tmp_df["rr_uy"] <= rr_uy_max)]

    if not tmp_df.empty:
        curves_data.append(tmp_df)

df_curves = pd.concat(curves_data) if curves_data else pd.DataFrame(columns=["rr_ux", "rr_uy", "PIB_Val", "Segment", "LineType"])
# One group per (PIB, segment) so the two sides of an asymptote are not joined
df_curves['Group_ID'] = df_curves['PIB_Val'].astype(str) + "_" + df_curves['Segment']

In [None]:
# Figure 2: PIB using QBA

# Line type per curve label (labels are assigned when the curves are built)
linetypes_map = {
    "PIB ≈ 0.82": "dotted",
    "PIB ≈ 1.34": "dashed",
}


def _arrow_segment(x, y, xend, yend):
    """Thin black annotation arrow from (x, y) to (xend, yend)."""
    return geom_segment(x=x, y=y, xend=xend, yend=yend,
                        arrow=arrow(type='closed', length=10, angle=20), color='black', size=0.5)


# Plotting
# NOTE: the two text labels index pib_p1_vals[0] and pib_p1_vals[1], so this
# cell needs at least two PIB values.
fig2 = ggplot(df_plot2, aes("rr_ux", "rr_uy"))

# Policy zones as coloured tiles, with the PIB iso-bias curves on top
fig2 += geom_tile(aes(fill="Policy_Zone"))
fig2 += scale_fill_manual(values=colors)
fig2 += geom_line(data=df_curves, mapping=aes("rr_ux", "rr_uy", group="Group_ID", linetype="LineType"),
                  color="black", size=1.0)

# Hand-placed labels and the arrows pointing from them to the curves
fig2 += geom_text(x=1.7, y=2.2, label=f"PIB ≈ {pib_p1_vals[1]:.2f}", color='black')
fig2 += geom_text(x=1.0, y=1.3, label=f"PIB ≈ {pib_p1_vals[0]:.2f}", color='black')
fig2 += _arrow_segment(2.0, 2.3, 2.3, 2.6)
fig2 += _arrow_segment(0.7, 1.5, 0.2, 2.3)
fig2 += _arrow_segment(1.3, 1.1, 2.0, 0.4)

# Scales, axes and theme
fig2 += scale_linetype_manual(values=linetypes_map, guide='none')
fig2 += coord_cartesian(xlim=(0.0, 5.2), ylim=(0.0, 5.2))
fig2 += labs(
    x="Confounder-Treatment Association",
    y="Confounder-Outcome Association",
    fill="Policy"
)
fig2 += scale_x_continuous(format='.1f', limits=(0.0, 5.2), breaks=[1, 2, 3, 4, 5], expand=[0, 0])
fig2 += scale_y_continuous(format='.1f', limits=(0.0, 5.2), breaks=[0, 1, 2, 3, 4, 5], expand=[0, 0])
fig2 += theme_bw()
fig2 += times_new_roman_theme
fig2 += theme(legend_box="vertical", legend_title=element_blank())

fig2

# Figure 3: Sensitivity of the policy indifference bias (PIB) to the timing advantage of RWE and trial costs.

In [None]:
# Figure 3: Faceted Plot (3x3)
# Data Preparation

# Parameters for the grid
timing_options = [1, 2, 3]
cost_options = [0*1e6, 5*1e6, 10*1e6]
cost_labels = {0*1e6: "Low ($0)", 5*1e6: "Mid ($5M)", 10*1e6: "High ($10M)"}

records_grid = []   # one row per (panel, bias, policy)
pib_records = []    # one row per panel with its low/high PIB

for dt in timing_options:
    timing_label = f"Advantage: {dt} Year(s)"

    for cost in cost_options:
        cost_label = cost_labels[cost]
        p1_vals, p2_vals, biases = [], [], []

        # Generate data for this panel
        for b in bias_b_range:
            p1, p2 = get_policy_nb(rr_prior_mean=rr_true, bias_b=b, delta_tau_rwe=dt, trial_cost=cost, n_sim=n_sim)

            # Rescale by 1/1000 for plotting
            val_p1 = np.mean(p1) / 1e3
            val_p2 = np.mean(p2) / 1e3

            p1_vals.append(val_p1)
            p2_vals.append(val_p2)
            biases.append(b)

            for policy_name, policy_value in (
                ("Policy 1: RCT-based CED", val_p1),
                ("Policy 2: RWE-based CED", val_p2),
            ):
                records_grid.append({
                    "bias_b": b,
                    "Net Benefit": policy_value,
                    "Policy": policy_name,
                    "Timing": timing_label,
                    "Cost": cost_label
                })

        # Crossings for this panel: sign changes of (Policy 1 - Policy 2),
        # located by linear interpolation between neighbouring grid points
        diffs = np.array(p1_vals) - np.array(p2_vals)
        idx = np.flatnonzero(np.diff(np.sign(diffs)))

        current_intersects = []
        for i in idx:
            x1, x2 = biases[i], biases[i+1]
            y1, y2 = diffs[i], diffs[i+1]
            rise = y2 - y1
            if np.isclose(rise, 0.0):
                continue
            current_intersects.append(x1 + (x2 - x1) * (0 - y1) / rise)

        # Outermost crossing on each side of B = 1 (NaN when there is none)
        pib_low = min((x for x in current_intersects if x < 1.0), default=np.nan)
        pib_high = max((x for x in current_intersects if x > 1.0), default=np.nan)

        pib_records.append({
            "Timing": timing_label,
            "Cost": cost_label,
            "PIB_Low": pib_low,
            "PIB_High": pib_high
        })

# Build the frames once from the accumulated records
df_grid = pd.DataFrame(records_grid)
df_pib = pd.DataFrame(pib_records)

In [None]:
# Figure 3: Faceted Plot (3x3)
# Save data

# Curves and per-panel PIBs, written next to the Figure 1 data
for frame, stem in ((df_grid, "df_grid_p3"), (df_pib, "df_pib_p3")):
    frame.to_csv(f"{fig_dat_path}/{stem}.csv", index=False)

In [None]:
# Figure 3: Faceted Plot (3x3)
# Plotting

# Reload the saved panel data
df_grid = pd.read_csv(f"{fig_dat_path}/df_grid_p3.csv")
df_pib = pd.read_csv(f"{fig_dat_path}/df_pib_p3.csv")

# Facets are drawn in data order (x_order=0 below), so sort with an ordered
# categorical to get Low -> Mid -> High instead of alphabetical order.
cost_order = ["Low ($0)", "Mid ($5M)", "High ($10M)"]
df_grid['Cost'] = pd.Categorical(df_grid['Cost'], categories=cost_order, ordered=True)
df_grid = df_grid.sort_values(by=['Timing', 'Cost'])
df_pib['Cost'] = pd.Categorical(df_pib['Cost'], categories=cost_order, ordered=True)
df_pib = df_pib.sort_values(by=['Timing', 'Cost'])

# Prepare long format for vertical lines: one row per (panel, PIB_Low/PIB_High)
df_int = pd.melt(df_pib, id_vars=['Timing', 'Cost'], value_vars=['PIB_Low', 'PIB_High'], value_name='x_intercept')
df_int = df_int.dropna() # panels without a crossing on that side have NaN
df_int['label'] = df_int['x_intercept'].apply(lambda x: f"{x:.2f}")

# Calculate Y positions to separate labels vertically:
# low-PIB labels at the bottom, high-PIB labels 15% of the range higher
y_min = df_grid["Net Benefit"].min()
y_max = df_grid["Net Benefit"].max()
y_range = y_max - y_min

df_int['y_pos'] = df_int['variable'].apply(lambda x: y_min if x == 'PIB_Low' else y_min + 0.15 * y_range)

fig3 = (
    ggplot(df_grid, aes(x="bias_b", y="Net Benefit", color="Policy"))
    + geom_line(size=1.0)
    + facet_grid(x="Cost", y="Timing", x_order=0)
    + scale_color_manual(values=colors)

    + geom_vline(data=df_int, mapping=aes(xintercept="x_intercept"), linetype="dashed", color="black")
    + geom_label(data=df_int, mapping=aes(x="x_intercept", y="y_pos", label="label"),
                color="black", hjust=-0.1, vjust=0, fill="white")

    # The plotted values are net health benefit (QALYs) divided by 1e3, the
    # same quantity as in Figures 1 and 4 -- not dollars.
    + labs(x="RWE Bias Factor (B)",
           y="Population Expected Net Benefit (1K QALYs)")
    + theme_bw()
    + times_new_roman_theme
    + theme(legend_title=element_blank(), strip_text=element_text(size=12*scaler))
)

fig3

# Figure 4: Sensitivity of the policy indifference bias (PIB) to the expected treatment effect under current evidence (base-case RR).

In [None]:
# Figure 4: Sensitivity to treatment effect
# Data preparation and Plotting

rr_range = [0.5, rr_true, 0.8]
labels_map = {
    0.5: "Clearly Cost-Effective (True RR = 0.5)",
    0.63: "Marginal Cost-Effective (True RR = 0.63)",
    0.8: "Not Cost-Effective (True RR = 0.8)"
}

results_fig4 = []
pib_fig4 = []

# Order for plotting (same as the insertion order of labels_map)
scenario_order = list(labels_map.values())

for rr in rr_range:
    label = labels_map[rr]
    p1_vals, p2_vals, biases = [], [], []

    for b in bias_b_range:
        p1, p2 = get_policy_nb(rr_prior_mean=rr, bias_b=b, n_sim=n_sim)

        # Convert to 1K QALYs to match Figure 1 scale
        val_p1 = p1 / 1000
        val_p2 = p2 / 1000

        for policy_name, policy_value in (
            ("Policy 1: RCT-based CED", val_p1),
            ("Policy 2: RWE-based CED", val_p2),
        ):
            results_fig4.append({
                "bias_b": b,
                "Net Benefit": policy_value,
                "Policy": policy_name,
                "Scenario": label
            })

        p1_vals.append(val_p1)
        p2_vals.append(val_p2)
        biases.append(b)

    # Find intersections: sign changes of (Policy 1 - Policy 2), linearly interpolated
    diffs = np.array(p1_vals) - np.array(p2_vals)
    idx = np.flatnonzero(np.diff(np.sign(diffs)))

    current_intersects = []
    for i in idx:
        x1, x2 = biases[i], biases[i+1]
        y1, y2 = diffs[i], diffs[i+1]
        rise = y2 - y1
        if np.isclose(rise, 0.0):
            continue
        current_intersects.append(x1 + (x2 - x1) * (0 - y1) / rise)

    # Scenario-specific choice of which crossings to report
    if not current_intersects:
        final_intersects = []
    elif rr == 0.5:
        # Only keep the largest PIB
        final_intersects = [max(current_intersects)]
    elif rr == 0.63:
        # Keep the least and the largest (collapsed to one if they coincide)
        final_intersects = sorted({min(current_intersects), max(current_intersects)})
    else:
        # For 0.8 (Not Cost-Effective), keep all if any
        final_intersects = current_intersects

    # Store intersections found, with a pre-formatted two-decimal label
    pib_fig4.extend(
        {"Scenario": label, "x_intercept": x_val, "label": f"{x_val:.2f}"}
        for x_val in final_intersects
    )

df_fig4 = pd.DataFrame(results_fig4)
df_pib_fig4 = pd.DataFrame(pib_fig4)

# Set order
df_fig4['Scenario'] = pd.Categorical(df_fig4['Scenario'], categories=scenario_order, ordered=True)
if not df_pib_fig4.empty:
    df_pib_fig4['Scenario'] = pd.Categorical(df_pib_fig4['Scenario'], categories=scenario_order, ordered=True)

# Save data
for frame, stem in ((df_fig4, "df_fig4"), (df_pib_fig4, "df_pib_fig4")):
    frame.to_csv(f"{fig_dat_path}/{stem}.csv", index=False)

In [None]:
# Plotting
# Read back to ensure clean state
df_fig4 = pd.read_csv(f"{fig_dat_path}/df_fig4.csv")
# `label` holds pre-formatted two-decimal strings (e.g. "1.30"). Without an
# explicit dtype read_csv parses them back to floats and the formatting is
# lost, so the column is read as text.
# The guard uses the in-memory frame from the data-preparation cell: an empty
# frame is written as a column-less CSV that read_csv cannot parse.
df_pib_fig4 = (
    pd.read_csv(f"{fig_dat_path}/df_pib_fig4.csv", dtype={'label': str})
    if not df_pib_fig4.empty
    else pd.DataFrame(columns=['Scenario', 'x_intercept', 'label'])
)

# Enforce order again (the categorical dtype does not survive the CSV round trip)
df_fig4['Scenario'] = pd.Categorical(df_fig4['Scenario'], categories=scenario_order, ordered=True)
if not df_pib_fig4.empty:
    df_pib_fig4['Scenario'] = pd.Categorical(df_pib_fig4['Scenario'], categories=scenario_order, ordered=True)

fig4 = (
    ggplot(df_fig4, aes(x="bias_b", y="Net Benefit", color="Policy"))
    + geom_line(size=1.2)
    + facet_grid(x="Scenario", x_order=0, scales="free_y")
    + scale_color_manual(values=colors)
    + scale_x_continuous(name="RWE Bias Factor (B)", format=".1f")
    + scale_y_continuous(name="Population Expected Net Benefit (1K QALYs)",
                         format=".1f",
                         limits=(24.0, 25.8))
    + theme_bw()
    + times_new_roman_theme
    + theme(legend_title=element_blank(), strip_text=element_text(size=12*scaler))
)

# Dashed PIB markers with their labels pinned to the lower y-limit
if not df_pib_fig4.empty:
    fig4 += geom_vline(data=df_pib_fig4, mapping=aes(xintercept="x_intercept"), linetype="dashed", color="black")
    fig4 += geom_label(data=df_pib_fig4, mapping=aes(x="x_intercept", label="label"),
                      y=24.0, color="black", hjust=-0.1, vjust=0)

fig4

# Save figures

In [None]:
figure_path = "/content/drive/MyDrive/Research/02_VOI_RWE_QBA/03_output/figure"

# Export every figure as a standalone HTML file, in figure order
figures_to_save = (
    (fig1, "01_NB_Bias"),
    (fig2, "02_Bias_Interaction"),
    (fig3, "03_Time_Advantage"),
    (fig4, "04_Sensitivity_RR"),
)
for figure, stem in figures_to_save:
    ggsave(plot=figure, filename=f"{figure_path}/{stem}.html")