# 12 - TOST Power Analysis for Equivalence Testing

**Purpose**: This notebook provides functions for conducting power and sample size analysis for Two One-Sided Tests (TOST), which are used to test for statistical equivalence between two proportions. This is essential for studies aiming to show that a new method is "as good as" a standard one.

**Inputs**: None (this notebook uses theoretical parameters for simulation).

**Outputs**:
- Console output with calculated sample sizes and power.
- Plots visualizing the relationship between sample size, power, and other parameters.

### Key Functions:
1.  **`tost_proportions_sample_size`**: Calculates the sample size required for a TOST of two proportions.
2.  **`tost_proportions_power`**: Calculates the statistical power of a TOST for given sample sizes.
3.  **`plot_power_curve`**: Visualizes how power changes with sample size.
4.  **`plot_sample_size_heatmap`**: Creates a heatmap to show required sample sizes across a range of proportions and equivalence margins.
5.  **`plot_allocation_effect`**: Shows how different allocation ratios between groups affect the total required sample size.
6.  **`sample_size_tost_paired_prop`**: Calculates sample size for TOST of *paired* proportions, accounting for correlation.

In [None]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from typing import Tuple, Optional, Union, List

def tost_proportions_sample_size(
    p0: float,
    delta: float,
    alpha: float = 0.05,
    power: float = 0.8,
    allocation_ratio: float = 1.0
) -> Tuple[int, int]:
    """
    Calculate sample size required for Two One-Sided Tests (TOST) of equivalence
    for proportions.

    Parameters
    ----------
    p0 : float
        Expected proportion in the reference group (between 0 and 1)
    delta : float
        Equivalence margin (the maximum allowed difference between proportions)
    alpha : float, optional
        Significance level of each one-sided test (default: 0.05)
    power : float, optional
        Desired power (default: 0.8)
    allocation_ratio : float, optional
        Ratio of sample sizes between groups (n2/n1) (default: 1.0)

    Returns
    -------
    tuple
        (n1, n2) - Sample sizes for group 1 and group 2, each rounded up

    Raises
    ------
    ValueError
        If any argument is outside its documented range.

    Notes
    -----
    This function implements the sample size calculation for TOST of proportions
    using normal approximation.

    The hypotheses tested in TOST are:
    H0: p1 - p2 ≤ -delta or p1 - p2 ≥ delta
    Ha: -delta < p1 - p2 < delta

    where p1 and p2 are the proportions in groups 1 and 2, respectively,
    and delta is the equivalence margin.

    Power is evaluated at the alternative of exact equivalence
    (p1 = p2 = p0), so both groups have variance p0 * (1 - p0). Under that
    alternative the power is 2 * Phi(delta / se - z_alpha) - 1 with
    se**2 = var / n1 + var / n2, and requiring it to reach ``power`` gives

        n1 = (z_{1-alpha} + z_{1-beta/2})**2 * var * (1 + 1/r) / delta**2
        n2 = r * n1

    where beta = 1 - power and r = allocation_ratio.
    """
    # Parameter validation
    if not (0 < p0 < 1):
        raise ValueError("p0 must be between 0 and 1")
    if not (0 < delta < 1):
        raise ValueError("delta must be between 0 and 1")
    if not (0 < alpha < 0.5):
        raise ValueError("alpha must be between 0 and 0.5")
    if not (0 < power < 1):
        raise ValueError("power must be between 0 and 1")
    if allocation_ratio <= 0:
        raise ValueError("allocation_ratio must be positive")

    # Calculate critical values.  Both one-sided tests must reject, so with a
    # true difference of zero the type II error is split between the two
    # margins: use beta / 2 rather than beta.
    z_alpha = stats.norm.ppf(1 - alpha)
    z_beta = stats.norm.ppf(1 - (1 - power) / 2)

    # Bernoulli variance shared by both groups under exact equivalence.
    # (Evaluating the second group at p0 + delta leaves (0, 1) for large
    # margins and produced zero or negative variances.)
    variance = p0 * (1 - p0)

    # Solve variance / n1 + variance / (r * n1) = (delta / (z_alpha + z_beta))**2
    n1 = ((z_alpha + z_beta)**2 * variance * (1 + 1 / allocation_ratio)) / (delta**2)
    n2 = n1 * allocation_ratio

    # Round up to next integer
    return (int(np.ceil(n1)), int(np.ceil(n2)))

def tost_proportions_power(
    n1: int,
    n2: int,
    p0: float,
    delta: float,
    alpha: float = 0.05
) -> float:
    """
    Calculate power for Two One-Sided Tests (TOST) of equivalence for proportions.

    Parameters
    ----------
    n1 : int
        Sample size for group 1
    n2 : int
        Sample size for group 2
    p0 : float
        Expected proportion in the reference group (between 0 and 1)
    delta : float
        Equivalence margin (the maximum allowed difference between proportions)
    alpha : float, optional
        Significance level of each one-sided test (default: 0.05)

    Returns
    -------
    float
        Power of the TOST procedure, in [0, 1]

    Raises
    ------
    ValueError
        If any argument is outside its documented range.

    Notes
    -----
    Normal approximation evaluated at exact equivalence (p1 = p2 = p0), so
    both groups have variance p0 * (1 - p0). With se the standard error of
    the difference, equivalence is declared when both one-sided statistics
    exceed z_{1-alpha}, which gives

        power = 2 * Phi(delta / se - z_{1-alpha}) - 1

    The expression is negative when the acceptance region is empty (small
    samples); the power is then zero.
    """
    # Parameter validation
    if n1 <= 0 or n2 <= 0:
        raise ValueError("Sample sizes must be positive")
    if not (0 < p0 < 1):
        raise ValueError("p0 must be between 0 and 1")
    if not (0 < delta < 1):
        raise ValueError("delta must be between 0 and 1")
    if not (0 < alpha < 0.5):
        raise ValueError("alpha must be between 0 and 0.5")

    # Calculate critical value
    z_alpha = stats.norm.ppf(1 - alpha)

    # Bernoulli variance shared by both groups under exact equivalence
    variance = p0 * (1 - p0)

    # Standard error of the difference in sample proportions
    se = np.sqrt(variance / n1 + variance / n2)

    # Distance from the true difference (0) to either margin, in SE units
    ncp = delta / se

    # Both margins are equally far away, so the two one-sided rejection
    # probabilities are identical.
    power = 2 * stats.norm.cdf(ncp - z_alpha) - 1

    # An empty acceptance region means equivalence can never be declared.
    return max(0.0, float(power))

# Plotting helpers
def plot_power_curve(p0: float, delta: float, alpha: float = 0.05) -> None:
    """Plot TOST power against per-group sample size for equal group sizes.

    Power is evaluated with ``tost_proportions_power`` on the grid
    50, 100, ..., 1000 and drawn together with a dashed reference line at
    power 0.8. The figure is shown immediately; nothing is returned.

    Parameters
    ----------
    p0 : float
        Expected proportion in the reference group
    delta : float
        Equivalence margin
    alpha : float, optional
        Significance level (default: 0.05)
    """
    # Per-group sizes to evaluate (both groups get the same size)
    sample_sizes = np.arange(50, 1001, 50)
    powers = [
        tost_proportions_power(size, size, p0, delta, alpha)
        for size in sample_sizes
    ]

    plt.figure(figsize=(10, 6))
    plt.plot(sample_sizes, powers, 'b-')
    # Conventional 80% power target for visual reference
    plt.axhline(y=0.8, color='r', linestyle='--', label='Power = 0.8')
    plt.xlabel('Sample Size per Group')
    plt.ylabel('Power')
    plt.title(f'Power Curve for TOST of Proportions (p0={p0}, delta={delta}, alpha={alpha})')
    plt.grid(True)
    plt.legend()
    plt.show()

def plot_sample_size_heatmap(power: float = 0.8, alpha: float = 0.05) -> None:
    """Show required group-1 sample size over a grid of p0 and delta.

    ``tost_proportions_sample_size`` is evaluated (default allocation ratio)
    for p0 = 0.1, ..., 0.9 and delta = 0.025, ..., 0.2. The group-1 sizes are
    drawn as filled contours with labelled contour lines on top. The figure
    is shown immediately; nothing is returned.

    Parameters
    ----------
    power : float, optional
        Desired power (default: 0.8)
    alpha : float, optional
        Significance level (default: 0.05)
    """
    p0_values = np.arange(0.1, 0.91, 0.1)
    delta_values = np.arange(0.025, 0.201, 0.025)

    # Rows follow p0, columns follow delta; only the group-1 size is kept.
    sample_sizes = np.array(
        [
            [
                tost_proportions_sample_size(p0, delta, alpha, power)[0]
                for delta in delta_values
            ]
            for p0 in p0_values
        ],
        dtype=float,
    )

    plt.figure(figsize=(10, 8))
    plt.contourf(delta_values, p0_values, sample_sizes, 20, cmap='viridis')
    plt.colorbar(label='Sample Size per Group')
    plt.xlabel('Equivalence Margin (delta)')
    plt.ylabel('Expected Proportion (p0)')
    plt.title(f'Required Sample Size for TOST of Proportions (power={power}, alpha={alpha})')

    # Labelled iso-lines at round sample sizes
    iso_lines = plt.contour(
        delta_values,
        p0_values,
        sample_sizes,
        levels=[50, 100, 200, 500, 1000, 2000],
        colors='black',
    )
    plt.clabel(iso_lines, inline=True, fontsize=8)

    plt.show()

def plot_allocation_effect(p0: float, delta: float, alpha: float = 0.05, power: float = 0.8) -> None:
    """Plot n1, n2 and n1 + n2 as functions of the allocation ratio n2/n1.

    ``tost_proportions_sample_size`` is evaluated at 50 evenly spaced ratios
    between 0.1 and 5. The figure is shown immediately; nothing is returned.

    Parameters
    ----------
    p0 : float
        Expected proportion in the reference group
    delta : float
        Equivalence margin
    alpha : float, optional
        Significance level (default: 0.05)
    power : float, optional
        Desired power (default: 0.8)
    """
    ratios = np.linspace(0.1, 5, 50)

    # One (n1, n2) pair per allocation ratio
    size_pairs = [
        tost_proportions_sample_size(p0, delta, alpha, power, ratio)
        for ratio in ratios
    ]
    n1_values = [first for first, _ in size_pairs]
    n2_values = [second for _, second in size_pairs]
    total_values = [first + second for first, second in size_pairs]

    plt.figure(figsize=(10, 6))
    plt.plot(ratios, n1_values, 'b-', label='Group 1 Size (n1)')
    plt.plot(ratios, n2_values, 'r-', label='Group 2 Size (n2)')
    plt.plot(ratios, total_values, 'g-', label='Total Sample Size (n1+n2)')
    plt.xlabel('Allocation Ratio (n2/n1)')
    plt.ylabel('Sample Size')
    plt.title(f'Effect of Allocation Ratio on Sample Size (p0={p0}, delta={delta}, alpha={alpha}, power={power})')
    plt.grid(True)
    plt.legend()
    plt.show()

# # Example execution
# if __name__ == "__main__":
#     # Calculate sample size for a specific scenario
#     p0 = 0.5
#     delta = 0.1
#     alpha = 0.05
#     power = 0.8
#
#     n1, n2 = tost_proportions_sample_size(p0, delta, alpha, power)
#     print(f"For p0={p0}, delta={delta}, alpha={alpha}, power={power}:")
#     print(f"Required sample sizes: n1={n1}, n2={n2}")
#
#     # Plot power curve
#     plot_power_curve(p0, delta, alpha)
#
#     # Plot sample size heatmap
#     plot_sample_size_heatmap(power, alpha)
#
#     # Plot allocation effect
#     plot_allocation_effect(p0, delta, alpha, power)


In [None]:
# Scenario: reference proportion 0.80 with an equivalence margin of 0.05.
# NOTE: these are notebook-level names; later cells rebind delta, alpha,
# power and n1.
p0 = 0.80
delta = 0.05
alpha = 0.05
power = 0.8

# allocation_ratio is left at its default of 1.0
n1, n2 = tost_proportions_sample_size(p0, delta, alpha, power)
print(f"For p0={p0}, delta={delta}, alpha={alpha}, power={power}:")
print(f"Required sample sizes: n1={n1}, n2={n2}")

In [None]:
# Power of the TOST for a fixed design of 800 subjects per group, using the
# same p0, delta and alpha values as the sample-size example.
p = tost_proportions_power(
    n1=800,
    n2=800,
    p0=0.80,
    delta=0.05,
    alpha=0.05,
)
# Bare expression so the notebook displays the computed power
p

In [None]:
import numpy as np
from scipy import stats

def sample_size_tost_paired_prop(p1, p2, delta, alpha=0.05, power=0.9, correlation=0):
    """
    Calculate sample size for TOST of paired proportions.

    Uses the normal approximation for the mean paired difference. The power
    at n pairs is

        Phi((delta - d) * sqrt(n) / sd - z_alpha)
        + Phi((delta + d) * sqrt(n) / sd - z_alpha) - 1

    with d = |p1 - p2| and sd the standard deviation of one paired
    difference. The smallest n reaching the requested power is returned.
    The familiar closed form with (z_alpha + z_beta) ignores the second
    margin and is too small when p1 == p2; it is used here only as the
    lower end of the search interval.

    Parameters:
    -----------
    p1 : float
        Expected proportion in condition 1 (0 to 1)
    p2 : float
        Expected proportion in condition 2 (0 to 1)
    delta : float
        Equivalence margin (maximum difference considered equivalent);
        must be larger than |p1 - p2|
    alpha : float
        Significance level of each one-sided test (default: 0.05)
    power : float
        Desired power (default: 0.9)
    correlation : float
        Correlation between paired measurements (-1 to 1, default: 0)

    Returns:
    --------
    n : int
        Required sample size (number of pairs), at least 1

    Raises:
    -------
    ValueError
        If an argument is out of range, or if delta <= |p1 - p2| (the
        expected difference is not inside the equivalence margin, so no
        sample size can give the requested power).
    """
    if not (0 <= p1 <= 1) or not (0 <= p2 <= 1):
        raise ValueError("p1 and p2 must be between 0 and 1")
    if not (0 < alpha < 0.5):
        raise ValueError("alpha must be between 0 and 0.5")
    if not (0 < power < 1):
        raise ValueError("power must be between 0 and 1")
    if not (-1 <= correlation <= 1):
        raise ValueError("correlation must be between -1 and 1")

    true_diff = abs(p1 - p2)
    if delta <= true_diff:
        raise ValueError("delta must be greater than |p1 - p2|")

    # Variance of a single paired difference
    var1 = p1 * (1 - p1)
    var2 = p2 * (1 - p2)
    cov = correlation * np.sqrt(var1 * var2)
    # Rounding can push the radicand marginally below zero when |correlation| = 1
    var_diff = max(var1 + var2 - 2 * cov, 0.0)
    if var_diff == 0:
        # Paired differences are constant: a single pair is the minimum design.
        return 1
    sd_diff = np.sqrt(var_diff)

    z_alpha = stats.norm.ppf(1 - alpha)

    def achieved_power(n_pairs):
        # Normal-approximation TOST power with n_pairs pairs
        scale = np.sqrt(n_pairs) / sd_diff
        return (
            stats.norm.cdf((delta - true_diff) * scale - z_alpha)
            + stats.norm.cdf((delta + true_diff) * scale - z_alpha)
            - 1
        )

    gap_sq = (delta - true_diff) ** 2

    # Lower bound: the nearer margin alone must be rejected with probability
    # >= power.  Upper bound: rejecting the nearer margin with probability
    # >= 1 - beta/2 guarantees joint power >= 1 - beta.
    z_lower = max(z_alpha + stats.norm.ppf(power), 0.0)
    z_upper = z_alpha + stats.norm.ppf(1 - (1 - power) / 2)
    lo = max(1, int(np.ceil(z_lower**2 * var_diff / gap_sq)))
    hi = max(lo, int(np.ceil(z_upper**2 * var_diff / gap_sq)))

    # Power is increasing in n, so bisect for the smallest adequate n.
    while lo < hi:
        mid = (lo + hi) // 2
        if achieved_power(mid) >= power:
            hi = mid
        else:
            lo = mid + 1

    return lo

# Example 1: Equal proportions (true effect = 0)
# NOTE: delta, alpha, power and n1 rebind the notebook-level names set in the
# earlier two-sample example cell.
p1 = 0.5
p2 = 0.5
delta = 0.1  # differences of up to 0.10 (10 percentage points) count as equivalent
alpha = 0.05
power = 0.9
correlation = 0.3

# Here n1 is the number of pairs, not a per-group size
n1 = sample_size_tost_paired_prop(p1, p2, delta, alpha, power, correlation)
print(f"Required sample size (equal proportions): {n1}")


In [None]:
import statsmodels.stats.weightstats as sm
import numpy as np

def verify_power_tost_paired_prop(n, p1, p2, delta, alpha=0.05, correlation=0, simulations=1000):
    """
    Estimate the power of a paired-proportion TOST by Monte Carlo simulation.

    Each replicate draws n pairs of correlated standard-normal latent values,
    thresholds them into 0/1 outcomes with marginal probabilities p1 and p2,
    and runs ``ttost_paired`` on the two binary vectors with bounds
    (-delta, delta). A replicate counts as a success when the returned
    p-value is below alpha.

    Parameters:
    -----------
    n : int
        Sample size (number of pairs)
    p1, p2, delta, alpha, correlation :
        Same as in sample_size_tost_paired_prop function. Note that
        ``correlation`` is applied to the latent normal variables, so the
        correlation between the binary outcomes themselves will differ.
    simulations : int
        Number of simulations to run

    Returns:
    --------
    achieved_power : float
        Proportion of simulations where equivalence was concluded
    """
    # Latent cut-offs: P(latent <= cutoff) equals the target proportion
    cutoff1 = stats.norm.ppf(p1)
    cutoff2 = stats.norm.ppf(p2)

    equivalence_count = 0
    for _ in range(simulations):
        # Correlated latent normals
        # (simplified approach - in practice more complex correlation models may be needed)
        z1 = np.random.normal(0, 1, n)
        noise = np.random.normal(0, 1, n)
        z2 = correlation * z1 + np.sqrt(1 - correlation**2) * noise

        outcome1 = (z1 <= cutoff1).astype(int)
        outcome2 = (z2 <= cutoff2).astype(int)

        # First element of the result is the overall TOST p-value
        tost_result = sm.ttost_paired(outcome1, outcome2, -delta, delta)
        if tost_result[0] < alpha:
            equivalence_count += 1

    return equivalence_count / simulations

# Simulation check of the paired design: n is fixed at 300 pairs and the
# remaining arguments reuse the notebook-level p1, p2, delta, alpha and
# correlation set in the paired-example cell. The result is the fraction of
# 10000 simulated studies that conclude equivalence.
verify_power_tost_paired_prop(
    n=300,
    p1=p1,
    p2=p2,
    delta=delta,
    alpha=alpha,
    correlation=correlation,
    simulations=10000
)

In [None]:
from scipy.stats import norm
import numpy as np

def tost_samplesize(p1, p2, alpha, power, margin_low, margin_high, ratio=1):
    """
    Smallest group-1 size for a two-sample TOST of proportions.

    Uses the normal approximation with unpooled variance. For group sizes
    n1 = n and n2 = int(n * ratio) (at least 1) the power is

        Phi((d - margin_low) / se - z_alpha)
        + Phi((margin_high - d) / se - z_alpha) - 1

    with d = p1 - p2 and se**2 = p1*(1-p1)/n1 + p2*(1-p2)/n2. The smallest n
    whose power reaches ``power`` is returned.

    Parameters
    ----------
    p1, p2 : float
        Expected proportions in groups 1 and 2 (0 to 1)
    alpha : float
        Significance level of each one-sided test
    power : float
        Desired power (strictly between 0 and 1)
    margin_low, margin_high : float
        Lower and upper equivalence bounds for p1 - p2; the expected
        difference must lie strictly between them
    ratio : float, optional
        Allocation ratio n2/n1 (default: 1)

    Returns
    -------
    dict
        {'n1': n1, 'n2': n2}

    Raises
    ------
    ValueError
        If an argument is out of range, or if p1 - p2 is not strictly inside
        (margin_low, margin_high); the target power would then never be
        reached.
    """
    if not (0 <= p1 <= 1) or not (0 <= p2 <= 1):
        raise ValueError("p1 and p2 must be between 0 and 1")
    if not (0 < alpha < 1):
        raise ValueError("alpha must be between 0 and 1")
    if not (0 < power < 1):
        raise ValueError("power must be between 0 and 1")
    if ratio <= 0:
        raise ValueError("ratio must be positive")

    delta = p1 - p2
    if not (margin_low < delta < margin_high):
        raise ValueError("p1 - p2 must lie strictly between margin_low and margin_high")

    z_alpha = norm.ppf(1 - alpha)

    def group2_size(n):
        # int() truncation would give 0 for small n when ratio < 1
        return max(1, int(n * ratio))

    def achieved_power(n):
        se = np.sqrt(p1*(1-p1)/n + p2*(1-p2)/group2_size(n))
        if se == 0:
            # No sampling variability: equivalence is always declared
            return 1.0
        # Distances from the expected difference to each margin, in SE units
        lambda_low = (delta - margin_low)/se
        lambda_high = (margin_high - delta)/se
        return norm.cdf(lambda_low - z_alpha) + norm.cdf(lambda_high - z_alpha) - 1

    # Power is non-decreasing in n: bracket the answer by doubling, then
    # bisect for the smallest adequate n.
    lo, hi = 1, 1
    while achieved_power(hi) < power:
        lo = hi + 1
        hi *= 2
    while lo < hi:
        mid = (lo + hi) // 2
        if achieved_power(mid) >= power:
            hi = mid
        else:
            lo = mid + 1

    return {'n1': lo, 'n2': group2_size(lo)}



In [None]:
from math import sqrt, ceil

from scipy.stats import norm


def sd_diff_proportions(p1: float, p2: float, rho: float) -> float:
    """Return the standard deviation of the difference of two paired 0/1 outcomes.

    Computes sqrt(p1*(1-p1) + p2*(1-p2) - 2*rho*sqrt(p1*(1-p1)*p2*(1-p2))).
    For example, p1 = p2 = 0.5 gives sqrt(0.5) ≈ 0.7071 at rho = 0 and
    sqrt(0.35) ≈ 0.5916 at rho = 0.3.

    Parameters
    ----------
    p1, p2 : float
        Success probabilities of the two paired outcomes (0 to 1)
    rho : float
        Correlation between the two outcomes (-1 to 1)
    """
    var_diff = p1*(1-p1) + p2*(1-p2) - 2*rho*sqrt(p1*(1-p1)*p2*(1-p2))
    # For |rho| = 1 the exact value can be 0 while rounding leaves a tiny
    # negative number, which would make sqrt raise a math domain error.
    return sqrt(max(var_diff, 0.0))

# p1 = p2 = 0.5, rho = 0.3: 0.25 + 0.25 - 2*0.3*0.25 = 0.35, so this is sqrt(0.35) ≈ 0.5916
sd_diff_proportions(0.5, 0.5, 0.3)

In [None]:
def n_for_tost_paired_props(p1: float, p2: float, rho: float, delta: float, alpha: float, power: float) -> int:
    """Return the number of pairs needed for a TOST of paired proportions.

    Normal approximation for the mean paired difference. With d = |p1 - p2|
    and var_diff the variance of one paired difference, the power at n pairs is

        Phi((delta - d) * sqrt(n / var_diff) - z_alpha)
        + Phi((delta + d) * sqrt(n / var_diff) - z_alpha) - 1

    and the smallest n reaching ``power`` is returned. The closed form
    (z_alpha + z_beta)**2 * var_diff / (delta - d)**2 ignores the second
    margin and is too small when p1 == p2; it is used only as the lower end
    of the search interval.

    Parameters
    ----------
    p1, p2 : float
        Expected proportions in the two conditions (0 to 1)
    rho : float
        Correlation between the paired outcomes (-1 to 1)
    delta : float
        Equivalence margin; must be larger than |p1 - p2|
    alpha : float
        Significance level of each one-sided test
    power : float
        Desired power (strictly between 0 and 1)

    Raises
    ------
    ValueError
        If an argument is out of range, or if delta <= |p1 - p2|.
    """
    if not (0 <= p1 <= 1) or not (0 <= p2 <= 1):
        raise ValueError("p1 and p2 must be between 0 and 1")
    if not (-1 <= rho <= 1):
        raise ValueError("rho must be between -1 and 1")
    if not (0 < alpha < 0.5):
        raise ValueError("alpha must be between 0 and 0.5")
    if not (0 < power < 1):
        raise ValueError("power must be between 0 and 1")

    true_diff = abs(p1 - p2)
    if delta <= true_diff:
        raise ValueError("delta must be greater than |p1 - p2|")

    # Rounding can leave the variance marginally negative when |rho| = 1
    var_diff = max(p1*(1-p1) + p2*(1-p2) - 2*rho*sqrt(p1*(1-p1)*p2*(1-p2)), 0.0)
    if var_diff == 0:
        # Paired differences are constant: a single pair is the minimum design.
        return 1

    z_alpha = norm.ppf(1-alpha)

    def achieved_power(n_pairs):
        scale = sqrt(n_pairs / var_diff)
        return (
            norm.cdf((delta - true_diff) * scale - z_alpha)
            + norm.cdf((delta + true_diff) * scale - z_alpha)
            - 1
        )

    den = (delta - true_diff)**2

    # Lower bound: the nearer margin alone needs power >= 1 - beta.
    # Upper bound: power >= 1 - beta/2 against the nearer margin guarantees
    # joint power >= 1 - beta.
    z_lower = max(z_alpha + norm.ppf(power), 0.0)
    z_upper = z_alpha + norm.ppf(1 - (1 - power) / 2)
    lo = max(1, ceil(z_lower**2 * var_diff / den))
    hi = max(lo, ceil(z_upper**2 * var_diff / den))

    # Power is increasing in n, so bisect for the smallest adequate n.
    while lo < hi:
        mid = (lo + hi) // 2
        if achieved_power(mid) >= power:
            hi = mid
        else:
            lo = mid + 1

    return lo

# Disabled example: equal proportions with correlated pairs.
# n_for_tost_paired_props(
#     p1=0.5,
#     p2=0.50,
#     rho=0.3,
#     delta=0.1,
#     alpha=0.05,
#     power=0.9
# )  # closed-form (z_alpha + z_beta) formula: n = 300

# Unequal proportions (|p1 - p2| = 0.05) with independent pairs.
n_for_tost_paired_props(
    p1=0.45,
    p2=0.50,
    rho=0.0,
    delta=0.15,
    alpha=0.05,
    power=0.8
)  # closed-form (z_alpha + z_beta) formula: ceil(307.58) = 308

In [None]:
# 0.35 = 0.25 + 0.25 - 2*0.3*0.25, the paired-difference variance for
# p1 = p2 = 0.5 and rho = 0.3; its square root is ≈ 0.5916
sqrt(0.35)