<a href="https://colab.research.google.com/github/gcosma/DECODEclinicalTrialCalc/blob/main/CoopersvFINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math

# Tabulated standard-normal quantiles for the most common settings. They are
# kept (rather than always computing exact quantiles) so that results for
# these settings stay identical to earlier runs of this calculator.
# z_alpha is the two-sided critical value z_(1 - alpha/2).
_Z_ALPHA_TABLE = {0.01: 2.576, 0.05: 1.96, 0.1: 1.645}
# An earlier 0.7 -> 0.67 entry was removed: 0.67 is the quantile for 75% power;
# the value for 70% power is about 0.524 and is now computed exactly.
_Z_BETA_TABLE = {0.8: 0.84, 0.9: 1.28}


def _z_scores(alpha, power):
    """Return ``(z_alpha, z_beta)`` for a two-sided test at *alpha* with the given *power*.

    Tabulated values are used when available; any other setting is computed
    from the standard normal distribution instead of silently falling back to
    the alpha=0.05 / power=0.9 values.

    Raises
    ------
    ValueError
        If *alpha* or *power* is not strictly between 0 and 1.
    """
    from statistics import NormalDist

    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")
    if not 0 < power < 1:
        raise ValueError(f"power must be strictly between 0 and 1, got {power!r}")

    z_alpha = _Z_ALPHA_TABLE.get(alpha)
    if z_alpha is None:
        z_alpha = NormalDist().inv_cdf(1 - alpha / 2)

    z_beta = _Z_BETA_TABLE.get(power)
    if z_beta is None:
        z_beta = NormalDist().inv_cdf(power)

    return z_alpha, z_beta


def _summarise_design(base_n, icc, cluster_size, attrition):
    """Inflate *base_n* for clustering and attrition and derive the per-arm counts.

    Returns the result dictionary shared by both public calculators.

    Raises
    ------
    ValueError
        If *cluster_size* is not positive or *attrition* is outside ``[0, 1)``.
    """
    if cluster_size <= 0:
        raise ValueError(f"cluster_size must be positive, got {cluster_size!r}")
    if not 0 <= attrition < 1:
        raise ValueError(f"attrition must be in [0, 1), got {attrition!r}")

    # Design effect for cluster randomization
    design_effect = 1 + (cluster_size - 1) * icc

    # Sample size after clustering, then after inflating for expected drop-out
    cluster_adjusted_n = math.ceil(base_n * design_effect)
    final_n = math.ceil(cluster_adjusted_n / (1 - attrition))

    # final_n is split evenly across the two arms
    clusters_per_arm = math.ceil(final_n / (2 * cluster_size))
    participants_per_arm = math.ceil(final_n / 2)

    return {
        "base_n": base_n,
        "design_effect": design_effect,
        "cluster_adjusted_n": cluster_adjusted_n,
        "final_n": final_n,
        "participants_per_arm": participants_per_arm,
        "clusters_per_arm": clusters_per_arm,
        "total_clusters": clusters_per_arm * 2,
        "total_participants": participants_per_arm * 2,
    }


def calculate_sample_size_binary(p1, p2, alpha=0.05, power=0.9, icc=0.05, cluster_size=6, attrition=0.2):
    """
    Calculate sample size for a cluster-randomized trial with a binary outcome (proportion).

    Parameters:
    -----------
    p1 : float
        Proportion of people in the **control group** whose health needs are identified and met (e.g., 0.5 for 50%).
    p2 : float
        Proportion of people in the **intervention group** whose health needs are identified and met (e.g., 0.75 for 75%).
    alpha : float
        Two-sided significance level (default: 0.05).
    power : float
        Statistical power (default: 0.9 for 90% power).
    icc : float
        Intraclass correlation coefficient (default: 0.05).
    cluster_size : float
        Average number of participants per cluster (default: 6).
    attrition : float
        Expected attrition rate (default: 0.2 for 20%).

    Returns:
    --------
    dict
        Keys: ``base_n``, ``design_effect``, ``cluster_adjusted_n``, ``final_n``,
        ``participants_per_arm``, ``clusters_per_arm``, ``total_clusters``,
        ``total_participants``.

    Raises:
    -------
    ValueError
        If a proportion is outside [0, 1], if ``p1 == p2`` (no effect to detect),
        or if alpha, power, cluster_size or attrition is out of range.
    """
    for label, value in (("p1", p1), ("p2", p2)):
        if not 0 <= value <= 1:
            raise ValueError(f"{label} must be a proportion in [0, 1], got {value!r}")
    if p1 == p2:
        raise ValueError("p1 and p2 must differ: there is no effect to detect")

    z_alpha, z_beta = _z_scores(alpha, power)

    # Basic sample size (without clustering), using the pooled proportion.
    # NOTE(review): in its usual textbook form this expression is the size
    # required in EACH arm, whereas downstream it is treated as the total over
    # both arms (it is halved to obtain participants_per_arm). Left unchanged
    # to preserve existing results -- confirm with the trial statistician.
    pooled = (p1 + p2) / 2
    base_n = math.ceil(
        ((z_alpha + z_beta) ** 2 * (2 * pooled * (1 - pooled))) / ((p2 - p1) ** 2)
    )

    return _summarise_design(base_n, icc, cluster_size, attrition)


def calculate_sample_size_count(lambda1, lambda2, sigma1, sigma2, alpha=0.05, power=0.9, icc=0.05, cluster_size=6, attrition=0.2):
    """
    Calculate sample size for a cluster-randomized trial with a count outcome.

    Parameters:
    -----------
    lambda1 : float
        Average number of health needs **identified and met per person** in the **control group**.
    lambda2 : float
        Average number of health needs **identified and met per person** in the **intervention group**.
    sigma1 : float
        Standard deviation of health needs in the **control group**.
    sigma2 : float
        Standard deviation of health needs in the **intervention group**.
    alpha : float
        Two-sided significance level (default: 0.05).
    power : float
        Statistical power (default: 0.9 for 90% power).
    icc : float
        Intraclass correlation coefficient (default: 0.05).
    cluster_size : float
        Average number of participants per cluster (default: 6).
    attrition : float
        Expected attrition rate (default: 0.2 for 20%).

    Returns:
    --------
    dict
        Keys: ``base_n``, ``design_effect``, ``cluster_adjusted_n``, ``final_n``,
        ``participants_per_arm``, ``clusters_per_arm``, ``total_clusters``,
        ``total_participants``.

    Raises:
    -------
    ValueError
        If ``lambda1 == lambda2`` (no effect to detect), or if alpha, power,
        cluster_size or attrition is out of range.
    """
    if lambda1 == lambda2:
        raise ValueError("lambda1 and lambda2 must differ: there is no effect to detect")

    z_alpha, z_beta = _z_scores(alpha, power)

    # Basic sample size (without clustering) for a difference in means with
    # unequal variances.
    # NOTE(review): as with the binary calculator, this expression is normally
    # the per-arm size but is treated downstream as the two-arm total. Left
    # unchanged to preserve existing results -- confirm with the statistician.
    base_n = math.ceil(
        ((z_alpha + z_beta) ** 2 * (sigma1 ** 2 + sigma2 ** 2)) / ((lambda2 - lambda1) ** 2)
    )

    return _summarise_design(base_n, icc, cluster_size, attrition)

def display_parameters_and_results(params, results, outcome_type):
    """
    Print a report of the inputs and outputs of a sample size calculation.

    The report consists of a heading for the outcome type, the
    outcome-specific parameters, the shared design parameters, and finally
    the computed sample sizes.

    Parameters:
    -----------
    params : dict
        Parameter values that were passed to the calculator.
    results : dict
        Result dictionary returned by the calculator.
    outcome_type : str
        Either "binary" or "count"; any other value prints no heading and no
        outcome-specific parameters.
    """
    rule = "=" * 70
    is_binary = outcome_type == "binary"
    is_count = (not is_binary) and outcome_type == "count"

    # Heading and outcome-specific parameter descriptions
    if is_binary:
        print("\nBINARY OUTCOME — Proportion of People with New Health Needs Identified and Met")
        specific = (
            ("p1", "Proportion of people in the **control group** whose health needs are identified and met"),
            ("p2", "Proportion of people in the **intervention group** whose health needs are identified and met"),
        )
    elif is_count:
        print("\nCOUNT OUTCOME — Total Number of New Health Needs Identified and Met")
        specific = (
            ("lambda1", "Average number of health needs **identified and met per person** in the **control group**"),
            ("lambda2", "Average number of health needs **identified and met per person** in the **intervention group**"),
            ("sigma1", "Standard deviation of health needs in the **control group**"),
            ("sigma2", "Standard deviation of health needs in the **intervention group**"),
        )
    else:
        specific = ()
    print(rule)

    # Design parameters common to both outcome types
    shared = (
        ("alpha", "Significance level, default: 0.05"),
        ("power", "Statistical power, default: 0.9 for 90% power"),
        ("icc", "Intraclass correlation coefficient, default: 0.05"),
        ("cluster_size", "Average number of participants per cluster, default: 6"),
        ("attrition", "Expected attrition rate, default: 0.2 for 20%"),
    )
    for key, note in specific + shared:
        print(f"{key}: {params[key]} ({note})")

    # Results section: one (template, result key) pair per printed line
    print(f"\n{outcome_type.upper()} OUTCOME RESULTS:")
    print(rule)
    report = (
        ("Base sample size (without clustering): {} participants", "base_n"),
        ("Design effect: {:.2f}", "design_effect"),
        ("Cluster-adjusted sample size: {} participants", "cluster_adjusted_n"),
        ("Final sample size after attrition adjustment: {} participants", "final_n"),
        ("Required participants per arm: {}", "participants_per_arm"),
        ("Required practices (clusters) per arm: {}", "clusters_per_arm"),
        ("Total participants needed: {}", "total_participants"),
        ("Total practices needed: {}", "total_clusters"),
    )
    for template, key in report:
        print(template.format(results[key]))

# Worked example 1 -- binary outcome: proportion of people with new health
# needs identified and met (50% in control vs 75% in intervention).
binary_params = dict(
    p1=0.5,
    p2=0.75,
    alpha=0.05,
    power=0.9,
    icc=0.05,
    cluster_size=6,
    attrition=0.2,
)
binary_results = calculate_sample_size_binary(**binary_params)
display_parameters_and_results(binary_params, binary_results, "binary")

# Worked example 2 -- count outcome: total number of new health needs
# identified and met (mean 2.0, SD 1.5 in control vs mean 3.0, SD 1.8 in
# intervention).
count_params = dict(
    lambda1=2.0,
    lambda2=3.0,
    sigma1=1.5,
    sigma2=1.8,
    alpha=0.05,
    power=0.9,
    icc=0.05,
    cluster_size=6,
    attrition=0.2,
)
count_results = calculate_sample_size_count(**count_params)
display_parameters_and_results(count_params, count_results, "count")


BINARY OUTCOME — Proportion of People with New Health Needs Identified and Met
p1: 0.5 (Proportion of people in the **control group** whose health needs are identified and met)
p2: 0.75 (Proportion of people in the **intervention group** whose health needs are identified and met)
alpha: 0.05 (Significance level, default: 0.05)
power: 0.9 (Statistical power, default: 0.9 for 90% power)
icc: 0.05 (Intraclass correlation coefficient, default: 0.05)
cluster_size: 6 (Average number of participants per cluster, default: 6)
attrition: 0.2 (Expected attrition rate, default: 0.2 for 20%)

BINARY OUTCOME RESULTS:
Base sample size (without clustering): 79 participants
Design effect: 1.25
Cluster-adjusted sample size: 99 participants
Final sample size after attrition adjustment: 124 participants
Required participants per arm: 62
Required practices (clusters) per arm: 11
Total participants needed: 124
Total practices needed: 22

COUNT OUTCOME — Total Number of New Health Needs Identified and Met
l