<a href="https://colab.research.google.com/github/gcosma/DECODEclinicalTrialCalc/blob/main/Coopers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# Set up aesthetics for charts
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['axes.grid'] = True
try:
    plt.style.use('ggplot')
except OSError:
    # Matplotlib raises OSError when a style name cannot be resolved; in that
    # case keep the default style. Catching only OSError (rather than a bare
    # except) lets KeyboardInterrupt/SystemExit and genuine bugs propagate.
    pass

# -----------------------------
# Sample Size Calculator
# -----------------------------

def calculate_sample_size(p1, p2, alpha=0.05, power=0.9, icc=0.05, cluster_size=6, attrition=0.2):
    """
    Calculate sample size for a cluster-randomized trial for health checks.

    The calculation proceeds in three steps, each rounded up to a whole
    participant:

    1. Unadjusted size from the pooled proportion ``p = (p1 + p2) / 2``:
       ``(z_alpha + z_beta)**2 * 2 * p * (1 - p) / (p2 - p1)**2``.
    2. Inflation by the design effect ``1 + (cluster_size - 1) * icc``.
    3. Inflation by ``1 / (1 - attrition)``.

    The result of step 3 is then split evenly across the two arms.

    Parameters:
    -----------
    p1 : float
        Expected proportion of health needs met in control group (e.g., 0.33 for 33%)
    p2 : float
        Expected proportion of health needs met in intervention group (e.g., 0.66 for 66%)
    alpha : float
        Two-sided significance level (default: 0.05)
    power : float
        Statistical power (default: 0.9 for 90% power)
    icc : float
        Intraclass correlation coefficient (default: 0.05)
    cluster_size : float
        Average number of participants per cluster (default: 6)
    attrition : float
        Expected attrition rate (default: 0.2 for 20%)

    Returns:
    --------
    dict
        Dictionary containing sample size calculations: ``base_n``,
        ``cluster_adjusted_n``, ``final_n``, per-arm and total participant
        and cluster counts, and the ``cluster_size`` that was used.

    Raises:
    -------
    ZeroDivisionError
        If ``p1 == p2`` or ``attrition == 1``.
    statistics.StatisticsError
        If a non-tabulated ``alpha`` or ``power`` does not correspond to a
        probability strictly between 0 and 1.
    """
    from statistics import NormalDist

    # Conventional rounded critical values. They are kept (instead of always
    # computing the exact quantile) so results for these common settings stay
    # unchanged. 70% power corresponds to z ~= 0.524 (0.67 would be ~75%).
    z_alpha_table = {0.01: 2.576, 0.05: 1.96, 0.1: 1.645}
    z_beta_table = {0.7: 0.524, 0.8: 0.84, 0.9: 1.28}

    # Any other alpha/power is computed from the standard normal quantile
    # function rather than silently falling back to the 5% / 90% values.
    standard_normal = NormalDist()
    z_alpha = z_alpha_table.get(alpha)
    if z_alpha is None:
        z_alpha = standard_normal.inv_cdf(1 - alpha / 2)
    z_beta = z_beta_table.get(power)
    if z_beta is None:
        z_beta = standard_normal.inv_cdf(power)

    # Calculate basic sample size (without clustering) using the pooled
    # proportion and no continuity correction.
    # NOTE(review): textbook presentations of this formula usually give the
    # size per group, whereas the code below halves final_n to get per-arm
    # counts -- confirm this matches the intended design.
    p = (p1 + p2) / 2
    base_n = math.ceil(
        ((z_alpha + z_beta) ** 2 * (2 * p * (1 - p))) / ((p2 - p1) ** 2)
    )

    # Design effect for cluster randomization
    design_effect = 1 + (cluster_size - 1) * icc

    # Calculate sample size with clustering
    cluster_adjusted_n = math.ceil(base_n * design_effect)

    # Adjust for attrition
    final_n = math.ceil(cluster_adjusted_n / (1 - attrition))

    # Calculate number of clusters needed
    clusters_per_arm = math.ceil(final_n / (2 * cluster_size))
    total_clusters = clusters_per_arm * 2

    # Calculate participants per arm and total participants; rounding the
    # per-arm figure up keeps the two arms equal when final_n is odd.
    participants_per_arm = math.ceil(final_n / 2)
    total_participants = participants_per_arm * 2

    return {
        "base_n": base_n,
        "cluster_adjusted_n": cluster_adjusted_n,
        "final_n": final_n,
        "participants_per_arm": participants_per_arm,
        "clusters_per_arm": clusters_per_arm,
        "total_clusters": total_clusters,
        "total_participants": total_participants,
        "control_participants": participants_per_arm,
        "intervention_participants": participants_per_arm,
        "control_clusters": clusters_per_arm,
        "intervention_clusters": clusters_per_arm,
        "cluster_size": cluster_size
    }

# -----------------------------
# Display Function
# -----------------------------

def display_sample_size_results(result, p1, p2, power, attrition, icc=0.05):
    """Display sample size calculation results in a clear, visual way.

    Prints three sections to stdout: the input parameters, the headline
    per-arm and total requirements, and the step-by-step breakdown.

    Parameters:
    -----------
    result : dict
        Mapping with the keys ``cluster_size``, ``participants_per_arm``,
        ``clusters_per_arm``, ``total_participants``, ``total_clusters``,
        ``base_n``, ``cluster_adjusted_n`` and ``final_n``.
    p1 : float
        Control group proportion, printed as a whole-number percentage.
    p2 : float
        Intervention group proportion, printed as a whole-number percentage.
    power : float
        Statistical power, printed as a whole-number percentage.
    attrition : float
        Attrition rate, printed as a whole-number percentage.
    icc : float
        Intraclass correlation coefficient to report (default: 0.05). Pass the
        value that was used for the calculation so the report matches it.

    Returns:
    --------
    None
    """
    cluster_size = result['cluster_size']

    print("\nSAMPLE SIZE CALCULATION RESULTS")
    print("=" * 50)
    print("Parameters used from Cooper et al. (2014):")
    print(f"- Control group success rate: {p1*100:.0f}%")
    print(f"- Intervention group success rate: {p2*100:.0f}%")
    print(f"- Statistical power: {power*100:.0f}%")
    print(f"- Attrition rate: {attrition*100:.0f}%")
    # Report the ICC supplied by the caller instead of a fixed literal so the
    # printout cannot disagree with the calculation.
    print(f"- Intraclass correlation coefficient (ICC): {icc}")
    print(f"- Average cluster size: {cluster_size} participants per practice")
    print("=" * 50)

    print(f"\nRequired participants per arm: {result['participants_per_arm']}")
    print(f"Required practices (clusters) per arm: {result['clusters_per_arm']}")
    print(f"Total participants needed: {result['total_participants']}")
    print(f"Total practices needed: {result['total_clusters']}")

    print("\nCalculation breakdown:")
    print(f"1. Basic sample size (without clustering): {result['base_n']} participants")
    print(f"2. Adjusted for clustering effect: {result['cluster_adjusted_n']} participants")
    print(f"3. Final sample after allowing for {attrition*100:.0f}% attrition: {result['final_n']} participants")

# -----------------------------
# Main Function
# -----------------------------

def main():
    """Print the banner, run the calculation with fixed inputs, and show the report.

    The inputs are hard-coded below; alpha and ICC are not passed, so
    calculate_sample_size uses its own defaults for them.
    """
    rule = "=" * 80
    banner = (
        rule,
        "SAMPLE SIZE CALCULATOR FOR INTELLECTUAL DISABILITY HEALTH CHECKS",
        "Based on Cooper et al. (2014) - The Lancet Psychiatry",
        rule,
    )
    for line in banner:
        print(line)

    # Using parameters from Cooper et al. (2014)
    control_rate = 0.33       # expected proportion in control group
    intervention_rate = 0.66  # expected proportion in intervention group
    target_power = 0.9        # 90% power
    dropout_rate = 0.2        # 20% attrition
    per_practice = 6          # average participants per practice

    result = calculate_sample_size(
        control_rate,
        intervention_rate,
        power=target_power,
        attrition=dropout_rate,
        cluster_size=per_practice,
    )

    display_sample_size_results(
        result, control_rate, intervention_rate, target_power, dropout_rate
    )

# Run main() only when this file is executed directly (script or notebook
# cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


SAMPLE SIZE CALCULATOR FOR INTELLECTUAL DISABILITY HEALTH CHECKS
Based on Cooper et al. (2014) - The Lancet Psychiatry

SAMPLE SIZE CALCULATION RESULTS
Parameters used from Cooper et al. (2014):
- Control group success rate: 33%
- Intervention group success rate: 66%
- Statistical power: 90%
- Attrition rate: 20%
- Intraclass correlation coefficient (ICC): 0.05
- Average cluster size: 6 participants per practice

Required participants per arm: 39
Required practices (clusters) per arm: 7
Total participants needed: 78
Total practices needed: 14

Calculation breakdown:
1. Basic sample size (without clustering): 49 participants
2. Adjusted for clustering effect: 62 participants
3. Final sample after allowing for 20% attrition: 78 participants
