# 11 - Test-Retest Reliability Analysis

**Purpose**: This notebook provides a suite of tools for conducting power analysis and sample size estimation for test-retest reliability studies, specifically using the Intraclass Correlation Coefficient (ICC).

**Inputs**: None (this notebook uses simulated data and theoretical parameters).

**Outputs**:
- Console output with calculated sample size and power for example parameters.
- A plot visualizing the relationship between sample size and statistical power.
- A sample of simulated test-retest data and the corresponding ICC calculation results.

### Key Functions:
1.  **`calculate_sample_size_icc`**: Determines the required number of subjects to achieve a desired statistical power for detecting a specific ICC value.
2.  **`calculate_power_icc`**: Calculates the statistical power for a given sample size and expected ICC.
3.  **`plot_sample_size_vs_power`**: Generates a plot to visualize the trade-off between sample size and power.
4.  **`simulate_test_retest_data`**: Creates a synthetic dataset with a predefined ICC value, useful for validating the calculation methods.
5.  **`calculate_icc_from_data`**: Uses the `pingouin` library to compute the ICC from a long-format DataFrame.

### 11.1 ICC-Based Sample Size and Power Functions

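This cell defines all the core functions for the test-retest reliability analysis: the sample size and power calculations, the power-curve plot, the data simulator, and the ICC computation.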


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd



def calculate_sample_size_icc(icc0, icc1, alpha=0.05, power=0.80, k=2):
    """
    Calculate the required sample size for test-retest reliability using ICC.

    Both ICC values are mapped to Fisher's z scale and the sample size is
    obtained from a normal approximation using a two-tailed critical value.

    Parameters:
    -----------
    icc0 : float
        Minimum acceptable ICC value (null hypothesis), strictly between -1 and 1
    icc1 : float
        Expected ICC value (alternative hypothesis), strictly between -1 and 1
        and different from icc0
    alpha : float, optional
        Significance level (Type I error rate), default is 0.05
    power : float, optional
        Desired statistical power (1 - Type II error rate), default is 0.80
    k : int, optional
        Number of repetitions or raters per subject, default is 2 for test-retest

    Returns:
    --------
    n : int
        Required sample size (number of subjects)

    Raises:
    -------
    ValueError
        If an ICC is outside (-1, 1), if icc0 equals icc1 (no finite sample
        size can separate identical hypotheses), if alpha or power is not
        strictly between 0 and 1, or if k is smaller than 1.
    """
    # Fail early with a clear message instead of an OverflowError,
    # ZeroDivisionError or NaN surfacing from the arithmetic below.
    if not (-1 < icc0 < 1 and -1 < icc1 < 1):
        raise ValueError("icc0 and icc1 must lie strictly between -1 and 1")
    if icc0 == icc1:
        raise ValueError("icc0 and icc1 must differ to compute a sample size")
    if not 0 < alpha < 1:
        raise ValueError("alpha must lie strictly between 0 and 1")
    if not 0 < power < 1:
        raise ValueError("power must lie strictly between 0 and 1")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Calculate z-values for alpha and power
    z_alpha = stats.norm.ppf(1 - alpha/2)  # Two-tailed test
    z_beta = stats.norm.ppf(power)

    # Fisher's z-transformation of ICC values
    f_icc0 = 0.5 * np.log((1 + icc0) / (1 - icc0))
    f_icc1 = 0.5 * np.log((1 + icc1) / (1 - icc1))

    # Variance term evaluated under the null hypothesis; algebraically equal
    # to (1 + (k - 1) * icc0) / (k * (1 + icc0)).
    # NOTE(review): the original comment attributes this to Walter et al. --
    # confirm against the reference before citing it.
    var = (1 + (k - 1) * icc0) * (1 - icc0) / (k * (1 - icc0**2))

    # Calculate required sample size
    n = (z_alpha + z_beta)**2 * var / (f_icc1 - f_icc0)**2

    return int(np.ceil(n))  # Round up to the nearest integer

def calculate_power_icc(n, icc0, icc1, alpha=0.05, k=2):
    """
    Calculate the statistical power for test-retest reliability using ICC.

    Both ICC values are mapped to Fisher's z scale and the power of a
    two-tailed test is obtained from a normal approximation. All arithmetic
    is element-wise, so NumPy arrays may be passed for the numeric arguments.

    Parameters:
    -----------
    n : int
        Sample size (number of subjects), must be non-negative
    icc0 : float
        Minimum acceptable ICC value (null hypothesis), strictly between -1 and 1
    icc1 : float
        Expected ICC value (alternative hypothesis), strictly between -1 and 1
    alpha : float, optional
        Significance level (Type I error rate), default is 0.05
    k : int, optional
        Number of repetitions or raters per subject, default is 2 for test-retest

    Returns:
    --------
    power : float
        Statistical power (1 - Type II error rate)

    Raises:
    -------
    ValueError
        If n is negative, an ICC is outside (-1, 1), alpha is not strictly
        between 0 and 1, or k is smaller than 1.
    """
    # Validate up front: without these checks bad inputs surface as silent
    # NaN results or a ZeroDivisionError. np.all keeps array inputs working
    # and also rejects NaN, because comparisons with NaN are False.
    if not np.all(np.asarray(n) >= 0):
        raise ValueError("n must be non-negative")
    if not (np.all(np.abs(np.asarray(icc0)) < 1)
            and np.all(np.abs(np.asarray(icc1)) < 1)):
        raise ValueError("icc0 and icc1 must lie strictly between -1 and 1")
    if not np.all((np.asarray(alpha) > 0) & (np.asarray(alpha) < 1)):
        raise ValueError("alpha must lie strictly between 0 and 1")
    if not np.all(np.asarray(k) >= 1):
        raise ValueError("k must be at least 1")

    # Calculate z-value for alpha
    z_alpha = stats.norm.ppf(1 - alpha/2)  # Two-tailed test

    # Fisher's z-transformation of ICC values
    f_icc0 = 0.5 * np.log((1 + icc0) / (1 - icc0))
    f_icc1 = 0.5 * np.log((1 + icc1) / (1 - icc1))

    # Variance term evaluated under the null hypothesis
    var = (1 + (k - 1) * icc0) * (1 - icc0) / (k * (1 - icc0**2))

    # Standardised distance between the hypotheses on the z scale
    ncp = np.sqrt(n) * (f_icc1 - f_icc0) / np.sqrt(var)

    # Probability of landing in either rejection region of the two-tailed test
    power = 1 - stats.norm.cdf(z_alpha - ncp) + stats.norm.cdf(-z_alpha - ncp)

    return power

def plot_sample_size_vs_power(icc0, icc1, alpha=0.05, k=2, max_n=200,
                              target_power=0.8):
    """
    Plot the relationship between sample size and power for ICC-based reliability.

    Power is evaluated with calculate_power_icc at sample sizes 10, 20, ...
    up to max_n, and a horizontal reference line marks the target power.
    The figure is displayed with plt.show(); nothing is returned.

    Parameters:
    -----------
    icc0 : float
        Minimum acceptable ICC value (null hypothesis)
    icc1 : float
        Expected ICC value (alternative hypothesis)
    alpha : float, optional
        Significance level (Type I error rate), default is 0.05
    k : int, optional
        Number of repetitions or raters per subject, default is 2 for test-retest
    max_n : int, optional
        Maximum sample size to plot, default is 200. Values below 10 leave
        the curve empty because the grid starts at 10.
    target_power : float, optional
        Power level at which the dashed reference line is drawn, default is 0.8
    """
    # Evaluate the power curve on a grid with a step of 10 subjects
    sample_sizes = np.arange(10, max_n + 1, 10)
    powers = [calculate_power_icc(n, icc0, icc1, alpha, k) for n in sample_sizes]

    plt.figure(figsize=(10, 6))
    plt.plot(sample_sizes, powers, '-o')
    # Reference line for the desired power (previously fixed at 0.8)
    plt.axhline(y=target_power, color='r', linestyle='--',
                label=f'Power = {target_power}')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.xlabel('Sample Size (Number of Subjects)')
    plt.ylabel('Statistical Power')
    plt.title(f'Sample Size vs. Power for ICC (icc0={icc0}, icc1={icc1}, alpha={alpha}, k={k})')
    plt.legend()
    plt.tight_layout()
    plt.show()


def simulate_test_retest_data(n, icc, k=2, mean=100, sd=15, seed=None):
    """
    Simulate test-retest data with a specified ICC.

    Each score is the grand mean plus a per-subject random effect plus
    independent measurement noise. The total variance sd**2 is split so that
    the between-subject share equals icc.

    Parameters:
    -----------
    n : int
        Number of subjects, must be non-negative
    icc : float
        Target ICC value, between 0 and 1 inclusive
    k : int, optional
        Number of repetitions, default is 2
    mean : float, optional
        Grand mean of the measurements, default is 100
    sd : float, optional
        Standard deviation of the measurements, default is 15
    seed : int, optional
        Seed for a dedicated random generator. When None (the default) the
        global NumPy random state is used, so np.random.seed() still
        controls the output.

    Returns:
    --------
    data : pandas.DataFrame
        Simulated test-retest data in long format with columns 'subject',
        'measurement' and 'score', ordered by subject and then measurement

    Raises:
    -------
    ValueError
        If n or k is negative, icc is outside [0, 1], or sd is negative.
    """
    if n < 0 or k < 0:
        raise ValueError("n and k must be non-negative")
    if not 0 <= icc <= 1:
        raise ValueError("icc must lie between 0 and 1")
    if sd < 0:
        raise ValueError("sd must be non-negative")

    # np.random (the module) and a Generator both expose normal(), so either
    # can serve as the source of random draws.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Calculate between-subject and within-subject variance components
    var_total = sd**2
    var_between = icc * var_total
    var_within = var_total - var_between

    # One random intercept per subject, one error term per measurement
    subject_effects = rng.normal(0, np.sqrt(var_between), n)
    errors = rng.normal(0, np.sqrt(var_within), (n, k))

    # Broadcast the subject effect across that subject's k measurements
    scores = mean + subject_effects[:, np.newaxis] + errors

    # Build the columns explicitly so that an empty simulation still has them
    return pd.DataFrame({
        'subject': np.repeat(np.arange(1, n + 1, dtype=np.int64), k),
        'measurement': np.tile(np.arange(1, k + 1, dtype=np.int64), n),
        'score': scores.ravel(),
    })

# This function requires the pingouin package: pip install pingouin
def calculate_icc_from_data(data):
    """
    Compute ICC estimates for long-format test-retest data with pingouin.

    Parameters:
    -----------
    data : pandas.DataFrame
        Long-format data with one row per observation and the columns
        'subject', 'measurement' and 'score'

    Returns:
    --------
    icc_results : pandas.DataFrame
        The result of pingouin.intraclass_corr for the given data
    """
    # Imported here so pingouin is only needed when this function is called
    import pingouin

    # Map the fixed column names onto the roles intraclass_corr expects
    column_roles = {
        'targets': 'subject',
        'raters': 'measurement',
        'ratings': 'score',
    }
    return pingouin.intraclass_corr(data=data, **column_roles)


### 11.2 Example Usage and Demonstrations

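This cell demonstrates each of the functions defined above: it computes a required sample size, evaluates the power for a given sample size, plots the power curve, simulates test-retest data, and calculates the ICC from the simulated data.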


In [None]:
# Example parameters
icc0 = 0.70  # Minimum acceptable ICC
icc1 = 0.85  # Expected ICC
alpha = 0.05  # Type I error rate
power = 0.80  # Desired power
k = 2  # Number of repetitions (test-retest)

# Example 1: Calculate required sample size
n = calculate_sample_size_icc(icc0, icc1, alpha, power, k)
print(f"Required sample size: {n} subjects")

# Example 2: Calculate power for a given sample size
n_given = 50
actual_power = calculate_power_icc(n_given, icc0, icc1, alpha, k)
print(f"Power with {n_given} subjects: {actual_power:.4f}")

# Example 3: Plot sample size vs. power
plot_sample_size_vs_power(icc0, icc1, alpha, k, max_n=100)

# Example 4: Simulate test-retest data and calculate ICC
# Seed the global NumPy random state so the simulated data, and the ICC
# computed from it below, are the same on every run of this cell.
np.random.seed(42)
n_sim = 30
true_icc = 0.80
simulated_data = simulate_test_retest_data(n_sim, true_icc, k=2)
print("\nSimulated test-retest data (first 5 rows):")
print(simulated_data.head())

# Example 5: Calculate ICC from the simulated data
# Only the import is guarded, so an ImportError raised for any other reason
# during the calculation is not misreported as a missing package.
try:
    import pingouin as pg
except ImportError:
    print("\nThe pingouin package is required for ICC calculation.")
    print("Please install it using: pip install pingouin")
else:
    icc_results = calculate_icc_from_data(simulated_data)
    print("\nICC calculation results:")
    print(icc_results)
