In [1]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import pingouin as pg
import scipy.stats
from scipy.stats import multivariate_normal as mvn

In [2]:
# Silence warnings (e.g. convergence messages from the repeated model fits) so
# they do not flood the notebook output. The filters are only installed when no
# warning options were supplied to the interpreter (sys.warnoptions is empty),
# so an explicit user setting is left alone.
import sys

if len(sys.warnoptions) == 0:
    import warnings

    # Blanket "ignore" for every Warning subclass ...
    warnings.simplefilter("ignore", category=Warning)
    # ... followed by an explicit entry for FutureWarning.
    warnings.simplefilter("ignore", category=FutureWarning)

This notebook checks whether the more complex models that may be used for hypothesis testing will be sufficiently powered,
<p>i.e., models that add covariates (age, sex, digit span), by-participant random slopes, and random effects of stimuli/fractals.
<br>
<p>We test this with simulated data that include such effects, because the extent of any power loss depends on whether they are present in the data.

In [3]:
# Simulation settings.
# The data are simulated in a new way here so that they recreate the structures
# (participant-level intercepts and slopes, covariates) that the mixed-effects
# models are meant to capture.

# --- General settings ---
np.random.seed(42)  # seed NumPy's global RNG so a fresh "Run All" reproduces the outputs
condition_names = ['disgust', 'fear', 'points']  # names of the experimental conditions
# Participant age distribution: numbers from Mehrhof and Nord 2024,
# which used the same recruitment method.
mean_age = 45
age_sd = 15
n_reps = 10_000  # number of simulated datasets per power estimate
n_participants = 323  # number of participants from the initial power analysis

# --- Fixed effects (true values used to generate the outcome) ---
true_intercept = 5  # baseline intercept
true_slope_condition = 0.2  # change in outcome per unit of the numeric condition code
true_slope_age = -0.05  # age effect
true_slope_sex = 0.005  # sex effect (1 for male, 0 for female)

# --- Variability ---
sd_intercept_participant = 1  # SD of the by-participant random intercepts
sd_slope_condition = 0.5  # SD of the by-participant random slopes for condition
sd_residual = 0.5  # SD of the residual error

In [4]:
# Simulate one dataset and fit the simplest model to it
n_conditions = len(condition_names)   # number of experimental conditions


def simple_model_simulate_and_fit():
    """Simulate one dataset and fit the simplest mixed-effects model.

    The outcome is generated from the module-level settings: a fixed intercept,
    a linear effect of the numeric condition code, age and sex effects,
    by-participant random intercepts and random condition slopes, and Gaussian
    residual noise. Extra columns (fractals, digit_span, feedback_details) are
    simulated as well but are not used by this model's formula.

    The fitted model is ``outcome ~ C(condition_num)`` with participants as
    the grouping factor.

    Returns
    -------
    tuple
        ``(result, data)``: the fitted statsmodels result and the simulated
        long-format DataFrame (one row per participant x condition).
    """
    # --- Participant-level variables ---
    # The draws below are kept in this exact order (age, sex, fractals,
    # digit span) so the sequence of values taken from the global RNG is fixed.
    ages = np.random.normal(mean_age, age_sd, n_participants)
    sexes = np.random.choice([0, 1], size=n_participants)  # random male/female assignment
    fractal_ids = np.random.choice(range(1, 29), size=n_participants)  # fractal used, 1..28
    digit_spans = np.random.normal(7, 1.5, n_participants)  # mean/SD guessed from previous literature
    participant_data = pd.DataFrame({
        'participant_id': np.arange(n_participants),
        'age': ages,
        'sex': sexes,
        'fractals': fractal_ids,
        'digit_span': digit_spans,
    })

    # --- Long format: one row per participant per condition ---
    data = pd.DataFrame({
        'participant_id': participant_data['participant_id'].repeat(n_conditions),
        'condition': np.tile(condition_names, n_participants),
    }).merge(participant_data, on='participant_id')

    # Numeric code for each condition, following the order of condition_names
    condition_codes = dict(zip(condition_names, range(len(condition_names))))
    data['condition_num'] = data['condition'].map(condition_codes)

    # --- By-participant random effects, expanded to one value per row ---
    random_intercepts = np.random.normal(0, sd_intercept_participant, n_participants)
    random_slopes = np.random.normal(0, sd_slope_condition, n_participants)
    row_participant = data['participant_id'].to_numpy()
    data['random_intercept'] = random_intercepts[row_participant]
    data['random_slope'] = random_slopes[row_participant]

    # --- Outcome ---
    condition_term = data['condition_num'] * (true_slope_condition + data['random_slope'])
    age_term = data['age'] * true_slope_age
    sex_term = data['sex'] * true_slope_sex
    residual_noise = np.random.normal(0, sd_residual, len(data))
    data['outcome'] = (
        true_intercept
        + data['random_intercept']
        + condition_term
        + age_term
        + sex_term
        + residual_noise
    )

    # --- Feedback stimulus identifiers ---
    # 1-5 for disgust rows, 6-10 for fear rows, always 11 for points rows.
    n_rows = len(data)
    disgust_details = np.random.randint(1, 6, size=n_rows)
    fear_details = np.random.randint(6, 11, size=n_rows)
    is_condition = [data['condition'] == name for name in ('disgust', 'fear', 'points')]
    # The default (n_participants) only applies to rows matching none of the conditions.
    data['feedback_details'] = np.select(
        is_condition, [disgust_details, fear_details, 11], n_participants
    )

    # --- Fit the mixed-effects model, grouped by participant ---
    model = smf.mixedlm(
        'outcome ~ C(condition_num)',
        data,
        groups=data['participant_id'],
        missing='drop',
    )
    return model.fit(), data

In [5]:
# Power estimate for the simple model: simulate and fit n_reps datasets and
# record, for each one, whether the 'C(condition_num)[T.1]' coefficient
# (condition coded 1 versus the reference level) is significant at p < 0.05.
power_results = [
    simple_model_simulate_and_fit()[0].pvalues['C(condition_num)[T.1]'] < 0.05
    for _ in range(n_reps)
]
# Power = proportion of simulated datasets with a significant effect
estimated_power = np.mean(power_results)
print(f"\nEstimated Power for condition effect: {estimated_power:.2f}")


Estimated Power for condition effect: 0.97


In [None]:
# Repeat the power estimate for a medium-complexity model,
# which includes only the effects that were used to simulate the data.

In [6]:
def medium_model_simulate_and_fit():
    """Simulate one dataset and fit the medium-complexity mixed-effects model.

    The outcome is generated from the module-level settings: a fixed intercept,
    a linear effect of the numeric condition code, age and sex effects,
    by-participant random intercepts and random condition slopes, and Gaussian
    residual noise.

    The fitted model is ``outcome ~ C(condition_num)+age+sex`` with participants
    as the grouping factor and ``re_formula='~condition_num'``, i.e. it models
    the same effects that generate the data.

    Returns
    -------
    tuple
        ``(result, data)``: the fitted statsmodels result and the simulated
        long-format DataFrame (one row per participant x condition).
    """
    # --- Participant-level variables (age first, then sex, to keep the RNG order) ---
    ages = np.random.normal(mean_age, age_sd, n_participants)
    sexes = np.random.choice([0, 1], size=n_participants)  # random male/female assignment
    participant_data = pd.DataFrame({
        'participant_id': np.arange(n_participants),
        'age': ages,
        'sex': sexes,
    })

    # --- Long format: one row per participant per condition ---
    data = pd.DataFrame({
        'participant_id': participant_data['participant_id'].repeat(n_conditions),
        'condition': np.tile(condition_names, n_participants),
    }).merge(participant_data, on='participant_id')

    # Numeric code for each condition, following the order of condition_names
    condition_codes = dict(zip(condition_names, range(len(condition_names))))
    data['condition_num'] = data['condition'].map(condition_codes)

    # --- By-participant random effects, expanded to one value per row ---
    random_intercepts = np.random.normal(0, sd_intercept_participant, n_participants)
    random_slopes = np.random.normal(0, sd_slope_condition, n_participants)
    row_participant = data['participant_id'].to_numpy()
    data['random_intercept'] = random_intercepts[row_participant]
    data['random_slope'] = random_slopes[row_participant]

    # --- Outcome ---
    condition_term = data['condition_num'] * (true_slope_condition + data['random_slope'])
    age_term = data['age'] * true_slope_age
    sex_term = data['sex'] * true_slope_sex
    residual_noise = np.random.normal(0, sd_residual, len(data))
    data['outcome'] = (
        true_intercept
        + data['random_intercept']
        + condition_term
        + age_term
        + sex_term
        + residual_noise
    )

    # --- Fit the mixed-effects model, grouped by participant ---
    model = smf.mixedlm(
        "outcome ~ C(condition_num)+age+sex",
        data,
        groups=data["participant_id"],
        re_formula='~condition_num',
    )
    return model.fit(), data

In [7]:
# Power estimate for the medium model: simulate and fit n_reps datasets and
# record, for each one, whether the 'C(condition_num)[T.1]' coefficient
# (condition coded 1 versus the reference level) is significant at p < 0.05.
power_results = [
    medium_model_simulate_and_fit()[0].pvalues['C(condition_num)[T.1]'] < 0.05
    for _ in range(n_reps)
]
# Power = proportion of simulated datasets with a significant effect
estimated_power = np.mean(power_results)
print(f"\nEstimated Power for condition effect: {estimated_power:.2f}")


Estimated Power for condition effect: 0.99


In [None]:
# Finally, estimate power for the maximal model,
# which also models effects that are not present in the simulated data.

In [8]:
def maximal_model_simulate_and_fit():
    """Simulate one dataset and fit the maximal mixed-effects model.

    The outcome is generated from the module-level settings: a fixed intercept,
    a linear effect of the numeric condition code, age and sex effects,
    by-participant random intercepts and random condition slopes, and Gaussian
    residual noise. The fractals, digit_span and feedback_details columns are
    simulated independently of the outcome, so the model terms that use them
    describe effects that are absent from the data.

    The fitted model is ``outcome ~ C(condition_num)+age+sex+digit_span`` with
    participants as the grouping factor, ``re_formula='~condition_num'`` and
    variance-component terms for feedback_details and fractals.

    Returns
    -------
    tuple
        ``(result, data)``: the fitted statsmodels result and the simulated
        long-format DataFrame (one row per participant x condition).
    """
    # --- Participant-level variables ---
    # The draws below are kept in this exact order (age, sex, fractals,
    # digit span) so the sequence of values taken from the global RNG is fixed.
    ages = np.random.normal(mean_age, age_sd, n_participants)
    sexes = np.random.choice([0, 1], size=n_participants)  # random male/female assignment
    fractal_ids = np.random.choice(range(1, 29), size=n_participants)  # fractal used, 1..28
    digit_spans = np.random.normal(7, 1.5, n_participants)  # mean/SD guessed from previous literature
    participant_data = pd.DataFrame({
        'participant_id': np.arange(n_participants),
        'age': ages,
        'sex': sexes,
        'fractals': fractal_ids,
        'digit_span': digit_spans,
    })

    # --- Long format: one row per participant per condition ---
    data = pd.DataFrame({
        'participant_id': participant_data['participant_id'].repeat(n_conditions),
        'condition': np.tile(condition_names, n_participants),
    }).merge(participant_data, on='participant_id')

    # Numeric code for each condition, following the order of condition_names
    condition_codes = dict(zip(condition_names, range(len(condition_names))))
    data['condition_num'] = data['condition'].map(condition_codes)

    # --- By-participant random effects, expanded to one value per row ---
    random_intercepts = np.random.normal(0, sd_intercept_participant, n_participants)
    random_slopes = np.random.normal(0, sd_slope_condition, n_participants)
    row_participant = data['participant_id'].to_numpy()
    data['random_intercept'] = random_intercepts[row_participant]
    data['random_slope'] = random_slopes[row_participant]

    # --- Outcome ---
    condition_term = data['condition_num'] * (true_slope_condition + data['random_slope'])
    age_term = data['age'] * true_slope_age
    sex_term = data['sex'] * true_slope_sex
    residual_noise = np.random.normal(0, sd_residual, len(data))
    data['outcome'] = (
        true_intercept
        + data['random_intercept']
        + condition_term
        + age_term
        + sex_term
        + residual_noise
    )

    # --- Feedback stimulus identifiers ---
    # 1-5 for disgust rows, 6-10 for fear rows, always 11 for points rows.
    n_rows = len(data)
    disgust_details = np.random.randint(1, 6, size=n_rows)
    fear_details = np.random.randint(6, 11, size=n_rows)
    is_condition = [data['condition'] == name for name in ('disgust', 'fear', 'points')]
    # The default (n_participants) only applies to rows matching none of the conditions.
    data['feedback_details'] = np.select(
        is_condition, [disgust_details, fear_details, 11], n_participants
    )

    # --- Fit the mixed-effects model, grouped by participant ---
    # NOTE(review): the variance-component formulas reference feedback_details
    # and fractals without C(), so they appear to enter as numeric columns
    # rather than as categorical stimulus identifiers -- confirm this is the
    # intended specification for the stimulus/fractal random effects.
    variance_components = {
        'feedback_details': '0+feedback_details',
        "fractals": "0 + fractals",
    }
    model = smf.mixedlm(
        'outcome ~ C(condition_num)+age+sex+digit_span',
        data,
        groups=data['participant_id'],
        missing='drop',
        vc_formula=variance_components,
        re_formula='~condition_num',
    )
    return model.fit(), data

In [9]:
# Power estimate for the maximal model: simulate and fit n_reps datasets and
# record, for each one, whether the 'C(condition_num)[T.1]' coefficient
# (condition coded 1 versus the reference level) is significant at p < 0.05.
power_results = [
    maximal_model_simulate_and_fit()[0].pvalues['C(condition_num)[T.1]'] < 0.05
    for _ in range(n_reps)
]
# Power = proportion of simulated datasets with a significant effect
estimated_power = np.mean(power_results)
print(f"\nEstimated Power for condition effect: {estimated_power:.2f}")


Estimated Power for condition effect: 0.97
