mixed_effects_learning

In [None]:
import os
import tomllib
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

from src.anl_utils import compute_size, compute_growth_rate
from src.sim_utils import generate_condition_space


def compute_empirical_sizes(experiment_parameters, condition_space):
    """
    Compute one size value per subject and training session.

    For every training session ``k`` (counted from 1) the file
    ``results/empirical/session_<k>/individual_bats.npy`` is loaded and each
    entry along its first axis is handed to ``compute_size`` together with
    the unpacked ``condition_space``.

    Parameters
    ----------
    experiment_parameters : dict
        Must provide ``'num_subjects'`` and ``'num_training_sessions'``.
    condition_space : iterable
        Unpacked as the trailing positional arguments of ``compute_size``.

    Returns
    -------
    bat_sizes : numpy.ndarray
        Array of shape ``(num_subjects, num_training_sessions)``. Entries
        that are never written (fewer loaded entries than subjects) stay 0.
    """
    n_subjects = experiment_parameters['num_subjects']
    n_sessions = experiment_parameters['num_training_sessions']
    bat_sizes = np.zeros((n_subjects, n_sessions))

    for session_idx in range(n_sessions):
        # Session directories on disk are numbered from 1, columns from 0.
        bats_file = os.path.join('results/empirical',
                                 f'session_{session_idx + 1}',
                                 'individual_bats.npy')
        session_bats = np.load(bats_file)

        for subject_idx, bat in enumerate(session_bats):
            size = compute_size(bat, *condition_space)
            bat_sizes[subject_idx, session_idx] = size

    return bat_sizes


def load_data(config_path, simulation_path):
    """
    Load the configuration, the simulated sizes and the empirical sizes.

    Parameters
    ----------
    config_path : str
        Path to the TOML file holding the experiment parameters.
    simulation_path : str
        Path to the ``.npz`` archive that contains the array stored under
        the key ``'arnold_tongue_size'``.

    Returns
    -------
    model_sizes : numpy.ndarray
        The array read from the simulation archive.
    empirical_sizes : numpy.ndarray
        The sizes returned by ``compute_empirical_sizes``.
    experiment_parameters : dict
        The parsed TOML configuration.
    """
    # Load configuration (tomllib requires a binary file handle)
    with open(config_path, 'rb') as f:
        experiment_parameters = tomllib.load(f)

    # Load model learning data. np.load returns an NpzFile for .npz archives
    # which keeps the underlying file open; the context manager closes it
    # once the array has been read into memory.
    with np.load(simulation_path) as data:
        model_sizes = data['arnold_tongue_size']

    # Get empirical learning data
    condition_space = generate_condition_space(experiment_parameters)
    empirical_sizes = compute_empirical_sizes(experiment_parameters,
                                              condition_space)

    return model_sizes, empirical_sizes, experiment_parameters


def prepare_dataframe(model_sizes, model_growth, empirical_sizes,
                      empirical_growth, experiment_parameters):
    """
    Build a long-format DataFrame with one row per (subject, session) pair.

    Rows are ordered subject-major: all sessions of subject 0 first, then
    all sessions of subject 1, and so on. Each input array is flattened with
    ``reshape(-1)`` and stored as one column.

    Parameters
    ----------
    model_sizes : numpy.ndarray
        The model sizes.
    model_growth : numpy.ndarray
        The model growth rates.
    empirical_sizes : numpy.ndarray
        The empirical sizes.
    empirical_growth : numpy.ndarray
        The empirical growth rates.
    experiment_parameters : dict
        Must provide ``'num_subjects'`` and ``'num_training_sessions'``.

    Returns
    -------
    df : pandas.DataFrame
        Columns ``subject``, ``session``, ``model_size``,
        ``empirical_size``, ``model_growth`` and ``empirical_growth``.
        ``subject`` and ``session`` are zero-based indices.
    """
    subject_count = experiment_parameters['num_subjects']
    session_count = experiment_parameters['num_training_sessions']

    # Index grids of shape (subjects, sessions); flattening them row by row
    # yields the subject index repeated per session and the session index
    # cycling within each subject.
    subject_grid, session_grid = np.indices((subject_count, session_count))
    df = pd.DataFrame({'subject': subject_grid.ravel(),
                       'session': session_grid.ravel()})

    # Sizes first, then growth rates, each flattened in the same row order.
    measures = (
        ("model_size", model_sizes),
        ("empirical_size", empirical_sizes),
        ("model_growth", model_growth),
        ("empirical_growth", empirical_growth),
    )
    for column_name, values in measures:
        df[column_name] = values.reshape(-1)

    return df


def run_mixed_effects_analysis(df, dependent_variable, independent_variable,
                               group_variable, output_path):
    """
    Fit a linear mixed-effects model and save the fitted results.

    The model formula is ``"<dependent_variable> ~ <independent_variable>"``
    and observations are grouped by the ``group_variable`` column. No
    ``re_formula`` is passed, so the default random-effects structure of
    ``statsmodels`` applies.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the dependent, independent and group columns.
    dependent_variable : str
        Name of the column on the left-hand side of the formula.
    independent_variable : str
        Right-hand side of the formula.
    group_variable : str
        Name of the column that defines the groups.
    output_path : str
        Where the fitted results are written via their ``save`` method.
    """
    formula = f"{dependent_variable} ~ {independent_variable}"
    grouping = df[group_variable]

    # Specify the model, estimate it, and persist the fitted results
    model = smf.mixedlm(formula, df, groups=grouping)
    fitted = model.fit()
    fitted.save(output_path)


if __name__ == '__main__':

    # Inputs: experiment configuration and simulated learning data
    config_path = 'config/analysis/experiment_actual.toml'
    model_path = 'results/simulation/learning_simulation.npz'

    model_sizes, empirical_sizes, experiment_parameters = load_data(
        config_path, model_path)

    # Growth rates derived from the size arrays
    model_growth = compute_growth_rate(model_sizes)
    empirical_growth = compute_growth_rate(empirical_sizes)

    # Long-format table with one row per subject and session
    df = prepare_dataframe(model_sizes, model_growth, empirical_sizes,
                           empirical_growth, experiment_parameters)

    # Persist the full table before any rows are excluded
    df.to_csv('results/empirical/learning.csv')

    # Session indices are zero-based, so this drops the first two sessions
    df = df[df["session"].gt(1)]

    # (dependent variable, independent variable, output file) per analysis:
    # first size, then growth
    analyses = (
        ("empirical_size", "model_size",
         'results/statistics/mixed_effects_bat_size.pkl'),
        ("empirical_growth", "model_growth",
         'results/statistics/mixed_effects_bat_growth.pkl'),
    )
    for dependent, independent, output_path in analyses:
        run_mixed_effects_analysis(df, dependent, independent, "subject",
                                   output_path)


figure 4

In [None]:
import os
import tomllib
import numpy as np

from src.anl_utils import compute_size, compute_growth_rate
from src.sim_utils import generate_condition_space

import seaborn as sns
import matplotlib.pyplot as plt
from src.plot_utils import comparative_lineplot


def compute_empirical_sizes(experiment_parameters, condition_space):
    """
    Compute the size for every subject in every training session.

    Session ``k`` (counted from 1) is read from
    ``results/empirical/session_<k>/individual_bats.npy``; each entry along
    the first axis of the loaded array is passed to ``compute_size`` with
    ``condition_space`` unpacked as additional positional arguments.

    Parameters
    ----------
    experiment_parameters : dict
        Must provide ``'num_subjects'`` and ``'num_training_sessions'``.
    condition_space : iterable
        Extra positional arguments forwarded to ``compute_size``.

    Returns
    -------
    bat_sizes : numpy.ndarray
        Array of shape ``(num_subjects, num_training_sessions)``,
        initialised with zeros.
    """
    shape = (experiment_parameters['num_subjects'],
             experiment_parameters['num_training_sessions'])
    bat_sizes = np.zeros(shape)

    # Directory names use 1-based session numbers; columns are 0-based.
    session_numbers = range(
        1, experiment_parameters['num_training_sessions'] + 1)
    for column, session_number in enumerate(session_numbers):
        bats_file = os.path.join('results/empirical',
                                 f'session_{session_number}',
                                 'individual_bats.npy')
        for row, bat in enumerate(np.load(bats_file)):
            bat_sizes[row, column] = compute_size(bat, *condition_space)

    return bat_sizes


def load_data(config_path, simulation_path):
    """
    Load the simulated sizes and compute the empirical sizes.

    Parameters
    ----------
    config_path : str
        Path to the TOML file holding the experiment parameters.
    simulation_path : str
        Path to the ``.npz`` archive that contains the array stored under
        the key ``'arnold_tongue_size'``.

    Returns
    -------
    model_sizes : numpy.ndarray
        The array read from the simulation archive.
    empirical_sizes : numpy.ndarray
        The sizes returned by ``compute_empirical_sizes``.
    """
    # Load configuration (tomllib requires a binary file handle)
    with open(config_path, 'rb') as f:
        experiment_parameters = tomllib.load(f)

    # Load model learning data. np.load returns an NpzFile for .npz archives
    # which keeps the underlying file open; the context manager closes it
    # once the array has been read into memory.
    with np.load(simulation_path) as data:
        model_sizes = data['arnold_tongue_size']

    # Get empirical learning data
    condition_space = generate_condition_space(experiment_parameters)
    empirical_sizes = compute_empirical_sizes(experiment_parameters,
                                              condition_space)

    return model_sizes, empirical_sizes


def main():
    """
    Plot mean model and empirical sizes per session with percentile bands.

    Loads both data sets, takes the mean over axis 0 for each session and
    the 2.5th/97.5th percentiles over the same axis as lower/upper bounds,
    then draws both curves with ``comparative_lineplot`` and shows the
    figure. Nothing is written to disk.
    """
    # Load data
    config_path = 'config/analysis/experiment_actual.toml'
    model_path = 'results/simulation/learning_simulation.npz'
    model_sizes, empirical_sizes = load_data(config_path, model_path)

    # Session numbers for the x-axis, counted from 1
    sessions = np.arange(1, model_sizes.shape[1] + 1)

    # (lower, upper) percentile bounds, model first and empirical second
    bounds = tuple(
        (np.percentile(sizes, 2.5, axis=0),
         np.percentile(sizes, 97.5, axis=0))
        for sizes in (model_sizes, empirical_sizes))

    # Mean curves in the same order as the bounds
    y = (model_sizes.mean(axis=0), empirical_sizes.mean(axis=0))

    linecolor = ('#785ef0', '#fe6100')

    # NOTE(review): saving was disabled in the original (the savefig call on
    # the returned object was commented out, target
    # 'results/figures/figure_four/model_sizes.svg'); the figure is only
    # displayed. Bind the return value again to re-enable saving.
    comparative_lineplot(sessions,
                         y,
                         bounds,
                         figsize=(10, 5),
                         labels=('Model', 'Session',
                                 'Arnold tongue size'),
                         fontsizes=(14, 12),
                         line_color=linecolor)

    plt.show()


# Produce the figure only when this file/cell is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()
