# In [1]:
import ssms
from ssms.config import model_config as model_config
from ssms.basic_simulators.simulator import simulator
import numpy as np
import pandas as pd
from copy import deepcopy
import pickle
from pathlib import Path

# In [2]:
class SSMDataGenerator:
    """A class to generate data from Sequential Sampling Models (SSM).
    
    This class handles the generation of simulated data from various sequential sampling models,
    including parameter sampling and trial generation across multiple subjects.
    
    Parameters
    ----------
    model : str
        The name of the sequential sampling model to use
    n_subjects : int
        Number of subjects to simulate
    n_trials : int
        Number of trials per subject
        
    Attributes
    ----------
    model : str
        The name of the sequential sampling model
    model_config : dict
        Configuration dictionary for the specified model
    model_params : list
        List of parameter names for the model
    n_subjects : int
        Number of subjects
    n_trials : int
        Number of trials per subject
    param_buffer : float
        Buffer to constrain parameters away from bounds
    param_std_range_fraction : float
        Fraction of parameter range to use for standard deviation
    df : pandas.DataFrame or None
        DataFrame containing generated data
    group_params : dict
        Dictionary of group-level parameters
    subject_params : dict
        Dictionary of subject-level parameters
    state : dict
        Dictionary tracking simulation state
    """
    def __init__(self,
                 model: str,
                 n_subjects: int,
                 n_trials: int) -> None:
        """Configure a generator for ``model`` with the requested design size.

        The model's entry in ``ssms.config.model_config`` is deep-copied, so
        this instance works on its own copy of the configuration.
        """
        # Model definition: private copy of the config entry and the
        # parameter names listed under its "params" key.
        config = deepcopy(ssms.config.model_config[model])
        self.model = model
        self.model_config = config
        self.model_params = config["params"]

        # Design size
        self.n_trials = n_trials
        self.n_subjects = n_subjects

        # Sampling constraints:
        #   param_buffer             -> margin kept from each parameter bound
        #   param_std_range_fraction -> share of the buffered range used as
        #                               the largest allowed standard deviation
        #   continuous_regression_beta_mean_bounds -> [low, high] for the
        #                               group-level mean of continuous betas
        self.continuous_regression_beta_mean_bounds = [-0.5, 0.5]
        self.param_std_range_fraction = 1 / 9
        self.param_buffer = 0.05

        # Generated data and bookkeeping; all empty until a sampling
        # method has been called.
        self.state = dict(sampled=False,
                          injected_sequential_dynamics=False)
        self.subject_params = {}
        self.group_params = {}
        self.df = None
    
    def get_parameter_constraints(self,
                                  target_parameter: str) -> tuple[list[float], float, float]:
        """Get parameter constraints for a given model parameter.

        Parameters
        ----------
        target_parameter : str
            Name of the parameter to get constraints for

        Returns
        -------
        param_bounds_buffered : list[float]
            Lower and upper bounds for the parameter with buffer applied
        param_bounds_range : float
            Range between buffered bounds
        param_bounds_std_max : float 
            Maximum allowed standard deviation for the parameter
        """
        param_bounds = [self.model_config["param_bounds"][0][self.model_params.index(target_parameter)],
                        self.model_config["param_bounds"][1][self.model_params.index(target_parameter)]]
        
        param_bounds_buffered = [0] * 2
        param_bounds_buffered[0] = param_bounds[0] + self.param_buffer
        param_bounds_buffered[1] = param_bounds[1] - self.param_buffer
        print("target_parameter", target_parameter)
        print("param_bounds_buffered", param_bounds_buffered)
        param_bounds_range = param_bounds_buffered[1] - param_bounds_buffered[0]
        print("param_bounds_range", param_bounds_range)
        param_bounds_std_max = param_bounds_range * self.param_std_range_fraction
        print("param_bounds_std_max", param_bounds_std_max)

        return param_bounds_buffered, param_bounds_range, param_bounds_std_max
    
    def get_parameter_constraints_vec(self) -> tuple[list[float], np.ndarray, np.ndarray]:
        """Get parameter constraints vectors for all parameters of a given model.

        Returns
        -------
        param_bounds_buffered : list[float]
            Lower and upper bounds for all parameters with buffer applied
        param_bounds_range : numpy.ndarray
            Range between buffered bounds for all parameters
        param_bounds_std_max : numpy.ndarray
            Maximum allowed standard deviation for all parameters
        """
        param_bounds = self.model_config["param_bounds"]
        param_bounds_buffered = deepcopy(param_bounds)
        param_bounds_buffered[0] = np.array(param_bounds)[0] + self.param_buffer
        param_bounds_buffered[1] = np.array(param_bounds)[1] - self.param_buffer
        param_bounds_range = param_bounds_buffered[1] - param_bounds_buffered[0]
        param_bounds_std_max = param_bounds_range * self.param_std_range_fraction

        return param_bounds_buffered, param_bounds_range, param_bounds_std_max
    
    def prep_dfs_for_saving(self,
                            first_columns: list = ["response", "rt", "participant_id", "trial"]) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Order columns of DataFrames to match the HSSM standard format and create modeling subset.

        This function reorders the columns of a DataFrame to place specified columns at the beginning,
        followed by parameters, and then by model-specific parameters. It also creates a subset
        DataFrame containing only the columns needed for modeling.

        Parameters
        ----------
        first_columns : list, optional
            List of column names to place at the beginning of the DataFrame, by default ["response", "rt", "participant_id", "trial"]

        Returns
        -------
        tuple[pd.DataFrame, pd.DataFrame]
            - DataFrame with reordered columns including all parameters
            - DataFrame subset containing only modeling-relevant columns
            
        Raises
        ------
        ValueError
            If attempting to get modeling df before sampling data
        """

        if not self.state["sampled"]:
            raise ValueError("Attempt to get modeling df before sampling data")

        df_tmp = deepcopy(self.df)
        columns_ = list(df_tmp.columns)
        first_cols_ = [col for col in first_columns if col in columns_]
        remain_cols_ = [col for col in columns_ if col not in first_cols_]
        columns_hssm = first_cols_ + remain_cols_
        df_ordered = df_tmp[columns_hssm]


        modeling_cols = [col for col in df_ordered.columns if \
                         (("beta" not in col) and (col not in self.model_params))]
        df_modeling = df_ordered[modeling_cols]
        
        return df_ordered, df_modeling

    def inject_trials(self) -> pd.DataFrame:
        """Inject trials into the dataframe by repeating each subject's data n_trials times.
        
        Returns
        -------
        pd.DataFrame
            DataFrame with injected trials, where each subject's data is repeated n_trials times
            and a trial counter is added
        """
        df_tmp = deepcopy(self.df)
        df_repeated = df_tmp.loc[np.repeat(df_tmp.index, 
                                         self.n_trials)].\
                                          reset_index(drop = True)
        df_repeated["trial"] = df_repeated.groupby("participant_id").cumcount() + 1

        print("Adding trials to attached df")
        self.df = df_repeated
        return df_repeated

    def attach_simulations_to_df(self) -> pd.DataFrame:
        """Attach simulated response times and choices to the dataframe.

        Returns
        -------
        pd.DataFrame
            DataFrame with simulated response times and choices attached

        Raises
        ------
        ValueError
            If attempting to resample when sequential dynamics have been injected
        """
        if self.state["sampled"] and self.state["injected_sequential_dynamics"]:
            raise ValueError("Disallowing resampling because sequential dynamics (e.g. past dependence) have been injected"
                             "This is to ensure that the injected dynamics are not overriden by the resampling.")

        df_tmp = deepcopy(self.df)
        sim_out = simulator(model = self.model, theta = df_tmp[self.model_params], n_samples = 1)
        df_tmp["rt"] = sim_out["rts"].squeeze()
        df_tmp["response"] = sim_out["choices"].squeeze()

        print("Attaching simulations to df")
        self.df = df_tmp
        self.state["sampled"] = True
        return df_tmp

    def get_parameter_sets_hierarchical(self) -> tuple[pd.DataFrame, dict, dict]:
        """Generate hierarchical parameter sets for a given model.

        Returns
        -------
        pd.DataFrame
            DataFrame containing subject-level parameters and group-level parameters
        dict
            Dictionary containing group-level parameters
        dict
            Dictionary containing subject-level parameters

        Raises
        ------
        ValueError
            If unable to sample parameters within bounds after 100 attempts
        """
        param_bounds_buffered, param_bounds_range, param_bounds_std_max = \
            self.get_parameter_constraints_vec()
        param_bounds_mean = (param_bounds_buffered[1] + param_bounds_buffered[0]) / 2
        max_mean_perturbation = param_bounds_std_max * 2

        # # Sample subject level parameters
        group_params = {}
        subject_params = {}
        cnt = 0
        while True:
            # Sample mean perturbation
            param_bounds_mean_perturbation = np.random.uniform(-max_mean_perturbation, max_mean_perturbation)
            group_sample_means = param_bounds_mean + param_bounds_mean_perturbation

            # Sample std perturbation
            group_sample_stds = np.random.uniform(0.05, np.maximum(0.05, param_bounds_std_max))

            # Subject level parameters
            subject_level_parameters = np.random.normal(loc = group_sample_means,
                                                        scale = group_sample_stds,
                                                        size=(self.n_subjects, 
                                                              len(self.model_params)))
            
            # check if all parameters are within the bounds
            if np.all(subject_level_parameters < (param_bounds_buffered[1])) and np.all(subject_level_parameters > (param_bounds_buffered[0])):
                break
            elif cnt > 100:
                raise ValueError("Failed to sample parameters within bounds after 100 attempts")
            cnt += 1

        df = pd.DataFrame(subject_level_parameters, columns = self.model_params)

        # Identify group means and standard deviations in the parameter sets
        for i, param in enumerate(self.model_params):
            df[f"beta_{param}_intercept_mean"] = group_sample_means[i]
            df[f"beta_{param}_intercept_std"] = group_sample_stds[i]
            df[f"beta_{param}_intercept_subject"] = subject_level_parameters[:, i]

            # Update parameter_dicts
            group_params.update({
                f"beta_{param}_intercept_mean": group_sample_means[i],
                f"beta_{param}_intercept_std": group_sample_stds[i],
            })
            subject_params.update({
                f"beta_{param}_intercept_subject": subject_level_parameters[:, i],
            })

        df["participant_id"] = np.arange(self.n_subjects).astype(str)
        print("Attaching subject level parameters")
        self.df = df
        self.group_params = group_params
        self.subject_params = subject_params
        return df, group_params, subject_params

    def inject_categorical_regression(self,
                                      target_parameter: str,
                                      categorical_name: str = "x") -> tuple[pd.DataFrame, dict, dict]:
        """Injects categorical regression effects into parameter values.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame containing subject and trial data
        model : str
            Name of the model being used
        target_parameter : str
            Parameter to inject categorical effects into
        categorical_name : str, optional
            Name of the categorical variable, by default "x"

        Returns
        -------
        tuple[pd.DataFrame, dict, dict]
            Modified DataFrame, group parameters dict, and subject parameters dict

        Raises
        ------
        ValueError
            If unable to sample parameters within bounds after 100 attempts
        """
        param_bounds_buffered, param_bounds_range, param_bounds_std_max = self.get_parameter_constraints(target_parameter)
        n_subjects = len(self.df["participant_id"].unique())
        n_trials = len(self.df["trial"].unique())

        # Inject categorical covariate
        group_params = {}
        subject_params = {}

        cnt = 0
        while True:
            df_tmp = deepcopy(self.df)
            # Add categorical column to dataframe
            df_tmp[f"{categorical_name}"] = np.random.choice([0,1],
                                                             size=(n_subjects * n_trials),
                                                             p = [0.5, 0.5])
            
            # Sample group level parameters
            beta_group_mean = np.random.uniform(0.1, param_bounds_range * 1/5)
            beta_group_std = np.random.uniform(0.05, np.maximum(0.05, param_bounds_std_max))
            beta_subject = np.random.normal(beta_group_mean, beta_group_std, size=n_subjects)
            
            # Update parameter_dicts
            group_params.update({
                f"beta_{target_parameter}_{categorical_name}_group_mean": \
                    beta_group_mean,
                f"beta_{target_parameter}_{categorical_name}_group_std": \
                    beta_group_std,
            })
            subject_params.update({
                f"beta_{target_parameter}_{categorical_name}_subject": beta_subject
            })

            df_tmp[f"beta_{target_parameter}_{categorical_name}_group_mean"] = \
                beta_group_mean
            df_tmp[f"beta_{target_parameter}_{categorical_name}_group_std"] = \
                beta_group_std
            df_tmp[f"beta_{target_parameter}_{categorical_name}_subject"] = \
                beta_subject[df_tmp[f"participant_id"].astype(int)]
            
            df_tmp[target_parameter] = df_tmp[target_parameter] + \
                (beta_subject[df_tmp[f"participant_id"].astype(int)] * df_tmp[f"{categorical_name}"])
            if cnt > 100:
                raise ValueError("Failed to sample parameters within bounds after 100 attempts")
            cnt += 1

            if np.all(df_tmp[target_parameter] < (param_bounds_buffered[1])) and \
                np.all(df_tmp[target_parameter] > (param_bounds_buffered[0])):
                break
        
        # Update dataframe
        self.df = df_tmp
        self.group_params.update(group_params)
        self.subject_params.update(subject_params)
        return self.df, self.group_params, self.subject_params

    def inject_continuous_regression(self,
                                     target_parameter: str,
                                     covariate_names: list[str]) -> tuple[pd.DataFrame, dict, dict]:
        """Injects continuous regression effects into a parameter of choice.
        
        Parameters
        ----------
        df : pd.DataFrame
            DataFrame containing trial-wise data
        model : str
            Name of the model being used
        target_parameter : str
            Parameter to inject regression effects into
        covariate_names : list[str]
            List of names for the continuous covariates to add
            
        Returns
        -------
        df : pd.DataFrame
            DataFrame with added continuous regression effects
        group_params : dict
            Dictionary containing group-level parameters
        subject_params : dict
            Dictionary containing subject-level parameters
        """
        param_bounds_buffered, param_bounds_range, param_bounds_std_max = self.get_parameter_constraints(target_parameter)

        n_subjects = len(self.df["participant_id"].unique())
        n_trials = len(self.df["trial"].unique())

        group_params = {}
        subject_params = {}

        cnt = 0
        while True:
            df_tmp = deepcopy(self.df)
            for covariate_name in covariate_names:
                df_tmp[f"continuous_{covariate_name}"] = \
                    np.random.normal(-1, 1, size=(n_subjects * n_trials))

            for i, covariate_name in enumerate(covariate_names):
                # Sample parameters
                beta_group_mean = np.random.uniform(self.continuous_regression_beta_mean_bounds[0], 
                                                    self.continuous_regression_beta_mean_bounds[1])
                beta_group_std = np.random.uniform(0.05, 
                                                   np.minimum(0.25,np.maximum(0.05, 
                                                              param_bounds_std_max)))
                
                beta_subjects = np.random.normal(loc = beta_group_mean,
                                                 scale = beta_group_std,
                                                 size = n_subjects)
                
                # Update parameter_dicts
                group_params.update({
                    f"beta_{target_parameter}_{covariate_name}_group_mean": beta_group_mean,
                    f"beta_{target_parameter}_{covariate_name}_group_std": beta_group_std,
                })
                subject_params.update({
                    f"beta_{target_parameter}_{covariate_name}_subject": beta_subjects[df_tmp[f"participant_id"].astype(int).values]
                })

                # Update df
                df_tmp[f"beta_{target_parameter}_{covariate_name}_group_mean"] = beta_group_mean
                df_tmp[f"beta_{target_parameter}_{covariate_name}_group_std"] = beta_group_std
                df_tmp[f"beta_{target_parameter}_{covariate_name}_subject"] = beta_subjects[df_tmp[f"participant_id"].astype(int).values]

            # Compute linear predictor (target_parameter)
            for covariate_name in covariate_names:
                df_tmp[target_parameter] = df_tmp[target_parameter] + \
                    (df_tmp[f"beta_{target_parameter}_{covariate_name}_subject"] * df_tmp[f"continuous_{covariate_name}"])

            
            if np.all(df_tmp[target_parameter] < (np.array(param_bounds_buffered[1]))) and \
                      np.all(df_tmp[target_parameter] > (np.array(param_bounds_buffered[0]))):
                break
            
            if cnt > 100:
                raise ValueError("Failed to sample parameters within bounds after 100 attempts")
            cnt += 1

        self.df = df_tmp
        self.group_params.update(group_params)
        self.subject_params.update(subject_params)
        return self.df, self.group_params, self.subject_params

    def inject_sticky_choice(self,
                             target_parameter: str) -> tuple[pd.DataFrame, dict, dict]:
        """Injects trial-by-trial dependencies into a parameter based on previous trial responses.
        
        Parameters
        ----------
        target_parameter : str
            Parameter to inject dependencies into ('v' or 'z')
            
        Returns
        -------
        df_tmp : pd.DataFrame
            DataFrame with injected trial dependencies
        group_params : dict
            Dictionary of group-level parameters
        subject_params : dict
            Dictionary of subject-level parameters
            
        Raises
        ------
        ValueError
            If target_parameter is not 'v' or 'z'
            If parameters cannot be sampled within bounds after 100 attempts
        """
        if target_parameter not in ["v", "z"]:
            raise ValueError("Past dependence conceptually only implemented for drift 'v' or bias 'z': "
                             f" You specified {target_parameter}")
        
        param_bounds_buffered, param_bounds_range, param_bounds_std_max = \
            self.get_parameter_constraints(target_parameter)

        n_subjects = len(self.df["participant_id"].unique())
        n_trials = len(self.df["trial"].unique())

        cnt = 0
        group_params = {}
        subject_params = {}
        while True:
            df_tmp = deepcopy(self.df)
            print("subject_params", self.subject_params)
            print("group_params", self.group_params)
            print("z-range a priori max ", df_tmp.z.min(), " min ", df_tmp.z.max())
            
            beta_sticky_group_mean = np.random.uniform(0.05, param_bounds_std_max)
            beta_sticky_group_std = np.random.uniform(0.05,
                                        np.maximum(0.05, param_bounds_std_max / 2))
            
            beta_sticky_subjects = np.random.normal(loc = beta_sticky_group_mean,
                                                      scale = beta_sticky_group_std,
                                                      size = n_subjects)
            
            # print("beta_sticky_pre_clip",
            #       beta_sticky_pre_clip)
            
            # beta_sticky_subjects = np.clip(beta_sticky_pre_clip,
            #                                  param_bounds_buffered[0],
            #                                  param_bounds_buffered[1])
            print("beta_sticky_subjects",
                  beta_sticky_subjects)
            
            # Update parameter_dicts
            group_params.update({
                f"beta_sticky_{target_parameter}_group_mean": beta_sticky_group_mean,
                f"beta_sticky_{target_parameter}_group_std": beta_sticky_group_std,
            })
            subject_params.update({
                f"beta_sticky_{target_parameter}_subject": beta_sticky_subjects
            })
            
            # Update df
            df_tmp[f"beta_sticky_{target_parameter}_group_mean"] = \
                beta_sticky_group_mean
            df_tmp[f"beta_sticky_{target_parameter}_group_std"] = \
                beta_sticky_group_std
            df_tmp[f"beta_sticky_{target_parameter}_subject"] = \
                beta_sticky_subjects[df_tmp[f"participant_id"].astype(int).values]

            # Compute linear predictor (target_parameter)
            df_tmp["response_l1"] = 0
            df_tmp["response"] = 0
            for i in df_tmp["participant_id"].unique():
                print('participant_id', i)
                displacement_tmp = 0
                for j in range(1, n_trials + 1, 1):

                    params_tmp = df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j), 
                                        self.model_params].to_dict(orient = "records")[0]
                    
                    if j > 1:
                        displacement_tmp = (df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j-1), "response"].values * \
                            df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j), f"beta_sticky_{target_parameter}_subject"].values)[0]
                        
                    # Apply displacement
                    params_tmp[f"{target_parameter}"] = params_tmp[f"{target_parameter}"] + displacement_tmp

                    df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j), f"{target_parameter}"] = \
                        params_tmp[f"{target_parameter}"]
                    
                    sim_tmp = simulator(model = self.model,
                                        theta = params_tmp,
                                        n_samples = 1)

                    df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j), "rt"] = \
                        sim_tmp['rts'].squeeze()
                    df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j), "response"] = \
                        sim_tmp['choices'].squeeze()
                    
                    if j > 1:
                        df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j), "response_l1"] = \
                            df_tmp.loc[(df_tmp["participant_id"] == i) & (df_tmp["trial"] == j-1), "response"].values
            
            if np.all(df_tmp[f"{target_parameter}"] < (param_bounds_buffered[1])) and \
                  np.all(df_tmp[f"{target_parameter}"] > (param_bounds_buffered[0])):
                break
            else:
                print(f"{target_parameter} is out of bounds, trying again, "
                      f"max {target_parameter} = {df_tmp[f'{target_parameter}'].max()} "
                      f"min {target_parameter} = {df_tmp[f'{target_parameter}'].min()}")
                print(f"allowable range = {param_bounds_buffered[0]} to {param_bounds_buffered[1]}")
            
            if cnt > 5:
                raise ValueError("Failed to sample parameters within bounds after 5 attempts")
            cnt += 1
        
        self.df = df_tmp
        self.group_params.update(group_params)
        self.subject_params.update(subject_params)
        return self.df, self.group_params, self.subject_params
    
    def make_simple_hierarchical_dataset(self) -> tuple[pd.DataFrame, dict, dict]:
        """Build a hierarchical dataset without regression effects.

        Runs three stages in order: sample hierarchical parameter sets,
        expand them to trials, and attach simulated data. Every stage stores
        its result on the instance, so their return values are not needed.

        Returns
        -------
        pd.DataFrame
            The instance's DataFrame with parameters and simulated data
        dict
            The instance's group-level parameters
        dict
            The instance's subject-level parameters
        """
        pipeline = (self.get_parameter_sets_hierarchical,
                    self.inject_trials,
                    self.attach_simulations_to_df)
        for stage in pipeline:
            stage()

        return self.df, self.group_params, self.subject_params
    
    def make_hierarchical_regression_dataset(self,
                                             categorical_target: str | None = "a",
                                             categorical_covariate: str | None = "cost_fail_condition", 
                                             continuous_target: str | None = "v",
                                             continuous_covariate: str | None = "difficulty") -> tuple[pd.DataFrame, dict, dict]:
        """Generate a hierarchical regression dataset with both categorical and continuous covariates.

        Parameters
        ----------
        categorical_target : str or None, optional
            Parameter to be modulated by categorical covariate, by default "a"
        categorical_covariate : str or None, optional
            Name of categorical covariate, by default "cost_fail_condition"
        continuous_target : str or None, optional
            Parameter to be modulated by continuous covariate, by default "v"
        continuous_covariate : str or None, optional
            Name of continuous covariate, by default "difficulty"

        Returns
        -------
        tuple[pd.DataFrame, dict, dict]
            DataFrame containing the hierarchical dataset with parameters and simulated data,
            Dictionary containing group-level parameters,
            Dictionary containing subject-level parameters

        Raises
        ------
        ValueError
            If categorical_target and continuous_target are the same parameter
        """
        
        if (categorical_target is not None) and (continuous_target is not None):
            if categorical_target == continuous_target:
                raise ValueError("categorical_target and continuous_target cannot be the same")
        
        df, group_params_tmp, subject_params_tmp = self.get_parameter_sets_hierarchical()
        df = self.inject_trials()
        
        group_params = {}
        subject_params = {}

        if categorical_target is not None:
            df, group_params_tmp, subject_params_tmp = \
                self.inject_categorical_regression(target_parameter = categorical_target,
                                                   categorical_name = categorical_covariate)
        
            group_params.update(group_params_tmp)
            subject_params.update(subject_params_tmp)

        if continuous_target is not None:
            df, group_params_tmp, subject_params_tmp = \
                self.inject_continuous_regression(target_parameter = continuous_target,
                                                  covariate_names = [continuous_covariate])

            group_params.update(group_params_tmp)
            subject_params.update(subject_params_tmp)

        df = self.attach_simulations_to_df()
        return self.df, self.group_params, self.subject_params

    def make_hierarchical_regression_with_sticky_choice(self,
                                                        categorical_target: str | None = "a",
                                                        categorical_covariate: str | None = "cost_fail_condition", 
                                                        continuous_target: str | None = "v",
                                                        continuous_covariate: str | None = "difficulty",
                                                        sticky_target: str = "z") -> tuple[pd.DataFrame, dict, dict]:
        """Generate hierarchical regression data with sticky choice effects.

        Parameters
        ----------
        categorical_target : str or None, optional
            Parameter to apply categorical regression to. Default is "a"
        categorical_covariate : str or None, optional
            Name of categorical covariate. Default is "cost_fail_condition"
        continuous_target : str or None, optional
            Parameter to apply continuous regression to. Default is "v"
        continuous_covariate : str or None, optional
            Name of continuous covariate. Default is "difficulty"
        sticky_target : str, optional
            Parameter to apply sticky choice effects to. Default is "z"

        Returns
        -------
        tuple
            Contains:
            - pd.DataFrame: DataFrame with simulated data
            - dict: Group-level parameters
            - dict: Subject-level parameters

        Raises
        ------
        ValueError
            If categorical_target and continuous_target are the same parameter
        """
        

        if (categorical_target is not None) and (continuous_target is not None):
            if categorical_target == continuous_target:
                raise ValueError("categorical_target and continuous_target cannot be the same")
        
        df = self.get_parameter_sets_hierarchical()
        df = self.inject_trials()
        
        group_params = {}
        subject_params = {}
        
        if categorical_target is not None:
            df, group_params_tmp, subject_params_tmp = self.inject_categorical_regression(
                                                    target_parameter = categorical_target,
                                                    categorical_name = categorical_covariate)
            group_params.update(group_params_tmp)
            subject_params.update(subject_params_tmp)
        
        if continuous_target is not None:
            df, group_params_tmp, subject_params_tmp = self.inject_continuous_regression(
                                                   target_parameter = continuous_target,
                                                   covariate_names = [continuous_covariate])
            group_params.update(group_params_tmp)
            subject_params.update(subject_params_tmp)
        
        # Note: This adds simulations to the df
        df, group_params_tmp, subject_params_tmp = self.inject_sticky_choice(
                                                target_parameter = sticky_target)
        group_params.update(group_params_tmp)
        subject_params.update(subject_params_tmp)
        return df, group_params, subject_params
    
    def save_data(self, 
                  filename_base: str,
                  folder: str = "data") -> None:
        """Save simulation data and parameters to files.

        Three files are written into ``folder``:

        - ``{filename_base}_full.parquet``: the first DataFrame returned by
          ``self.prep_dfs_for_saving()``
        - ``{filename_base}_modeling.parquet``: the second DataFrame returned
          by ``self.prep_dfs_for_saving()``
        - ``{filename_base}_parameters.pkl``: a pickled dict with the keys
          ``"group_params"`` and ``"subject_params"``

        Parameters
        ----------
        filename_base : str
            Base filename to use for saving files. Will be appended with suffixes.
        folder : str, optional
            Folder to save files in, by default "data". The folder (including
            missing parents) is created if it does not exist yet.
        """
        # Build every output path from one Path object so that a trailing
        # slash in `folder` does not produce doubled separators.
        out_dir = Path(folder)
        out_dir.mkdir(parents=True, exist_ok=True)

        df_ordered, df_modeling = self.prep_dfs_for_saving()

        # Save full df:
        df_ordered.to_parquet(out_dir / f"{filename_base}_full.parquet",
                              index=False)

        # Save df for modeling:
        df_modeling.to_parquet(out_dir / f"{filename_base}_modeling.parquet",
                               index=False)

        # Save the parameter dicts as one dict via pickle
        with open(out_dir / f"{filename_base}_parameters.pkl", "wb") as f:
            pickle.dump({"group_params": self.group_params,
                         "subject_params": self.subject_params},
                        f)

    @classmethod
    def load_data(cls, 
                  filename_base: str,
                  folder: str = "data") -> tuple[pd.DataFrame, pd.DataFrame, dict]:
        """Load saved simulation data and parameters from files.

        Reads ``{filename_base}_parameters.pkl``,
        ``{filename_base}_modeling.parquet`` and
        ``{filename_base}_full.parquet`` from ``folder``.

        Parameters
        ----------
        filename_base : str
            Base filename used when saving files
        folder : str, optional
            Folder containing saved files, by default "data"

        Returns
        -------
        tuple[pd.DataFrame, pd.DataFrame, dict]
            Contains, in this order:
            - DataFrame with modeling data
            - DataFrame with full data  
            - Dict containing group and subject parameters
        """
        in_dir = Path(folder)

        # NOTE: unpickling can execute arbitrary code; only load parameter
        # files that come from a trusted source.
        with open(in_dir / f"{filename_base}_parameters.pkl", "rb") as f:
            parameters = pickle.load(f)

        df_modeling = pd.read_parquet(in_dir / f"{filename_base}_modeling.parquet")
        df_ordered = pd.read_parquet(in_dir / f"{filename_base}_full.parquet")

        # Modeling data first, full data second, as documented above.
        return df_modeling, df_ordered, parameters

## Basic Model

In [3]:
# Simulate a simple hierarchical DDM dataset: 10 subjects, 200 trials each.
BasicDDMGenerator = SSMDataGenerator(model="ddm", n_subjects=10, n_trials=200)
(
    basic_ddm_df,
    basic_ddm_group_params,
    basic_ddm_subject_params,
) = BasicDDMGenerator.make_simple_hierarchical_dataset()

Attaching subject level parameters
Adding trials to attached df
Attaching simulations to df


In [6]:
# Persist the basic DDM data and its parameter dictionaries to disk.
BasicDDMGenerator.save_data(
    filename_base="basic_ddm",
    folder="data/basic_ddm_data/",
)

In [7]:
# Read the saved files back. The names follow the order documented by
# load_data: modeling data, full data, parameter dict.
(
    df_basic_ddm_modeling,
    df_basic_ddm_ordered,
    basic_ddm_parameter_dict,
) = SSMDataGenerator.load_data(
    filename_base="basic_ddm",
    folder="data/basic_ddm_data/",
)

In [5]:
# Inspect the trial-level DataFrame returned by make_simple_hierarchical_dataset().
basic_ddm_df

Unnamed: 0,v,a,z,t,beta_v_intercept_mean,beta_v_intercept_std,beta_v_intercept_subject,beta_a_intercept_mean,beta_a_intercept_std,beta_a_intercept_subject,beta_z_intercept_mean,beta_z_intercept_std,beta_z_intercept_subject,beta_t_intercept_mean,beta_t_intercept_std,beta_t_intercept_subject,participant_id,trial,rt,response
0,1.322655,1.909583,0.471465,1.052335,0.960122,0.620211,1.322655,1.518073,0.174993,1.909583,0.550717,0.073419,0.471465,1.071439,0.060588,1.052335,0,1,2.294668,1
1,1.322655,1.909583,0.471465,1.052335,0.960122,0.620211,1.322655,1.518073,0.174993,1.909583,0.550717,0.073419,0.471465,1.071439,0.060588,1.052335,0,2,3.106365,1
2,1.322655,1.909583,0.471465,1.052335,0.960122,0.620211,1.322655,1.518073,0.174993,1.909583,0.550717,0.073419,0.471465,1.071439,0.060588,1.052335,0,3,2.479129,1
3,1.322655,1.909583,0.471465,1.052335,0.960122,0.620211,1.322655,1.518073,0.174993,1.909583,0.550717,0.073419,0.471465,1.071439,0.060588,1.052335,0,4,3.611279,1
4,1.322655,1.909583,0.471465,1.052335,0.960122,0.620211,1.322655,1.518073,0.174993,1.909583,0.550717,0.073419,0.471465,1.071439,0.060588,1.052335,0,5,3.830290,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.846129,1.576588,0.541085,1.073784,0.960122,0.620211,0.846129,1.518073,0.174993,1.576588,0.550717,0.073419,0.541085,1.071439,0.060588,1.073784,9,196,1.904842,1
1996,0.846129,1.576588,0.541085,1.073784,0.960122,0.620211,0.846129,1.518073,0.174993,1.576588,0.550717,0.073419,0.541085,1.071439,0.060588,1.073784,9,197,1.902348,1
1997,0.846129,1.576588,0.541085,1.073784,0.960122,0.620211,0.846129,1.518073,0.174993,1.576588,0.550717,0.073419,0.541085,1.071439,0.060588,1.073784,9,198,1.607595,1
1998,0.846129,1.576588,0.541085,1.073784,0.960122,0.620211,0.846129,1.518073,0.174993,1.576588,0.550717,0.073419,0.541085,1.071439,0.060588,1.073784,9,199,2.376922,1


In [6]:
# Inspect the group-level parameter dict returned alongside basic_ddm_df.
basic_ddm_group_params

{'beta_v_intercept_mean': np.float64(0.9601221993778131),
 'beta_v_intercept_std': np.float64(0.6202113666129679),
 'beta_a_intercept_mean': np.float64(1.5180728415903617),
 'beta_a_intercept_std': np.float64(0.1749925915456391),
 'beta_z_intercept_mean': np.float64(0.5507165437128025),
 'beta_z_intercept_std': np.float64(0.07341890304247371),
 'beta_t_intercept_mean': np.float64(1.0714385818751013),
 'beta_t_intercept_std': np.float64(0.060588478055820814)}

In [7]:
# Inspect the subject-level parameter dict returned alongside basic_ddm_df.
basic_ddm_subject_params

{'beta_v_intercept_subject': array([ 1.32265516,  1.94299148,  0.77003458, -0.08295184,  1.28413463,
         1.01848192,  1.31925443,  2.05077732,  0.95835859,  0.84612861]),
 'beta_a_intercept_subject': array([1.9095831 , 1.70493003, 1.44084288, 1.59546849, 1.1993287 ,
        1.32271812, 1.35280074, 1.60880281, 1.3982332 , 1.57658805]),
 'beta_z_intercept_subject': array([0.47146457, 0.65394621, 0.57093546, 0.48780022, 0.4813742 ,
        0.60548385, 0.43091594, 0.538138  , 0.44015418, 0.5410845 ]),
 'beta_t_intercept_subject': array([1.05233494, 1.13249462, 1.08738971, 1.21808513, 1.10820932,
        1.00465332, 1.04894588, 0.99597588, 1.13682005, 1.07378389])}

## Basic Model Angle

In [7]:
# Simulate a simple hierarchical dataset for the "angle" model:
# 10 subjects, 200 trials each.
BasicAngleGenerator = SSMDataGenerator(model="angle", n_subjects=10, n_trials=200)
(
    basic_angle_df,
    basic_angle_group_params,
    basic_angle_subject_params,
) = BasicAngleGenerator.make_simple_hierarchical_dataset()

Attaching subject level parameters
Adding trials to attached df
Attaching simulations to df


In [33]:
# Inspect the trial-level DataFrame returned for the angle model.
basic_angle_df

Unnamed: 0,v,a,z,t,theta,beta_v_intercept_mean,beta_v_intercept_std,beta_v_intercept_subject,beta_a_intercept_mean,beta_a_intercept_std,...,beta_t_intercept_mean,beta_t_intercept_std,beta_t_intercept_subject,beta_theta_intercept_mean,beta_theta_intercept_std,beta_theta_intercept_subject,subject_id,trial,rt,response
0,0.743397,2.72398,0.476295,0.623608,0.400385,0.005676,0.976004,0.743397,2.494229,0.274647,...,0.475915,0.313005,0.623608,0.362399,0.077915,0.400385,0,1,2.778610,-1
1,0.743397,2.72398,0.476295,0.623608,0.400385,0.005676,0.976004,0.743397,2.494229,0.274647,...,0.475915,0.313005,0.623608,0.362399,0.077915,0.400385,0,2,2.858990,-1
2,0.743397,2.72398,0.476295,0.623608,0.400385,0.005676,0.976004,0.743397,2.494229,0.274647,...,0.475915,0.313005,0.623608,0.362399,0.077915,0.400385,0,3,3.714947,1
3,0.743397,2.72398,0.476295,0.623608,0.400385,0.005676,0.976004,0.743397,2.494229,0.274647,...,0.475915,0.313005,0.623608,0.362399,0.077915,0.400385,0,4,4.044841,1
4,0.743397,2.72398,0.476295,0.623608,0.400385,0.005676,0.976004,0.743397,2.494229,0.274647,...,0.475915,0.313005,0.623608,0.362399,0.077915,0.400385,0,5,3.666496,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1.676167,2.65122,0.271608,1.026236,0.338067,0.005676,0.976004,1.676167,2.494229,0.274647,...,0.475915,0.313005,1.026236,0.362399,0.077915,0.338067,9,196,1.964551,1
1996,1.676167,2.65122,0.271608,1.026236,0.338067,0.005676,0.976004,1.676167,2.494229,0.274647,...,0.475915,0.313005,1.026236,0.362399,0.077915,0.338067,9,197,2.930026,1
1997,1.676167,2.65122,0.271608,1.026236,0.338067,0.005676,0.976004,1.676167,2.494229,0.274647,...,0.475915,0.313005,1.026236,0.362399,0.077915,0.338067,9,198,2.814898,1
1998,1.676167,2.65122,0.271608,1.026236,0.338067,0.005676,0.976004,1.676167,2.494229,0.274647,...,0.475915,0.313005,1.026236,0.362399,0.077915,0.338067,9,199,3.624437,1


In [34]:
# Inspect the group-level parameter dict returned alongside basic_angle_df.
basic_angle_group_params

{'beta_v_intercept_mean': np.float64(0.00567571578249649),
 'beta_v_intercept_std': np.float64(0.9760035475648428),
 'beta_a_intercept_mean': np.float64(2.4942285454825748),
 'beta_a_intercept_std': np.float64(0.2746472136387566),
 'beta_z_intercept_mean': np.float64(0.31834788265932645),
 'beta_z_intercept_std': np.float64(0.08336666554648507),
 'beta_t_intercept_mean': np.float64(0.47591511501298656),
 'beta_t_intercept_std': np.float64(0.3130051085314358),
 'beta_theta_intercept_mean': np.float64(0.3623988311070121),
 'beta_theta_intercept_std': np.float64(0.07791506480900795)}

In [35]:
# Inspect the subject-level parameter dict returned alongside basic_angle_df.
basic_angle_subject_params

{'beta_v_intercept_subject': array([ 0.74339724, -0.66451858, -0.42109058, -0.47756154,  0.52871729,
         0.45555462,  1.00010245,  0.23110114,  0.59938256,  1.67616714]),
 'beta_a_intercept_subject': array([2.72398025, 2.11794733, 2.48927966, 2.50918747, 2.49755695,
        2.83002048, 2.06157363, 2.77538807, 1.7819492 , 2.65122033]),
 'beta_z_intercept_subject': array([0.4762949 , 0.32691378, 0.40648586, 0.36609949, 0.37852609,
        0.36808803, 0.42949309, 0.20347374, 0.36952977, 0.27160753]),
 'beta_t_intercept_subject': array([0.62360766, 0.47255278, 0.25647814, 0.28690505, 0.08567513,
        0.2919534 , 0.22723642, 0.60500744, 0.7739675 , 1.02623645]),
 'beta_theta_intercept_subject': array([0.4003847 , 0.35436501, 0.49375101, 0.35190404, 0.34606763,
        0.38171108, 0.28246842, 0.43559515, 0.51754703, 0.33806673])}

## Basic Model Angle Regression

In [12]:
# Angle model with regressions: a categorical covariate on "a" and a
# continuous covariate on "v" (10 subjects, 500 trials each).
BasicAngleRegressionGenerator = SSMDataGenerator(
    model="angle",
    n_subjects=10,
    n_trials=500,
)
(
    basic_angle_regression_df,
    basic_angle_regression_group_params,
    basic_angle_regression_subject_params,
) = BasicAngleRegressionGenerator.make_hierarchical_regression_dataset(
    categorical_target="a",
    categorical_covariate="costly_fail_condition",
    continuous_target="v",
    continuous_covariate="difficulty",
)

Attaching subject level parameters
Adding trials to attached df
target_parameter a
param_bounds_buffered [0.35, 2.95]
param_bounds_range 2.6
param_bounds_std_max 0.43333333333333335
target_parameter v
param_bounds_buffered [-2.95, 2.95]
param_bounds_range 5.9
param_bounds_std_max 0.9833333333333334
Attaching simulations to df


In [13]:
# Distinct subject-level coefficients of the categorical covariate on "a".
basic_angle_regression_df["beta_a_costly_fail_condition_subject"].unique()

array([-0.03209808,  0.15448509,  0.24749627,  0.16245184,  0.28416612,
        0.06680176,  0.23839145,  0.09697825, -0.011294  ,  0.28011928])

In [14]:
# Inspect the trial-level DataFrame currently bound to basic_angle_regression_df.
basic_angle_regression_df

Unnamed: 0,v,a,z,t,theta,beta_v_intercept_mean,beta_v_intercept_std,beta_v_intercept_subject,beta_a_intercept_mean,beta_a_intercept_std,...,costly_fail_condition,beta_a_costly_fail_condition_group_mean,beta_a_costly_fail_condition_group_std,beta_a_costly_fail_condition_subject,continuous_difficulty,beta_v_difficulty_group_mean,beta_v_difficulty_group_std,beta_v_difficulty_subject,rt,response
0,2.187921,2.074711,0.434626,1.387398,0.227899,0.898552,0.576321,2.053199,2.205854,0.271286,...,0,0.143583,0.106631,-0.032098,-0.748822,-0.261719,0.100471,-0.179912,2.113653,1
1,2.379267,2.042613,0.434626,1.387398,0.227899,0.898552,0.576321,2.053199,2.205854,0.271286,...,1,0.143583,0.106631,-0.032098,-1.812374,-0.261719,0.100471,-0.179912,2.485609,1
2,2.495176,2.074711,0.434626,1.387398,0.227899,0.898552,0.576321,2.053199,2.205854,0.271286,...,0,0.143583,0.106631,-0.032098,-2.456629,-0.261719,0.100471,-0.179912,1.743468,1
3,2.350250,2.074711,0.434626,1.387398,0.227899,0.898552,0.576321,2.053199,2.205854,0.271286,...,0,0.143583,0.106631,-0.032098,-1.651091,-0.261719,0.100471,-0.179912,1.800189,1
4,2.510770,2.074711,0.434626,1.387398,0.227899,0.898552,0.576321,2.053199,2.205854,0.271286,...,0,0.143583,0.106631,-0.032098,-2.543305,-0.261719,0.100471,-0.179912,1.691984,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,1.595360,2.010042,0.537740,1.374099,0.206319,0.898552,0.576321,1.449787,2.205854,0.271286,...,0,0.143583,0.106631,0.280119,-0.895544,-0.261719,0.100471,-0.162552,2.130083,1
4996,1.839195,2.010042,0.537740,1.374099,0.206319,0.898552,0.576321,1.449787,2.205854,0.271286,...,0,0.143583,0.106631,0.280119,-2.395585,-0.261719,0.100471,-0.162552,1.675543,1
4997,1.383093,2.290161,0.537740,1.374099,0.206319,0.898552,0.576321,1.449787,2.205854,0.271286,...,1,0.143583,0.106631,0.280119,0.410290,-0.261719,0.100471,-0.162552,4.792216,1
4998,1.491031,2.290161,0.537740,1.374099,0.206319,0.898552,0.576321,1.449787,2.205854,0.271286,...,1,0.143583,0.106631,0.280119,-0.253729,-0.261719,0.100471,-0.162552,2.107514,1


In [15]:
# Inspect the group-level parameter dict of the regression dataset.
basic_angle_regression_group_params

{'beta_v_intercept_mean': np.float64(0.8985523514158159),
 'beta_v_intercept_std': np.float64(0.5763211891271494),
 'beta_a_intercept_mean': np.float64(2.205854013065632),
 'beta_a_intercept_std': np.float64(0.271286356870632),
 'beta_z_intercept_mean': np.float64(0.49804345054987226),
 'beta_z_intercept_std': np.float64(0.07967798291748555),
 'beta_t_intercept_mean': np.float64(1.3495061277400267),
 'beta_t_intercept_std': np.float64(0.26545147091450744),
 'beta_theta_intercept_mean': np.float64(0.22275434569435665),
 'beta_theta_intercept_std': np.float64(0.13394492797522622),
 'beta_a_costly_fail_condition_group_mean': 0.14358275690147387,
 'beta_a_costly_fail_condition_group_std': 0.10663116155199742,
 'beta_v_difficulty_group_mean': -0.2617190284603007,
 'beta_v_difficulty_group_std': 0.10047114619298894}

In [87]:
# Inspect the subject-level parameter dict of the regression dataset.
basic_angle_regression_subject_params

{'beta_v_intercept_subject': array([-0.56839222, -1.2856468 , -0.68706092, -0.73604032, -0.97430782,
        -1.27550914, -1.14417443, -1.36282834, -0.48417237, -0.83099222]),
 'beta_a_intercept_subject': array([1.20111708, 1.13359025, 1.17394317, 1.22399196, 1.21184242,
        1.1691834 , 1.15354416, 1.22549401, 1.06267049, 1.04161651]),
 'beta_z_intercept_subject': array([0.36929481, 0.28087344, 0.29580589, 0.44317167, 0.26062397,
        0.29409115, 0.32237403, 0.40561904, 0.35766977, 0.37542188]),
 'beta_t_intercept_subject': array([1.1990351 , 1.69623232, 1.39154207, 1.31225399, 1.64798422,
        1.51793684, 1.50988996, 1.35375607, 1.12892466, 1.3177839 ]),
 'beta_theta_intercept_subject': array([0.04878168, 0.40993746, 0.23706887, 0.21233059, 0.44028583,
        0.35010803, 0.34286332, 0.18326965, 0.39665851, 0.33360488]),
 'beta_a_cost_fail_condition_subject': array([0.74720431, 0.31186981, 0.31186981, ..., 0.51668533, 0.51668533,
        1.12491106], shape=(5000,)),
 'beta_v

## Basic Model Angle Regression Past Dependence

In [43]:
# Angle model with regressions on "a" and "v" plus a sticky-choice effect
# on "z" (10 subjects, 500 trials each).
AngleStickyChoice = SSMDataGenerator(model="angle", n_subjects=10, n_trials=500)

# NOTE: this rebinds the basic_angle_regression_* names that the plain
# regression cell also assigns.
(
    basic_angle_regression_df,
    basic_angle_regression_group_params,
    basic_angle_regression_subject_params,
) = AngleStickyChoice.make_hierarchical_regression_with_sticky_choice(
    categorical_target="a",
    categorical_covariate="costly_fail_condition",
    continuous_target="v",
    continuous_covariate="difficulty",
    sticky_target="z",
)

Attaching subject level parameters
Adding trials to attached df
target_parameter a
param_bounds_buffered [0.35, 2.95]
param_bounds_range 2.6
param_bounds_std_max 0.28888888888888886
target_parameter v
param_bounds_buffered [-2.95, 2.95]
param_bounds_range 5.9
param_bounds_std_max 0.6555555555555556
target_parameter z
param_bounds_buffered [0.15000000000000002, 0.85]
param_bounds_range 0.7
param_bounds_std_max 0.07777777777777777
subject_params {'beta_v_intercept_subject': array([ 0.07083904,  0.0597692 , -0.17537639,  0.29205686, -0.20610663,
       -0.03388467,  0.1755367 ,  0.1101317 ,  0.09443455, -0.256974  ]), 'beta_a_intercept_subject': array([1.5682795 , 1.56302392, 1.5690351 , 1.52425186, 1.51458631,
       1.54180073, 1.51362326, 1.62705313, 1.58458537, 1.66368609]), 'beta_z_intercept_subject': array([0.38210284, 0.23151295, 0.34551051, 0.28657059, 0.34287878,
       0.23329189, 0.38429745, 0.28739246, 0.48855932, 0.48807426]), 'beta_t_intercept_subject': array([1.08590318, 0.

In [44]:
# Inspect basic_angle_regression_df. NOTE: both the plain regression cell and
# the sticky-choice cell assign this name, so the frame shown depends on which
# of them ran last.
basic_angle_regression_df

Unnamed: 0,v,a,z,t,theta,beta_v_intercept_mean,beta_v_intercept_std,beta_v_intercept_subject,beta_a_intercept_mean,beta_a_intercept_std,...,continuous_difficulty,beta_v_difficulty_group_mean,beta_v_difficulty_group_std,beta_v_difficulty_subject,beta_sticky_z_group_mean,beta_sticky_z_group_std,beta_sticky_z_subject,response_l1,response,rt
0,0.361145,1.670847,0.382103,1.085903,0.817828,0.023818,0.18828,0.070839,1.641428,0.084619,...,-2.662952,-0.117149,0.156556,-0.109017,0.064734,0.05,0.035528,0,1,1.701666
1,-0.036491,1.568280,0.417630,1.085903,0.817828,0.023818,0.18828,0.070839,1.641428,0.084619,...,0.984527,-0.117149,0.156556,-0.109017,0.064734,0.05,0.035528,1,1,1.635126
2,0.226707,1.568280,0.417630,1.085903,0.817828,0.023818,0.18828,0.070839,1.641428,0.084619,...,-1.429764,-0.117149,0.156556,-0.109017,0.064734,0.05,0.035528,1,-1,1.403814
3,0.252512,1.568280,0.346575,1.085903,0.817828,0.023818,0.18828,0.070839,1.641428,0.084619,...,-1.666473,-0.117149,0.156556,-0.109017,0.064734,0.05,0.035528,-1,-1,1.846738
4,0.145374,1.670847,0.346575,1.085903,0.817828,0.023818,0.18828,0.070839,1.641428,0.084619,...,-0.683701,-0.117149,0.156556,-0.109017,0.064734,0.05,0.035528,-1,1,1.957390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,-0.473804,1.663686,0.471543,1.125252,0.516766,0.023818,0.18828,-0.256974,1.641428,0.084619,...,-0.935526,-0.117149,0.156556,0.231773,0.064734,0.05,0.016532,-1,-1,2.744105
4996,-0.619987,1.844522,0.471543,1.125252,0.516766,0.023818,0.18828,-0.256974,1.641428,0.084619,...,-1.566246,-0.117149,0.156556,0.231773,0.064734,0.05,0.016532,-1,1,1.643629
4997,-0.787605,1.663686,0.504606,1.125252,0.516766,0.023818,0.18828,-0.256974,1.641428,0.084619,...,-2.289444,-0.117149,0.156556,0.231773,0.064734,0.05,0.016532,1,-1,1.764809
4998,-0.681949,1.663686,0.471543,1.125252,0.516766,0.023818,0.18828,-0.256974,1.641428,0.084619,...,-1.833581,-0.117149,0.156556,0.231773,0.064734,0.05,0.016532,-1,-1,2.770151


['subject_id',
 'trial',
 'costly_fail_condition',
 'continuous_difficulty',
 'response_l1',
 'response',
 'rt']

In [None]:



#     return df[[col for col in df.columns if \
#  (("beta" not in col) and (col not in model_config["angle"]["params"]))] + \
# ["beta"] + model_config["angle"]["params"]]

# order_columns_to_hssm_standard(basic_angle_regression_df, model_config)

In [None]:
# Columns that neither contain "beta" in their name nor are angle model parameters.
[
    col
    for col in basic_angle_regression_df.columns
    if not ("beta" in col or col in model_config["angle"]["params"])
]

['subject_id',
 'trial',
 'costly_fail_condition',
 'continuous_difficulty',
 'response_l1',
 'response',
 'rt']

In [None]:
# Columns that neither contain "beta" in their name nor are angle model parameters.
[
    col
    for col in basic_angle_regression_df.columns
    if not ("beta" in col or col in model_config["angle"]["params"])
]

['subject_id',
 'trial',
 'costly_fail_condition',
 'continuous_difficulty',
 'response_l1',
 'response',
 'rt']

In [49]:
# "beta" followed by the angle model's parameter names.
["beta", *model_config["angle"]["params"]]

['beta', 'v', 'a', 'z', 't', 'theta']

In [None]:
# Persist the sticky-choice dataset using save_data's default folder.
AngleStickyChoice.save_data(filename_base="angle_sticky_choice")

In [39]:
# Inspect basic_angle_regression_df. NOTE: both the plain regression cell and
# the sticky-choice cell assign this name, so the frame shown depends on which
# of them ran last.
basic_angle_regression_df

Unnamed: 0,v,a,z,t,theta,beta_v_intercept_mean,beta_v_intercept_std,beta_v_intercept_subject,beta_a_intercept_mean,beta_a_intercept_std,...,continuous_difficulty,beta_v_difficulty_group_mean,beta_v_difficulty_group_std,beta_v_difficulty_subject,beta_sticky_z_group_mean,beta_sticky_z_group_std,beta_sticky_z_subject,response_l1,response,rt
0,0.316135,1.795942,0.378706,0.992742,0.461533,0.568445,0.170478,0.623215,1.571679,0.26387,...,-1.456839,0.215873,0.193514,0.210785,0.062011,0.05,0.115059,0,1,2.727716
1,0.179236,1.795942,0.493765,0.992742,0.461533,0.568445,0.170478,0.623215,1.571679,0.26387,...,-2.106312,0.215873,0.193514,0.210785,0.062011,0.05,0.115059,1,-1,3.120431
2,0.583559,1.622398,0.263648,0.992742,0.461533,0.568445,0.170478,0.623215,1.571679,0.26387,...,-0.188133,0.215873,0.193514,0.210785,0.062011,0.05,0.115059,-1,-1,1.271058
3,0.353642,1.795942,0.263648,0.992742,0.461533,0.568445,0.170478,0.623215,1.571679,0.26387,...,-1.278903,0.215873,0.193514,0.210785,0.062011,0.05,0.115059,-1,-1,2.385264
4,0.145220,1.795942,0.263648,0.992742,0.461533,0.568445,0.170478,0.623215,1.571679,0.26387,...,-2.267691,0.215873,0.193514,0.210785,0.062011,0.05,0.115059,-1,1,2.630082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.358185,1.462658,0.375271,1.162775,0.350785,0.568445,0.170478,0.339719,1.571679,0.26387,...,-0.641534,0.215873,0.193514,-0.028783,0.062011,0.05,-0.022413,-1,-1,1.941853
4996,0.345848,1.462658,0.375271,1.162775,0.350785,0.568445,0.170478,0.339719,1.571679,0.26387,...,-0.212909,0.215873,0.193514,-0.028783,0.062011,0.05,-0.022413,-1,-1,2.663720
4997,0.422390,1.462658,0.375271,1.162775,0.350785,0.568445,0.170478,0.339719,1.571679,0.26387,...,-2.872155,0.215873,0.193514,-0.028783,0.062011,0.05,-0.022413,-1,-1,2.097954
4998,0.362893,1.462658,0.375271,1.162775,0.350785,0.568445,0.170478,0.339719,1.571679,0.26387,...,-0.805100,0.215873,0.193514,-0.028783,0.062011,0.05,-0.022413,-1,-1,2.866457


## RL?