In [16]:
import ssms
from ssms.config import model_config as model_config
from ssms.basic_simulators.simulator import simulator
import numpy as np
import pandas as pd

In [39]:
model_config["ddm"]

{'name': 'ddm',
 'params': ['v', 'a', 'z', 't'],
 'param_bounds': [[-3.0, 0.3, 0.1, 0.0], [3.0, 2.5, 0.9, 2.0]],
 'boundary_name': 'constant',
 'boundary': <function ssms.basic_simulators.boundary_functions.constant(t: float | numpy.ndarray = 0) -> float | numpy.ndarray>,
 'boundary_params': [],
 'n_params': 4,
 'default_params': [0.0, 1.0, 0.5, 0.001],
 'nchoices': 2,
 'choices': [-1, 1],
 'n_particles': 1,
 'simulator': <cyfunction ddm_flexbound at 0x169a637c0>}

In [None]:
def sample_parameter_space(model):

In [14]:
model_config['ddm']['param_bounds']

[[-3.0, 0.3, 0.1, 0.0], [3.0, 2.5, 0.9, 2.0]]

In [32]:
np.std(np.random.normal(loc = [0, scale = 0.5, size = 1000))

np.float64(0.5004331047940945)

In [128]:
# Generate basic DDM

def get_parameter_sets_hierarchical(model, n_parameter_sets):
    tmp_config = model_config[model]["param_bounds"]
    param_bounds = model_config[model]["param_bounds"]

    param_bounds_mean = (np.array(param_bounds)[1] + np.array(param_bounds)[0]) / 2
    param_bounds_range = np.array(param_bounds)[1] - np.array(param_bounds)[0]
    param_bounds_std_max = param_bounds_range / 6
    
    # 
    max_mean_perturbation = param_bounds_std_max * 2


    # Sample mean perturbation
    param_bounds_mean_perturbation = np.random.uniform(-max_mean_perturbation, max_mean_perturbation)
    group_sample_means = param_bounds_mean + param_bounds_mean_perturbation

    # Sample std perturbation
    group_sample_stds = np.random.uniform(0.05, np.maximum(0.05, param_bounds_std_max))

    # # Sample subject level parameters
    cnt = 0
    while True:
        subject_level_parameters = np.random.normal(loc = group_sample_means,
                                                    scale = group_sample_stds,
                                                    size=(n_parameter_sets, len(model_config[model]["params"])))
        
        # check if all parameters are within the bounds
        if np.all(subject_level_parameters < (np.array(param_bounds[1]) - 0.05)) and np.all(subject_level_parameters > (np.array(param_bounds[0]) + 0.05)):
            break
        elif cnt > 100:
            raise ValueError("Failed to sample parameters within bounds after 100 attempts")
        cnt += 1

    df = pd.DataFrame(subject_level_parameters, columns = model_config[model]["params"])


    # Identify group means and standard deviations in the parameter sets
    for i, param in enumerate(model_config[model]["params"]):
        df[f"{param}_group_mean"] = group_sample_means[i]
        df[f"{param}_group_std"] = group_sample_stds[i]

    df["group_distributions"] = "Normal"
    df["subject_id"] = np.arange(n_parameter_sets).astype(str)
    return df

def get_parameter_sets_uniform(model, n_parameter_sets):
    param_bounds = model_config[model]["param_bounds"]
    df = pd.DataFrame(np.random.uniform(np.array(param_bounds[0] + 0.05), np.array(param_bounds[1] - 0.05), 
                                        size=(n_parameter_sets, len(model_config[model]["params"]))), 
                                        columns = model_config[model]["params"])
    df["group_distributions"] = "Uniform"
    df["subject_id"] = np.arange(n_parameter_sets).astype(str)
    return df


def inject_trials(df, n_trials):
    df_repeated = df.loc[np.repeat(df.index, n_trials)].reset_index(drop = True)
    df_repeated["trial"] = df_repeated.groupby("subject_id").cumcount() + 1
    return df_repeated


def inject_categorical_regression(df, model: str, levels: int, target_parameter: str, categorical_name: str = "x", buffer: float = 0.05):
    param_bounds = model_config[model]["param_bounds"][model_config[model]["params"].index(target_parameter)]
    param_bounds_range = param_bounds[1] - param_bounds[0]
    param_bounds_std_max = param_bounds_range / 6

    n_subjects = len(df["subject_id"].unique())
    n_trials = len(df["trial"].unique())

    # Inject categorical covariate
    cnt = 0
    while True:
        df[f"categorical_{categorical_name}"] = np.random.choice(range(levels),
                                                                size=(n_subjects * n_trials),
                                                                p = np.ones(levels) / levels)
        
        beta_group_mean = np.random.uniform(param_bounds[0] + (param_bounds_range * 0.2),
                                            param_bounds[1] - (param_bounds_range * 0.2), size=levels)
        beta_group_std = np.random.uniform(0.05, np.maximum(0.05, param_bounds_std_max), size = levels)
        beta_subject = np.random.normal(beta_group_mean, beta_group_std, size=(n_subjects, levels))
        
        df[f"beta_{target_parameter}_{categorical_name}_group_mean"] = beta_group_mean[df[f"categorical_{categorical_name}"].values]
        df[f"beta_{target_parameter}_{categorical_name}_group_std"] = beta_group_std[df[f"categorical_{categorical_name}"].values]
        df[target_parameter] = beta_subject[df[f"subject_id"].astype(int), df[f"categorical_{categorical_name}"]]

        if cnt > 100:
            raise ValueError("Failed to sample parameters within bounds after 100 attempts")
        cnt += 1

        if np.all(df[target_parameter] < (np.array(param_bounds[1]) - buffer)) and np.all(df[target_parameter] > (np.array(param_bounds[0]) + buffer)):
            break
    return df

def inject_continuous_regression(df,
                                 model:str,
                                 target_parameter: str,
                                 covariate_names: str,
                                 buffer: float = 0.05):
    param_bounds = model_config[model]["param_bounds"][model_config[model]["params"].index(target_parameter)]
    param_bounds_range = param_bounds[1] - param_bounds[0]
    param_bounds_std_max = param_bounds_range / 6

    n_subjects = len(df["subject_id"].unique())
    n_trials = len(df["trial"].unique())

    cnt = 0
    while True:
        for covariate_name in covariate_names:
            df[f"continuous_{covariate_name}"] = np.random.normal(-1, 1, size=(n_subjects * n_trials))

        beta_intercept_mean = np.random.uniform(param_bounds[0] + (param_bounds_range * 0.3),
                                                                                        param_bounds[1] - (param_bounds_range * 0.3))
        beta_intercept_std = np.random.uniform(0.05, 
                                                                                        np.maximum(0.05, param_bounds_std_max))
        beta_subjects_intercept = np.random.normal(loc = beta_intercept_mean,
                                                scale = beta_intercept_std,
                                                size = n_subjects)
        
        df[f"beta_intercept_{target_parameter}_mean"] = beta_intercept_mean
        df[f"beta_intercept_{target_parameter}_std"] = beta_intercept_std
        df[f"beta_intercept_{target_parameter}_subject"] = beta_subjects_intercept[df[f"subject_id"].astype(int).values]

        
        for i, covariate_name in enumerate(covariate_names):
            beta_group_mean = np.random.uniform(-0.75, 0.75)
            beta_group_std = np.random.uniform(0.05, np.maximum(0.05, param_bounds_std_max))
            
            beta_subjects = np.random.normal(loc = beta_group_mean,
                                                scale = beta_group_std,
                                                size = n_subjects)

            df[f"beta_{target_parameter}_{covariate_name}_group_mean"] = beta_group_mean
            df[f"beta_{target_parameter}_{covariate_name}_group_std"] = beta_group_std
            df[f"beta_{target_parameter}_{covariate_name}_subject"] = beta_subjects[df[f"subject_id"].astype(int).values]

        
        # Compute linear predictor (target_parameter)
        df[target_parameter] = df[f"beta_intercept_{target_parameter}_subject"]
        for covariate_name in covariate_names:
                df[target_parameter] += df[f"beta_{target_parameter}_{covariate_name}_subject"] * df[f"continuous_{covariate_name}"]

        
        if np.all(df[target_parameter] < (np.array(param_bounds[1]) - buffer)) and np.all(df[target_parameter] > (np.array(param_bounds[0]) + buffer)):
            break
        
        if cnt > 100:
            raise ValueError("Failed to sample parameters within bounds after 100 attempts")
        cnt += 1
    return df

def inject_rl_process(df,
                      model:str,
                      target_parameter: str,
                      n_features_continuous: int,
                      n_levels: list[int]):
    

    
    
    # n_features_continuous = n_levels

    # n_levels[i]for i, feature in enumerate(features):

    # # Generate continuous features
    # continuous_features = np.random.normal(0, 1, size=(n_subjects, n_trials, n_features_continuous))


# def inject_continuous_regression(df, target_parameters, )
    



# def inject_trials(df, n_trials):

# def generate_basic_ddm(n_trials, n_subjects):


#     out = {}
#     for i in range(n_subjects):
#         out[i] = simulator(model = "ddm",
#                            n_samples = 250)
        
#     return out


In [122]:
inject_categorical_regression(df_repeated, model = "ddm", levels = 2, target_parameter = "v")

Unnamed: 0,v,a,z,t,v_group_mean,v_group_std,a_group_mean,a_group_std,z_group_mean,z_group_std,t_group_mean,t_group_std,group_distributions,subject_id,trial,categorical_x,beta_v_x_group_mean,beta_v_x_group_std
0,-0.822224,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,0,1,0,-0.915056,0.067462
1,-0.894038,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,0,2,1,-1.026643,0.162658
2,-0.894038,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,0,3,1,-1.026643,0.162658
3,-0.822224,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,0,4,0,-0.915056,0.067462
4,-0.894038,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,0,5,1,-1.026643,0.162658
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-1.001312,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,9,96,1,-1.026643,0.162658
996,-1.001312,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,9,97,1,-1.026643,0.162658
997,-1.001312,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,9,98,1,-1.026643,0.162658
998,-1.001312,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,0.939569,0.136288,Normal,9,99,1,-1.026643,0.162658


In [129]:
inject_continuous_regression(df_repeated, model = "ddm", target_parameter = "v", covariate_names = ["y"])

Unnamed: 0,v,a,z,t,v_group_mean,v_group_std,a_group_mean,a_group_std,z_group_mean,z_group_std,...,beta_v_x_group_std,continuous_y,beta_intercept_v_y_mean,beta_intercept_v_y_std,beta_intercept_v_mean,beta_intercept_v_std,beta_intercept_v_subject,beta_v_y_group_mean,beta_v_y_group_std,beta_v_y_subject
0,-1.974794,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.067462,-2.219938,-1.246681,0.338462,-1.394676,0.095451,-1.498008,0.305429,0.082528,0.214775
1,-2.184325,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,-3.195525,-1.246681,0.338462,-1.394676,0.095451,-1.498008,0.305429,0.082528,0.214775
2,-1.692233,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,-0.904320,-1.246681,0.338462,-1.394676,0.095451,-1.498008,0.305429,0.082528,0.214775
3,-2.058978,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.067462,-2.611900,-1.246681,0.338462,-1.394676,0.095451,-1.498008,0.305429,0.082528,0.214775
4,-1.939745,2.017536,0.477875,0.730873,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,-2.056746,-1.246681,0.338462,-1.394676,0.095451,-1.498008,0.305429,0.082528,0.214775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-1.187441,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,0.058019,-1.246681,0.338462,-1.394676,0.095451,-1.203156,0.305429,0.082528,0.270860
996,-0.867256,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,1.240122,-1.246681,0.338462,-1.394676,0.095451,-1.203156,0.305429,0.082528,0.270860
997,-1.464540,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,-0.965019,-1.246681,0.338462,-1.394676,0.095451,-1.203156,0.305429,0.082528,0.270860
998,-1.468661,1.483215,0.328819,0.786841,1.558753,0.640445,1.693206,0.269878,0.366587,0.114088,...,0.162658,-0.980232,-1.246681,0.338462,-1.394676,0.095451,-1.203156,0.305429,0.082528,0.270860


In [118]:
df_repeated

Unnamed: 0,v,a,z,t,v_group_mean,v_group_std,a_group_mean,a_group_std,z_group_mean,z_group_std,t_group_mean,t_group_std,group_distributions,subject_id,trial
0,0.408419,0.653024,0.726926,1.87690,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,0,1
1,0.408419,0.653024,0.726926,1.87690,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,0,2
2,0.408419,0.653024,0.726926,1.87690,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,0,3
3,0.408419,0.653024,0.726926,1.87690,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,0,4
4,0.408419,0.653024,0.726926,1.87690,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.539778,0.604161,0.526089,1.25701,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,9,96
996,0.539778,0.604161,0.526089,1.25701,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,9,97
997,0.539778,0.604161,0.526089,1.25701,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,9,98
998,0.539778,0.604161,0.526089,1.25701,0.341096,0.297378,0.694203,0.080475,0.622343,0.127846,1.63571,0.222316,Normal,9,99


In [121]:
df_out = get_parameter_sets_hierarchical("ddm", 10)
df_repeated = inject_trials(df_out, 100)

In [98]:
df_out['subject_id'].astype(int)

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
Name: subject_id, dtype: int64

In [8]:
generate_basic_ddm(250, 10)

TypeError: simulator() missing 1 required positional argument: 'theta'

In [3]:
out = generate_basic_ddm(250, 10)

TypeError: 'module' object is not callable

In [None]:
# Generate ANGLE

    simulator(model = "ddm",
              n_samples = 250)

In [None]:
# Generate ANGLE with regression backend


In [None]:
# Generate ANGLE with hierarchy

In [None]:
# Generate ANGLE RL

