# MSE-Based Model Fitting

We'll demonstrate a way to fit models to datasets with item repetitions by minimizing the mean squared error between a curve simulated by the model and the corresponding curve computed from the data.

First we specify a loss function. This time, we use the mean squared error between the recall-probability-by-lag curve simulated by CMR and the corresponding curve exhibited in the Lohnas dataset, to see whether we can find parameters that don't have this distortion.

## Loss Function

In [1]:
# export
from numba import njit
from repfr.Repetition_Effects import recall_probability_by_lag
from repfr.DefaultCMR import DefaultCMR as CMR
import numpy as np

@njit(fastmath=True, nogil=True)
def cmr_rep_mse(
    curve, presentations, list_types, experiment_count, presentation_count,
    encoding_drift_rate, start_drift_rate, recall_drift_rate,
    shared_support, item_support, learning_rate, primacy_scale, primacy_decay, 
    stop_probability_scale, stop_probability_growth, choice_sensitivity):
    """
    Mean squared error between a target curve and the matching curve computed 
    from free recall simulated with CMR under the given parameters.

    Arguments:  
    - curve: target values; compared element-wise against the last element of 
        the result of `recall_probability_by_lag` on the simulated data  
    - presentations: 2-D integer array, one row per trial, holding the item 
        index shown at each study position  
    - list_types: per-trial list type codes; types 1 and 2 reuse pre-built 
        models, any larger value builds a model from that trial's row  
    - experiment_count: number of simulated recall sequences per trial  
    - presentation_count: number of study positions in each list  
    - remaining arguments: CMR parameters, passed through to the model 
        constructor unchanged

    Returns the mean of the squared differences between the simulated and 
    target curves.
    """

    # Lists of type 1 (every item shown once) and type 2 (every item shown 
    # twice in a row) have a fixed presentation order, so one pre-trained 
    # model per type serves all such trials.
    # NOTE(review): reusing a model across trials assumes `free_recall` does 
    # not leave state behind that affects later calls - confirm in DefaultCMR.
    half_count = int(presentation_count / 2)
    stable_models = [
        CMR(
            presentation_count, presentation_count, encoding_drift_rate, start_drift_rate, 
            recall_drift_rate, shared_support, item_support, learning_rate, 
            primacy_scale, primacy_decay, stop_probability_scale, 
            stop_probability_growth, choice_sensitivity), 
        CMR(
            half_count, presentation_count, encoding_drift_rate, start_drift_rate, 
            recall_drift_rate, shared_support, item_support, learning_rate, 
            primacy_scale, primacy_decay, stop_probability_scale, 
            stop_probability_growth, choice_sensitivity)]

    # The list length comes from the `presentation_count` argument rather than 
    # from a module-level global, so the function is self-contained.
    stable_models[0].experience(np.eye(presentation_count, presentation_count))
    stable_models[1].experience(
        np.eye(half_count, half_count)[np.repeat(np.arange(half_count), 2)])

    # one row per (trial, experiment) pair; entries left at 0 mean no recall 
    # was stored at that output position
    sim = np.zeros((np.shape(presentations)[0] * experiment_count, 
                   np.shape(presentations)[1]))
    sim = sim.astype(np.int_)
    for trial_index, presentation in enumerate(presentations):

        # item indices are 0-based, so the largest index + 1 is the item count
        item_count = np.max(presentation)+1

        if list_types[trial_index] > 2:
            # presentation order varies by trial, so train a fresh model on 
            # the one-hot item representations in this trial's study order
            model = CMR(
                item_count, presentation_count, encoding_drift_rate, start_drift_rate, 
                recall_drift_rate, shared_support, item_support, learning_rate, 
                primacy_scale, primacy_decay, stop_probability_scale, 
                stop_probability_growth, choice_sensitivity)

            model.experience(np.eye(item_count, item_count)[presentation])
        else:
            model = stable_models[list_types[trial_index]-1]

        # free recall for specified number of experiments
        for experiment in range(experiment_count):
            # shift recalled indices by one so that 0 stays free as padding
            recalled = model.free_recall() + 1
            sim[trial_index * experiment_count + experiment, :len(recalled)] = recalled

    simulated_curve = recall_probability_by_lag(presentations, sim, experiment_count)[-1]
    return np.mean(np.square(simulated_curve - curve))

A wrapper "objective function" helps control which of the relevant model parameters are fixed and which are free during model fitting.

In [2]:
# export

def cmr_rep_mse_objective_function(data_to_fit, presentations, list_types, experiment_count, 
                                   list_length, fixed_parameters, free_parameters):
    """
    Builds an objective function over the free parameters of the model, for 
    use with an optimization routine that searches parameter space.

    Arguments:  
    - data_to_fit: target curve, forwarded to `cmr_rep_mse` as `curve`  
    - presentations, list_types, experiment_count: forwarded to `cmr_rep_mse` 
        unchanged  
    - list_length: forwarded to `cmr_rep_mse` as `presentation_count`  
    - fixed_parameters: dictionary mapping parameter names to the values they 
        keep during search; a free parameter with the same name overrides it  
    - free_parameters: list of parameter names, in the order their values 
        appear in the vector handed to the returned function

    Returns a function that accepts a vector x of values for the free 
    parameters and returns the result of `cmr_rep_mse` evaluated with the 
    fixed and free parameters combined.
    """
    def objective(x):
        # pair each entry of x with the parameter name at the same position
        free_values = {free_parameters[i]: value for i, value in enumerate(x)}

        # free values come second so they win over fixed ones on a name clash
        combined = {**fixed_parameters, **free_values}
        return cmr_rep_mse(
            data_to_fit, presentations, list_types, experiment_count, list_length, 
            **combined)

    return objective

### Demo

Now we test and time the loss function:

In [4]:
from repfr.Data_Preparation import prepare_repdata

# Load the dataset; the unpacked names below are used by the cells that follow.
(trials, events, list_length, presentations,
 list_types, rep_data, subjects) = prepare_repdata('data/repFR.mat')

# preview the first rows of the events table
events.head()

Unnamed: 0,subject,list,item,input,output,study,recall,repeat,intrusion,condition
0,1,1,0,1,1.0,True,True,0,False,4
1,1,1,1,2,2.0,True,True,0,False,4
2,1,1,2,3,3.0,True,True,0,False,4
3,1,1,3,4,4.0,True,True,0,False,4
4,1,1,4,5,5.0,True,True,0,False,4


In [5]:
# names of the model parameters, in the same order as the values in `stored_result`
free_parameters = [
    'encoding_drift_rate', 'start_drift_rate', 'recall_drift_rate',
    'shared_support', 'item_support', 'learning_rate',
    'primacy_scale', 'primacy_decay',
    'stop_probability_scale', 'stop_probability_growth',
    'choice_sensitivity',
]

# one previously obtained value per entry of `free_parameters`
stored_result = [
    8.44186613e-01, 4.82922372e-01, 9.64152301e-01, 4.66982063e-02,
    2.22044605e-16, 4.11208644e-01, 4.38262927e+00, 3.66252400e-01,
    2.51594034e-02, 1.01416573e-01, 1.14461246e+00,
]

# arguments that are not part of the parameter vector
parameters = {'presentation_count': list_length}

# target curve, computed from the trials of list type 4 only
data_curve = recall_probability_by_lag(
    presentations[list_types==4], trials[list_types==4])[-1]

# evaluate the loss once on the same subset of trials
cmr_rep_mse(
    data_curve, presentations[list_types==4], list_types[list_types==4], experiment_count=10,
    **{**parameters, **dict(zip(free_parameters, stored_result))})

0.002115849255518534

In [6]:
%%timeit

cmr_rep_mse(data_curve, presentations[list_types==4], list_types[list_types==4], experiment_count=10, 
    **{**parameters, **{free_parameters[i]:stored_result[i] for i in range(len(stored_result))}})

293 ms ± 4.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Results

In [None]:
from scipy.optimize import differential_evolution
import numpy as np

# parameters the optimizer may vary; `bounds` below follows the same order
free_parameters = [
    'encoding_drift_rate', 'start_drift_rate', 'recall_drift_rate',
    'shared_support', 'item_support', 'learning_rate',
    'primacy_scale', 'primacy_decay',
    'stop_probability_scale', 'stop_probability_growth',
    'choice_sensitivity',
]

# nothing is held fixed in this fit
parameters = {}

# keep the search just inside the open interval (0, 1)
lb = np.finfo(float).eps
ub = 1-np.finfo(float).eps

bounds = (
    [(lb, ub)] * 6        # three drift rates, shared/item support, learning rate
    + [(lb, 100)] * 2     # primacy scale and primacy decay
    + [(lb, ub)]          # stop probability scale
    + [(lb, 10)] * 2      # stop probability growth, choice sensitivity
)

# cost function to be minimized: the mean squared error between the curve
# simulated with a candidate parameter vector and `data_curve`, using the
# trials of list type 4 and one simulated recall sequence per trial
cost_function = cmr_rep_mse_objective_function(
    data_curve, presentations[list_types==4], list_types[list_types==4],
    1, list_length, parameters, free_parameters)

result = differential_evolution(cost_function, bounds, disp=True, maxiter=70)
print(result)

```
     fun: 5.1299521657655926e-05
 message: 'Maximum number of iterations has been exceeded.'
    nfev: 11967
     nit: 70
 success: False
       x: array([8.53688927e-01, 3.91315203e-01, 1.60218200e-01, 1.34342503e-03,
       6.74953399e-01, 3.88335343e-01, 9.12294910e+00, 7.30879498e+01,
       2.46238233e-03, 2.73722040e-01, 3.47056155e+00])
```