In [1]:
# hide
# default_exp model_fitting

from nbdev.showdoc import *

# InstanceCMR Repetitions Benchmark
> Pipeline for parameter fitting updated to respect the possibility of item repetition during encoding

## Configuring the Parameter Search
This time, we specify a log-likelihood function that accepts a `presentations` parameter indicating which item was presented at each study position. This adds some runtime to the cost function: a new model is instantiated for every trial whose list type is greater than 2, since encoding patterns can vary between those trials, while list types 1 and 2 each reuse a single pre-encoded model.

In [10]:
# export
# hide

import numpy as np
from numba import njit
from instance_cmr.models import InstanceCMR

@njit(fastmath=True, nogil=True)
def icmr_rep_likelihood(
        trials, presentations, list_types, list_length, encoding_drift_rate, start_drift_rate, 
        recall_drift_rate, shared_support, item_support, learning_rate, 
        primacy_scale, primacy_decay, stop_probability_scale, 
        stop_probability_growth, choice_sensitivity, context_sensitivity, feature_sensitivity):
    """
    Generalized cost function for fitting the InstanceCMR model to lists that 
    may contain item repetitions, optimized using the numba library.
    
    Output scales inversely with the likelihood that the model and specified 
    parameters would generate the specified trials. For model fitting, is 
    usually wrapped in another function that fixes and frees parameters for 
    optimization.

    **Arguments**:
    - trials: indexable collection with one recall sequence per trial. Each 
        entry is a 1-based study position; a 0 marks the end of recall.  
    - presentations: indexable collection with one integer array per trial, 
        mapping each study position (0-based) to the 0-based index of the 
        item presented there. The number of unique items in a trial is taken 
        to be `max(presentations[trial]) + 1`.  
    - list_types: per-trial integer code. Types 1 and 2 reuse pre-encoded 
        models (every item once / every item twice in immediate succession); 
        types greater than 2 get a model encoded from that trial's own 
        `presentations` row. Values below 1 are not handled explicitly.  
    - list_length: number of study positions per list; also the number of 
        columns reserved per trial for recall probabilities.  
    - A configuration for each remaining parameter of `InstanceCMR`, passed 
        through to its constructor unchanged.

    **Returns** the negative sum of log-likelihoods across specified trials 
    conditional on the specified parameters and the mechanisms of InstanceCMR.
    """

    # entries never written stay at 1 and so contribute log(1) = 0 to the sum
    likelihood = np.ones((len(trials), list_length))
    half_length = int(list_length/2)
    
    # we can use the same model for list types 1 and 2
    stable_models = [InstanceCMR(
            list_length, list_length, encoding_drift_rate, start_drift_rate, 
            recall_drift_rate, shared_support, item_support, learning_rate, 
            primacy_scale, primacy_decay, stop_probability_scale, 
            stop_probability_growth, choice_sensitivity, context_sensitivity, feature_sensitivity), 
                    InstanceCMR(
            half_length, list_length, encoding_drift_rate, start_drift_rate, 
            recall_drift_rate, shared_support, item_support, learning_rate, 
            primacy_scale, primacy_decay, stop_probability_scale, 
            stop_probability_growth, choice_sensitivity, context_sensitivity, feature_sensitivity)]
    # type 1: every item studied once, in order (one-hot rows offset by one column)
    stable_models[0].experience(np.eye(list_length, list_length + 1, 1))
    # type 2: half as many items, each studied twice back to back
    stable_models[1].experience(
        np.eye(half_length, half_length + 1, 1)[np.repeat(np.arange(half_length), 2)])

    for trial_index in range(len(trials)):

        item_count = np.max(presentations[trial_index])+1
        
        if list_types[trial_index] > 2:
            # presentation order differs between trials of these types, so 
            # the model has to be encoded from scratch for this trial
            model = InstanceCMR(
                item_count, list_length, encoding_drift_rate, start_drift_rate, 
                recall_drift_rate, shared_support, item_support, learning_rate, 
                primacy_scale, primacy_decay, stop_probability_scale, 
                stop_probability_growth, choice_sensitivity, context_sensitivity, feature_sensitivity)

            model.experience(np.eye(item_count, item_count + 1, 1)[presentations[trial_index]])
        else:
            model = stable_models[list_types[trial_index]-1]
            
        trial = trials[trial_index]
        recall_count = len(trial)

        model.force_recall()
        for recall_index in range(recall_count + 1):

            # identify index of item recalled; if zero then recall is over
            if recall_index == recall_count:
                if recall_count >= item_count:
                    # The trial has no terminating 0 and no slot is left to 
                    # score a termination event. Reading trial[recall_count] 
                    # here would run past the end of the row, which numba 
                    # does not bounds-check by default.
                    break
                recall = 0
            elif trial[recall_index] == 0:
                recall = 0
            else:
                # translate the recalled study position into the model's 
                # 1-based item index (index 0 is reserved for termination)
                recall = presentations[trial_index][trial[recall_index]-1] + 1

            # store probability of and simulate recalling item with this index
            activation_cue = np.hstack(
                    (np.zeros(model.item_count + 1), model.context))
            likelihood[trial_index, recall_index] = \
                model.outcome_probabilities(activation_cue)[recall]

            if recall == 0:
                break
            model.force_recall(recall)

        # reset model to its pre-retrieval (but post-encoding) state
        model.force_recall(0)

    return -np.sum(np.log(likelihood))

def icmr_rep_objective_function(data_to_fit, presentations, list_types, list_length, fixed_parameters, free_parameters):
    """
    Builds an objective function suitable for an optimizer searching the 
    parameter space of the repetition-aware ICMR likelihood.

    Arguments:  
    - data_to_fit: trials to evaluate, forwarded as the first argument of 
        icmr_rep_likelihood  
    - presentations: forwarded unchanged to icmr_rep_likelihood  
    - list_types: forwarded unchanged to icmr_rep_likelihood  
    - list_length: forwarded unchanged to icmr_rep_likelihood  
    - fixed_parameters: dictionary mapping parameter names to the values they 
        keep during search; overridden by free_parameters on overlap  
    - free_parameters: list of parameter names varied during search, in the 
        same order as the entries of the vector passed to the objective

    Returns a function that accepts a vector x with one value per free 
    parameter and returns icmr_rep_likelihood evaluated on the provided data 
    with the fixed and free parameters combined.
    """
    def objective(x):
        # pair each candidate value with the parameter name at the same position
        candidate = {free_parameters[position]: value
                     for position, value in enumerate(x)}

        # start from the fixed values, then let the candidate values win on overlap
        parameters = dict(fixed_parameters)
        parameters.update(candidate)

        return icmr_rep_likelihood(
            data_to_fit, presentations, list_types, list_length, **parameters)

    return objective

## Single-Subject Demo

In [12]:
from instance_cmr.datasets import *

# Load the repetition free-recall dataset. The path is relative, so it is
# resolved against the kernel's current working directory.
# NOTE(review): `prepare_repdata` presumably comes from the wildcard import of
# `instance_cmr.datasets`; its seven return values are unpacked by position.
trials, events, list_length, presentations, list_types, rep_data, subjects = prepare_repdata(
    '../../data/repFR.mat')

# preview the first rows of the per-event table
events.head()

Unnamed: 0,subject,list,item,input,output,study,recall,repeat,intrusion,condition
0,1,1,0,1,1.0,True,True,0,False,4
1,1,1,1,2,2.0,True,True,0,False,4
2,1,1,2,3,3.0,True,True,0,False,4
3,1,1,3,4,4.0,True,True,0,False,4
4,1,1,4,5,5.0,True,True,0,False,4


In [14]:
# NOTE: `lb` is not used in this cell; it is assigned again in the fitting cell.
lb = np.finfo(float).eps
# Hand-chosen value for every model parameter of `icmr_rep_likelihood`, used to
# check that the cost function runs before handing it to the optimizer.
hand_fit_parameters = {
    'encoding_drift_rate': .8,
    'start_drift_rate': .7,
    'recall_drift_rate': .8,
    'shared_support': 0.01,
    'item_support': 1.0,
    'learning_rate': .3,
    'primacy_scale': 1,
    'primacy_decay': 1,
    'stop_probability_scale': 0.01,
    'stop_probability_growth': 0.3,
    'choice_sensitivity': 2,
    'context_sensitivity': 1,
    'feature_sensitivity': 1
}

# cost of the hand-chosen configuration on the first 48 trials
icmr_rep_likelihood(trials[:48], presentations[:48], list_types[:48], list_length, **hand_fit_parameters)

3293.0806120089983

In [6]:
%%timeit
icmr_rep_likelihood(trials[:48], presentations[:48], list_types[:48], list_length, **hand_fit_parameters)

34 ms ± 645 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


```
17 ms ± 60.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
```

Now we perform the single subject fitting...

In [16]:
from scipy.optimize import differential_evolution
import numpy as np

# limits just inside 0 and 1 so the search never lands exactly on either end
lb = np.finfo(float).eps
ub = 1-np.finfo(float).eps
unit_interval = (lb, ub)

# each free parameter next to its (lower, upper) search bounds, so the name
# and the range cannot drift out of alignment
search_space = [
    ('encoding_drift_rate', unit_interval),
    ('start_drift_rate', unit_interval),
    ('recall_drift_rate', unit_interval),
    ('shared_support', unit_interval),
    ('item_support', unit_interval),
    ('learning_rate', unit_interval),
    ('primacy_scale', (lb, 100)),
    ('primacy_decay', (lb, 100)),
    ('stop_probability_scale', unit_interval),
    ('stop_probability_growth', (lb, 10)),
    ('context_sensitivity', (lb, 10)),
]
free_parameters = [name for name, _ in search_space]
bounds = [limits for _, limits in search_space]

# parameters held constant while the others are searched
fixed_parameters = {'choice_sensitivity': 1, 'feature_sensitivity': 1}

# cost function to be minimized
# ours scales inversely with the probability that the data could have been 
# generated using the specified parameters and our model
cost_function = icmr_rep_objective_function(
    trials[:48], presentations[:48], list_types[:48], list_length,
    fixed_parameters, free_parameters)

result = differential_evolution(cost_function, bounds, disp=True)
print(result)

differential_evolution step 1: f(x)= 3017.03
differential_evolution step 2: f(x)= 3017.03
differential_evolution step 3: f(x)= 3017.03
differential_evolution step 4: f(x)= 3017.03
differential_evolution step 5: f(x)= 2684.91
differential_evolution step 6: f(x)= 2684.91
differential_evolution step 7: f(x)= 2618.1
differential_evolution step 8: f(x)= 2618.1
differential_evolution step 9: f(x)= 2618.1
differential_evolution step 10: f(x)= 2600.4
differential_evolution step 11: f(x)= 2595.87
differential_evolution step 12: f(x)= 2591.16
differential_evolution step 13: f(x)= 2573.26
differential_evolution step 14: f(x)= 2573.26
differential_evolution step 15: f(x)= 2573.26
differential_evolution step 16: f(x)= 2573.26
differential_evolution step 17: f(x)= 2545.82
differential_evolution step 18: f(x)= 2545.82
differential_evolution step 19: f(x)= 2545.82
differential_evolution step 20: f(x)= 2519.43
differential_evolution step 21: f(x)= 2406.36
differential_evolution step 22: f(x)= 2379.63
d

For the first subject in our `trials` data structure (the first 48 rows of `trials`), the search runs pretty slowly compared to my primary baseline code and returns an output with the following attributes:

```
     fun: 1896.0488460602892
     jac: array([-1.37909932e+02, -6.49760292e+01,  3.80207664e+01,  3.89726438e+01,
       -1.08398125e+00,  4.96122311e+01, -7.76151406e+00,  7.50098939e+00,
        1.87378446e+03,  5.77529363e+01, -6.19131701e+00])
 message: 'Optimization terminated successfully.'
    nfev: 7656
     nit: 43
 success: True
       x: array([7.38333200e-01, 9.11533659e-01, 9.45503715e-01, 1.59987819e-01,
       5.26298280e-01, 3.21809937e-01, 8.64043185e-01, 1.10011958e+00,
       1.24608414e-03, 2.67508370e-01, 2.93468878e+00])
```

The `x` attribute of the result object contains the best parameter configuration found, while the `fun` attribute represents the overall cost of the configuration as computed with our specified cost function. 