In this notebook, we will use implementations of the CMR model to generate free recalls. We will change the existing code for the retrieval competition to take into account the concreteness of each sentence.

## (0) Load in Data

First, we transform our free recall data into format that can be used to train the CMR model

In [3]:
import pandas as pd
from cymr import fit, cmr
import numpy as np

# (0) Load in data
our_df = pd.read_csv("C:/Summer 2023 - DSI-SRP/featureFR/Recall Analysis/data/psifr_al1_sl.csv")


# (1) Create function to a number to each sentence
def assign_item_number(story_list: str, passage: int, item: int):
    """Assign item number based on story_list, passage, and item of the sentence.

    Args:
        story_list (str): e.g. 'A',
        passage (int): e.g. 1
        item (int): e.g. 0
    """
    item_number = 100*(ord(story_list) - ord('A'))  + 20*(passage - 1) + item
    return item_number

assign_item_number(story_list="B",passage=1,item=1)

# (1.1) Add item numbers to dataframe
story_list_col = our_df.story_list.to_list()
passage_col = our_df.passage.to_list()
item = our_df.item.to_list()

item_numbers = [assign_item_number(story_list_col[i], passage_col[i], item[i]) for i in range(len(our_df))]
our_df["item_index"]  = item_numbers
our_df.head(1)

# (1.2) Create formatted dataframe
cmr_columns = ['subject', 'list', 'position', 'trial_type', 'item', 'item_index']
data_lst = []

for index, row in our_df.iterrows():
    if row["study"]:
        study_item = [
            row["subject"],
            row["list"],
            row["input"],
            "study",
            row["item_string"],
            row["item_index"]
        ]
        data_lst.append(study_item)
    if row["recall"]:
        recall_item = [
            row["subject"],
            row["list"],
            int(row["output"]),
            "recall",
            row["item_string"],
            row["item_index"]
        ]
        data_lst.append(recall_item)
        
our_data = pd.DataFrame(data_lst, 
                           columns = cmr_columns).sort_values(["subject", "list", "trial_type", "position"], 
                                                              ascending=[True, True, False, True],
                                                              ignore_index=True)
print(len(our_data))
our_data.head(1)

2226


Unnamed: 0,subject,list,position,trial_type,item,item_index
0,0,0,1,study,Smaller and larger abscesses may require diffe...,0


## (1) Fit Data to Model

In [2]:
# (0) Calculate the probability of not remembering anything
our_data.head()
groupby_columns = ["subject", "list"]
num_empty_recalls = 0 
for trial_index, trial in enumerate(our_data.groupby(groupby_columns)):
    trial = trial[1]
    if len(trial) ==  trial.trial_type.value_counts()["study"]:    # IF participant does not recall anything
        num_empty_recalls += 1

prob_not_recalling_anything = num_empty_recalls / trial_index
prob_not_recalling_anything

import numpy as np 
## (1.1) Create a parameter object (represents the parameters of a CMR model)
param_def = cmr.CMRParameters()
param_def.set_fixed(T=0.1, X1=prob_not_recalling_anything)
param_def.set_free(B_enc=(0.01, .999), 
             B_start=(0.01, .999), 
             B_rec=(0.01, .999), 
             X2=(0.01, .999), 
             Lfc=(0.01, .999), 
             Lcf=(0.01, .999),
             P1=(0.01, .999),
             P2=(0.5, 4))

param_def.set_dependent(Dfc='1 - Lfc', Dcf='1 - Lcf')

## (1.2) Grab all the studied items and associate them with a pattern in the model
n_items = our_data.item_index.max() + 1
loc_patterns = np.eye(n_items)
items = our_data.groupby('item_index')['item'].first().to_numpy()      # List of all the study items, based on their item index
patterns = {'items': items, 'vector': {'loc': loc_patterns}}        # List of items and patterns they correspond to

# (1.3) Set up the weights connecting the two layers of the model
param_def.set_sublayers(f=['task'], c=['task'])
weights = {(('task', 'item'), ('task', 'item')): 'loc'}
param_def.set_weights('fc', weights)
param_def.set_weights('cf', weights)
# print(par)

# (2) Create Model using Parameters
model = cmr.CMR()
results = model.fit_indiv(our_data, par, patterns=patterns, tol=0.05, n_rep=1)
best = fit.get_best_results(results)
# logl = logarithm of the probability of the participant's recall sequence according to the model
# k = number of free parameters
# n = number of data points (tested before found acceptable fit)
best

NameError: name 'par' is not defined

In [None]:
subj_parameters = best.T.to_dict()
subj_parameters

## (2) Simulate Free Recall Using CMR

`cmr.study_list` is a function that presents a bunch of items to a network and the CMR model change its weights in response. Then at the end, the start item is presented again (representing 'tell me everything you can remember about the passage in as much possible')

###### Additional Code

In [150]:
def generate_subject(
    cmr_network, study, recall, param, param_def=None, patterns=None, **kwargs
):
    cmr_network.set_default_options(param_def)      # Set default the parameter options
    n_item = len(study['input'][0])     # = 20
    n_list = len(study['input'])        # = 4 (variable)
    if param_def is None:
        raise ValueError('Must provide a Parameters object.')
    
    n_sub = len(param_def.sublayers['c'])   # = 1
    param = cmr.prepare_list_param(n_item, n_sub, param, param_def)     # Changes the learning rates into an array of 20 values, Add probability of stopping at each index

    item_index = np.arange(len(patterns['items']))  # [0, 1, 2, ..., 79]
    recalls_list = []
    for i in range(n_list):     # for each list
        # access the dynamic parameters needed for this list
        list_param = param.copy()
        list_param = param_def.get_dynamic(list_param, i)              # Parameters should not change with list (do not need to worry about that)

        # simulate study
        item_pool, item_study, item_recall = cmr.get_list_items(                   # item_pool = [20, 21, 22, ..., 39]
            item_index, study, recall, i, param_def.options['scope']               # item_study = [0, 1, ..., 19]
                                                                                # item_recall  = [1,5,3,8,19]
        )
        net = cmr.study_list(param_def, list_param, item_pool, item_study, patterns)   # Creates network that has studied items

        # simulate recall
        recall_index = net.generate_recall(
            ('task', 'item'),
            net.c_sublayers,
            list_param['B_rec'],
            list_param['T'],
            prob_of_ending,
        )

        items = patterns['items'][item_pool]
        recall_items = items[recall_index]
        recalls_list.append(recall_items)
        
    return recalls_list

###### Copied Code

In [None]:
def generate_subject(
    self, study, recall, param, param_def=None, patterns=None, **kwargs
):
    self.set_default_options(param_def)
    n_item = len(study['input'][0])
    n_list = len(study['input'])
    if param_def is None:
        raise ValueError('Must provide a Parameters object.')
    n_sub = len(param_def.sublayers['c'])
    param = prepare_list_param(n_item, n_sub, param, param_def)

    item_index = np.arange(len(patterns['items']))
    recalls_list = []
    for i in range(n_list):
        # access the dynamic parameters needed for this list
        list_param = param.copy()
        list_param = param_def.get_dynamic(list_param, i)

        # simulate study
        item_pool, item_study, item_recall = get_list_items(
            item_index, study, recall, i, param_def.options['scope']
        )
        net = study_list(param_def, list_param, item_pool, item_study, patterns)

        # simulate recall
        if param_def.options['filter_recalls']:
            recall_index = net.generate_recall(
                ('task', 'item'),
                net.c_sublayers,
                list_param['B_rec'],
                list_param['T'],
                list_param['p_stop'],
                filter_recalls=True,
                A1=list_param['A1'],
                A2=list_param['A2'],
            )
        else:
            recall_index = net.generate_recall(
                ('task', 'item'),
                net.c_sublayers,
                list_param['B_rec'],
                list_param['T'],
                list_param['p_stop'],
            )

        items = patterns['items'][item_pool]
        recall_items = items[recall_index]
        recalls_list.append(recall_items)
    return recalls_list

##### Hacking in recall simulation
It looks like the function we want to hack into in order to influence how information is recalled in `net.generate_recall()`

In [155]:
rec_ind = my_cmr_network.get_segment('f', ('task', 'item'))
n_item = rec_ind[1] - rec_ind[0]

AttributeError: 'CMR' object has no attribute 'get_segment'

In [None]:
net.generate_recall(
                ('task', 'item'),
                net.c_sublayers,
                list_param['B_rec'],
                list_param['T'],
                prob_of_ending,
            )
def generate_recall(
        cmr_network,
        segment,
        sublayers,
        B,
        T,
        p_stop,
        amin=0.000001,
        filter_recalls=False,
        A1=None,
        A2=None,
    ):
        """
        Generate a sequence of simulated free recall events.

        Parameters
        ----------
        segment : tuple of str, str
            Sublayer and segment to retrieve items from.

        sublayers : str or list of str
            Sublayer(s) of context to update.

        B : float or numpy.ndarray
            Context updating rate after each recall.

        T : float
            Decision parameter for choice rule.

        p_stop : numpy.array
            Probability of stopping at each output position.

        amin : float, optional
            Minimum activation of each not-yet-recalled item on each
            recall attempt.

        filter_recalls : bool, optional
            If true, potential recalls will be filtered based on match
            to context.

        A1 : float, optional
            Intercept mapping context match to an expit to determine
            recovery probability.

        A2 : float, optional
            Slope mapping match to an expit.

        Returns
        -------
        recalls : list of int
            Indices of items recalled at each output position.
        """
        if not isinstance(sublayers, list):
            sublayers = [sublayers]

        # weights to use for recall (assume fixed during recall)
        rec_ind = cmr_network.get_segment('f', *segment)
        n_item = rec_ind[1] - rec_ind[0]
        # n_sub = len(sublayers)
        # param = prepare_recall_param(n_item, n_sub, B, T, amin)

        # recalls = []
        # exclude = np.zeros(n_item, dtype=np.dtype('i'))
        # item_ind = np.arange(n_item)
        # for i in range(n_item):
        #     # stop recall with some probability
        #     if np.random.rand() < p_stop[i]:
        #         break

        #     # calculate item support
        #     operations.cue_item(
        #         rec_ind[0],
        #         n_item,
        #         self.w_cf_pre,
        #         self.w_cf_exp,
        #         self.w_ff_pre,
        #         self.w_ff_exp,
        #         self.f_in,
        #         self.c,
            #     exclude,
            #     np.asarray(recalls, dtype=np.dtype('i')),
            #     i,
            # )
            # operations.apply_softmax(
            #     rec_ind[0], n_item, self.f_in, exclude, amin, param['T']
            # )

            # # select item for recall proportionate to support
            # support = self.f_in[rec_ind[0] : rec_ind[1]]
            # p_recall = support / np.sum(support)

            # if filter_recalls:
            #     # acceptance probability based on context match
            #     operations.item_match(
            #         rec_ind[0], n_item, self.w_fc_pre, self.w_fc_exp, self.c, self.match
            #     )
            #     operations.apply_expit(rec_ind[0], n_item, self.match, A1, A2)

            #     # recall probabiity is selection + acceptance
            #     p_recall = p_recall * self.match[rec_ind[0] : rec_ind[1]]

            #     # rescale probability to vary between 0 and 1
            #     p_recall = p_recall / np.sum(p_recall)

            # if np.any(np.isnan(p_recall)):
            #     n = np.count_nonzero(exclude == 0)
            #     p_recall[exclude == 0] = 1 / n
            #     p_recall[exclude == 1] = 0
            #     recall = np.random.choice(item_ind, p=p_recall)
            # else:
            #     recall = np.random.choice(item_ind, p=p_recall)
            # recalls.append(recall)
            # exclude[recall] = 1

            # # integrate context associated with the item into context
        #     item = (*segment, recall)
        #     self.integrate(item, sublayers, param['B'][i])
        # return recalls

In [None]:
def generate_recall(
        self,
        segment,
        sublayers,
        B,
        T,
        p_stop,
        amin=0.000001,
        filter_recalls=False,
        A1=None,
        A2=None,
    ):
        """
        Generate a sequence of simulated free recall events.

        Parameters
        ----------
        segment : tuple of str, str
            Sublayer and segment to retrieve items from.

        sublayers : str or list of str
            Sublayer(s) of context to update.

        B : float or numpy.ndarray
            Context updating rate after each recall.

        T : float
            Decision parameter for choice rule.

        p_stop : numpy.array
            Probability of stopping at each output position.

        amin : float, optional
            Minimum activation of each not-yet-recalled item on each
            recall attempt.

        filter_recalls : bool, optional
            If true, potential recalls will be filtered based on match
            to context.

        A1 : float, optional
            Intercept mapping context match to an expit to determine
            recovery probability.

        A2 : float, optional
            Slope mapping match to an expit.

        Returns
        -------
        recalls : list of int
            Indices of items recalled at each output position.
        """
        if not isinstance(sublayers, list):
            sublayers = [sublayers]

        # weights to use for recall (assume fixed during recall)
        rec_ind = self.get_segment('f', *segment)
        n_item = rec_ind[1] - rec_ind[0]
        n_sub = len(sublayers)
        param = prepare_recall_param(n_item, n_sub, B, T, amin)

        recalls = []
        exclude = np.zeros(n_item, dtype=np.dtype('i'))
        item_ind = np.arange(n_item)
        for i in range(n_item):
            # stop recall with some probability
            if np.random.rand() < p_stop[i]:
                break

            # calculate item support
            operations.cue_item(
                rec_ind[0],
                n_item,
                self.w_cf_pre,
                self.w_cf_exp,
                self.w_ff_pre,
                self.w_ff_exp,
                self.f_in,
                self.c,
                exclude,
                np.asarray(recalls, dtype=np.dtype('i')),
                i,
            )
            operations.apply_softmax(
                rec_ind[0], n_item, self.f_in, exclude, amin, param['T']
            )

            # select item for recall proportionate to support
            support = self.f_in[rec_ind[0] : rec_ind[1]]
            p_recall = support / np.sum(support)

            if filter_recalls:
                # acceptance probability based on context match
                operations.item_match(
                    rec_ind[0], n_item, self.w_fc_pre, self.w_fc_exp, self.c, self.match
                )
                operations.apply_expit(rec_ind[0], n_item, self.match, A1, A2)

                # recall probabiity is selection + acceptance
                p_recall = p_recall * self.match[rec_ind[0] : rec_ind[1]]

                # rescale probability to vary between 0 and 1
                p_recall = p_recall / np.sum(p_recall)

            if np.any(np.isnan(p_recall)):
                n = np.count_nonzero(exclude == 0)
                p_recall[exclude == 0] = 1 / n
                p_recall[exclude == 1] = 0
                recall = np.random.choice(item_ind, p=p_recall)
            else:
                recall = np.random.choice(item_ind, p=p_recall)
            recalls.append(recall)
            exclude[recall] = 1

            # integrate context associated with the item into context
            item = (*segment, recall)
            self.integrate(item, sublayers, param['B'][i])
        return recalls

In [None]:
def generate_subject(
    self, study, recall, param, param_def=None, patterns=None, **kwargs
):
    self.set_default_options(param_def)
    n_item = len(study['input'][0])
    n_list = len(study['input'])
    if param_def is None:
        raise ValueError('Must provide a Parameters object.')
    n_sub = len(param_def.sublayers['c'])
    param = prepare_list_param(n_item, n_sub, param, param_def)

    item_index = np.arange(len(patterns['items']))
    recalls_list = []
    for i in range(n_list):
        # access the dynamic parameters needed for this list
        list_param = param.copy()
        list_param = param_def.get_dynamic(list_param, i)

        # simulate study
        item_pool, item_study, item_recall = get_list_items(
            item_index, study, recall, i, param_def.options['scope']
        )
        net = study_list(param_def, list_param, item_pool, item_study, patterns)

        # simulate recall
        if param_def.options['filter_recalls']:
            recall_index = net.generate_recall(
                ('task', 'item'),
                net.c_sublayers,
                list_param['B_rec'],
                list_param['T'],
                list_param['p_stop'],
                filter_recalls=True,
                A1=list_param['A1'],
                A2=list_param['A2'],
            )
        else:
            recall_index = net.generate_recall(
                ('task', 'item'),
                net.c_sublayers,
                list_param['B_rec'],
                list_param['T'],
                list_param['p_stop'],
            )

        items = patterns['items'][item_pool]
        recall_items = items[recall_index]
        recalls_list.append(recall_items)
    return recalls_list

Transform simulated recall data into psifr format to perform nice analyses.