In [1]:
# default_exp analyses

# Study Phase Retrieval Effect
They considered transitions between items following a shared repeated item. They calculated the proportion of those items recalled in $S_j = {j + 1, j + 2}$ of which CMR then recalled an item in the set $S_i = {i + 1, i + 2}$. They also calculated the proportion of recalls $S_i$ of which CMR then transitioned to an item in the set $S_j$. They calculated the proportion of transitions for each of lags $j - i >= 4$, and represented the mean percent of transitions across these lags. 

We'll extend this analysis by performing a masked, reference-shifted lag-crp analysis. We'll only track transitions from neighbors of repeatedly-presented items. And we'll shift our lag-reference from the these items to the alternative position of the repeatedly-presented items they transitioned from.

To estimate the proportion of transitions that CMR would make at these lags in the absence of repeated items, they considered transitions in control lists matched to the same serial positions considered in the mixed lists. They matched these serial positions to 100 random shuffles of the control lists, and took the mean across the reshuffled datasets. We'll develop that functionality elsewhere.

## Functions
We start with a specification of `picky_lag_crp` that supports item repetitions. Then we need to write a mask that selects transitions from items

In [2]:
from repetition_cmr.analyses import recall_by_all_study_positions

In [3]:
# export

from numba import njit
import numpy as np

#@njit(nogil=True)
def lag_crp(trials, presentations, max_repeats=2, mask=None, reference_positions=None):

    if mask is None:
        mask = np.ones(np.shape(trials), dtype=np.bool_)
    
    list_length = len(presentations[0])
    lag_range = list_length - 1
    total_actual_lags = np.zeros(lag_range * 2 + 1)
    total_possible_lags = np.zeros(lag_range * 2 + 1)
    terminus = np.sum(trials != 0, axis=1) # number of recalls per trial
    recall_by_study_position = recall_by_all_study_positions(trials, presentations, max_repeats)
    if reference_positions is None:
        reference_positions = recall_by_study_position
    
    for trial_index in range(len(trials)):
        
        previous_item = 0
        item_count = np.max(presentations[trial_index]) + 1
        possible_items = np.arange(item_count) # initial pool of possible recalls, 1-indexed
        possible_positions = np.zeros((item_count, max_repeats), dtype=np.int32)
        
        # we track possible positions using presentations and alt_presentations
        for item in range(item_count):
            pos = np.nonzero(presentations[trial_index] == item)[0] + 1
            possible_positions[item, :len(pos)] = pos
            
        for recall_index in range(terminus[trial_index]):
            
            current_item = presentations[trial_index][trials[trial_index, recall_index]-1]
            
            # track possible and actual lags
            if (recall_index > 0) and (mask[trial_index, recall_index]) and (
                reference_positions[0, trial_index, recall_index-1] != 0):
                
                # item indices don't help track lags anymore
                # so more complex calculation needed to identify possible lags given previous item
                current_index = np.nonzero(possible_items==current_item)[0]
                possible_lags = np.zeros((len(recall_by_study_position) ** 2, len(possible_items)), dtype=np.int32)

                index = 0
                for x in range(len(recall_by_study_position)):
                    for y in range(len(reference_positions)):
                        if reference_positions[y, trial_index, recall_index-1] > 0:
                        
                            possible_lags[index] = possible_positions[
                                possible_items, x] - reference_positions[y, trial_index, recall_index-1]
                            
                            # if tracked position is 0, then we don't actually want to count it in our lags
                            possible_lags[index][possible_positions[possible_items, x] == 0] = 0
                        
                        index += 1

                possible_lags += lag_range
                total_actual_lags[possible_lags[:, current_index].flatten()] += 1
                total_possible_lags[possible_lags.flatten()] += 1
                        

            # update pool to exclude recalled item (updated to still identify 1-indexed item)
            previous_item = current_item
            possible_items = possible_items[possible_items != previous_item]
                    
    # small correction to avoid nans and commit to excluding multiply-tracked single presentations 
    total_actual_lags[lag_range] = 0
    total_possible_lags[total_actual_lags==0] += 1
    
    return total_actual_lags/total_possible_lags

In [67]:
# mask depends on presentation order: 
# we mask all recalls with serial positions immediately after that of an item repetition
# first we need to identify the relevant serial positions
trials_shape = np.shape(trials)
mask = np.zeros(trials_shape, dtype=np.bool_)
reference = np.zeros((2, trials_shape[0], trials_shape[1]), dtype=np.int32)

for trial_index, presentation in enumerate(presentations):

    masked_items = []
    reference_positions = []
    for current_index, current_item in enumerate(presentation[:-1]):

        # identify each time current_item occurs in presentation and skip if count is 1
        positions = np.nonzero(presentation == current_item)[0]
        if len(positions) == 1:
            continue

        # also skip if lag between positions is <4
        if positions[1] - positions[0] < 4:
            continue

        # identify each time successive_item occurs in presentation and skip if count is 2
        if len(np.nonzero(presentation == presentation[current_index+1])[0]) > 1:
            continue

        # recall of item at succesive serial position should be in mask 
        masked_items.append(current_index+2)

        # reference position should be the position that's distinct from current_index
        reference_positions.append(positions[positions != current_index][0])

        #print(current_item)

    for i in range(len(masked_items)):
        mask[trial_index, trials[trial_index]==masked_items[i]] = True
        reference[0, trial_index, trials[trial_index]==masked_items[i]] = reference_positions[i]

In [54]:
trials[0][mask[0]]

array([11, 20])

In [55]:
reference[:, 0]

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0, 18,  0,  0,  0,  0,  0,  9,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0]])

In [56]:
trials[0]

array([ 1,  2,  3,  4,  5,  6,  7,  9, 10, 11, 17, 14, 12, 15, 25, 20, 28,
       30, 39, 38, 37, 18,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0])

In [57]:
recall_by_study_position[:, 0]

array([[ 1,  2,  3,  4,  5,  6,  7,  9, 10, 11, 17, 14, 12, 15, 25, 20,
        28, 30, 39, 38, 37, 18,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 19,  0,  0,  0, 13,  0, 31,  0,
        34,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0]])

In [65]:
presentations[0]

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11, 12, 13, 14, 15,
       16,  9, 17, 18, 19, 18, 20, 21, 22, 19, 23, 24, 25, 21, 26, 27, 23,
       28, 29, 30, 31, 32, 33])

In [64]:
presentations[0][27]

23

## Howard & Kahana (2005) Dataset
> Kahana, M. J., & Howard, M. W. (2005). Spacing and lag effects in free recall of pure lists. Psychonomic Bulletin & Review, 12(1), 159-164.

In [6]:
from repetition_cmr.datasets import prepare_howakaha05_data

trials, events, list_length, presentations, list_types, rep_data, subjects = prepare_howakaha05_data(
    '../../data/HowaKaha05.dat')

for condition in [0, 2, 1]:
    print(lag_crp(trials[list_types==condition], presentations[list_types==condition], max_repeats=3))
    print()

[0.10091743 0.10091743 0.10091743 0.12621359 0.12621359 0.07462687
 0.09539474 0.09539474 0.04827586 0.06615776 0.06615776 0.03217158
 0.06157113 0.06157113 0.03872437 0.08468468 0.08468468 0.05859375
 0.07443366 0.07443366 0.02811951 0.04796512 0.04796512 0.02728732
 0.05797101 0.05797101 0.03896104 0.05447942 0.05447942 0.02445652
 0.05524239 0.05524239 0.03817734 0.05789474 0.05789474 0.02813599
 0.04433498 0.04433498 0.02330744 0.04413146 0.04413146 0.02705515
 0.03954306 0.03954306 0.01851852 0.04620462 0.04620462 0.03345389
 0.04898911 0.04898911 0.02226027 0.04477612 0.04477612 0.02791461
 0.04464918 0.04464918 0.02267396 0.04155496 0.04155496 0.02453532
 0.05303514 0.05303514 0.03573981 0.05616606 0.05616606 0.02876712
 0.06323877 0.06323877 0.04259502 0.06480955 0.06480955 0.03099304
 0.05706522 0.05706522 0.03379602 0.06077922 0.06077922 0.03565167
 0.06896552 0.06896552 0.04433221 0.09163534 0.09163534 0.06280455
 0.13422819 0.13422819 0.09860665 0.09860665 0.09860665 0.
 0.

## Lohnas & Kahana (2014) Dataset
> Siegel, L. L., & Kahana, M. J. (2014). A retrieved context account of spacing and repetition effects in free recall. Journal of Experimental Psychology: Learning, Memory, and Cognition, 40(3), 755.

In [4]:
from compmemlearn.datasets import prepare_lohnas2014_data

trials, events, list_length, presentations, list_types, rep_data, subjects = prepare_lohnas2014_data(
    '../../data/repFR.mat')

for condition in [1, 2, 3, 4]:
    print(lag_crp(trials[list_types==condition], presentations[list_types==condition], 2))
    print()

%timeit lag_crp(trials[list_types==condition], presentations[list_types==condition], 2)

[0.10416667 0.04166667 0.02147239 0.0228833  0.01654412 0.01880878
 0.01775956 0.02140309 0.02038627 0.01629914 0.02       0.01670644
 0.01305684 0.01468531 0.01878238 0.017042   0.01747234 0.01930502
 0.01536831 0.01510574 0.01016949 0.01390176 0.01393885 0.01690507
 0.01760131 0.02655569 0.01942117 0.0177712  0.01956599 0.02229081
 0.02253148 0.02098128 0.02182163 0.02749459 0.02875974 0.03524229
 0.05237689 0.06995769 0.1551922  0.         0.25058603 0.09483765
 0.06154943 0.04214095 0.02825979 0.02849873 0.02509804 0.02086677
 0.02140078 0.02180149 0.01564345 0.01611922 0.01162425 0.01868557
 0.01537433 0.01518288 0.01601423 0.01365818 0.01031716 0.01069731
 0.01161344 0.01133391 0.01545455 0.01106833 0.00858152 0.01419558
 0.01462317 0.013261   0.014295   0.01597222 0.00988593 0.01417848
 0.01458523 0.014      0.0046729  0.01536313 0.00696864 0.0049505
 0.00452489]

[0.09734513 0.09734513 0.08050847 0.03524229 0.07859079 0.06069364
 0.07889546 0.04103672 0.07066052 0.04576271 0.06