# Parameter Shifting Visualizations
> figures demonstrating how the recency and contiguity effects shift as you shift parameters around in the 2 models

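Presuppose fitted parameters from the MurdockOkada1970 dataset. Vary selected parameters around their fitted values and plot the resulting simulated data as serial position curves (SPCs), lag-conditional response probability curves (lag-CRPs), and probability of first recall curves (PFRs). 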

I'll try to work out the code for this in a way that makes it easy to pick the varied parameters and value ranges. 

## Dependencies
We'll need each relevant parameter configuration, helper functions, and models. 

In [None]:
import scipy.io as sio
import numpy as np
import pandas as pd
from psifr import fr
from InstanceCMR import InstanceCMR
from PrototypeCMR import PrototypeCMR
import seaborn as sns
import matplotlib.pyplot as plt

def prepare_okadata(path):
    """
    Prepares free recall data stored as plain text (e.g. `data/mo1970.txt`)
    for fitting.

    The file at `path` is read as repeating groups of three non-empty lines
    per trial, with fields separated by four spaces:

    1. a line whose second field is the subject identifier,
    2. a line listing the recalled study positions in output order,
    3. a line that is ignored.

    Recalled values greater than the list length are dropped, and only the
    first occurrence of a repeated value is kept.

    **Arguments**:  
    - path: source of data file  

    **Returns**:
    - trials: int64-array where rows identify a unique trial of responses and 
        columns corresponds to a unique recall index. Rows are right-padded 
        with 0. The array has 13 columns unless some trial holds more than 13 
        recalls, in which case it is as wide as the longest trial.  
    - merged: output of `fr.merge_free_recall` applied to a long format table 
        where each row describes one study or recall event.  
    - list_length: length of lists studied in the considered dataset (fixed 
        at 20)
    """

    list_length = 20
    # narrowest width of the returned array; a fixed width of 13 breaks as soon
    # as one trial holds more than 13 recalls, so it is only a lower bound here
    min_recall_columns = 13
    separator = '    '

    with open(path) as f:
        lines = [line for line in f.read().split('\n') if line]

    subjects = []
    recalls = []
    for row, line in enumerate(lines):
        fields = line.strip().split(separator)
        row_in_group = row % 3

        if row_in_group == 0:
            subjects.append(int(fields[1]))
        elif row_in_group == 1:
            in_list = (
                each for each in map(int, fields) if each <= list_length)
            # dict keys keep insertion order, so this drops repeats while
            # preserving the order of first occurrence
            recalls.append(list(dict.fromkeys(in_list)))

    # right-pad every trial with zeros up to a common width
    width = max([min_recall_columns] + [len(trial) for trial in recalls])
    trials = np.zeros((len(recalls), width), dtype='int64')
    for row, trial in enumerate(recalls):
        trials[row, :len(trial)] = trial

    data = []
    previous_subject = None
    list_index = 0
    for subject, trial in zip(subjects, trials):

        # list numbering restarts whenever the subject changes
        if subject != previous_subject:
            list_index = 0
            previous_subject = subject
        list_index += 1

        # study events: item i is presented at position i
        data += [[subject, list_index, 'study', i + 1, i + 1]
                 for i in range(list_length)]

        # recall events: zeros are padding, not responses
        data += [[subject, list_index, 'recall', recall_index + 1, recall_event]
                 for recall_index, recall_event in enumerate(trial)
                 if recall_event != 0]

    data = pd.DataFrame(data, columns=[
        'subject', 'list', 'trial_type', 'position', 'item'])
    merged = fr.merge_free_recall(data)
    return trials, merged, list_length

# Load the dataset once; `murd_length` is read by the parameter cells below.
murd_trials, murd_events, murd_length = prepare_okadata('data/mo1970.txt')

# preview the first rows of the merged study/recall table
murd_events.head()

In [None]:

# names of the PrototypeCMR parameters, in the same order as the values in
# `cmr_fit`
free_parameters = [
    'encoding_drift_rate', 'start_drift_rate', 'recall_drift_rate',
    'shared_support', 'item_support', 'learning_rate',
    'primacy_scale', 'primacy_decay',
    'stop_probability_scale', 'stop_probability_growth',
    'choice_sensitivity',
]

# settings that are fixed rather than taken from `cmr_fit`
parameters = {
    'item_count': murd_length,
    'presentation_count': murd_length,
    'sampling_rule': 0,
}

cmr_fit = np.array([
    0.67729029, 0.0789752, 0.84475351, 0.32843236, 0.04606376,
    0.25014697, 4.09477771, 35.20917629, 0.03838687, 0.29442883,
    5.03376164,
])

# fixed settings first, then each value of `cmr_fit` keyed by its name
cmr_params = dict(parameters)
cmr_params.update(zip(free_parameters, cmr_fit))
cmr_params

In [None]:

# names of the InstanceCMR parameters, in the same order as the values in
# `icmr_fit`
free_parameters = [
    'encoding_drift_rate', 'start_drift_rate', 'recall_drift_rate',
    'shared_support', 'item_support', 'learning_rate',
    'primacy_scale', 'primacy_decay',
    'stop_probability_scale', 'stop_probability_growth',
    'feature_sensitivity',
]

# settings that are fixed rather than taken from `icmr_fit`
parameters = {
    'item_count': murd_length,
    'presentation_count': murd_length,
    'context_sensitivity': 1,
    'choice_sensitivity': 1,
}

icmr_fit = np.array([
    7.04157544e-01, 2.22044605e-16, 8.42679777e-01, 6.84111237e-04,
    3.31835533e-02, 1.01371142e-02, 4.34918696e+00, 1.43883032e+00,
    2.98134948e-02, 3.42612961e-01, 2.39278982e+00,
])

# fixed settings first, then each value of `icmr_fit` keyed by its name
icmr_params = dict(parameters)
icmr_params.update(zip(free_parameters, icmr_fit))
icmr_params

## Simulation Demo
Let's confirm that I can (efficiently) simulate the model okay and plot an example serial position curve before I try scaling up.

In [None]:

def simulate_data(model, experiment_count, first_recall_item=None):
    """
    Encode one study list into `model`, run free recall repeatedly, and return
    the outcomes as a psifr-merged table.

    Each simulated run is logged as its own `subject` with a single list
    (list 0). Study events use positions 1..item_count paired with items
    0..item_count-1; recall events record whatever `model.free_recall()`
    returns, numbered from 1 in output order.

    **Arguments**:
    - model: memory model instance (see required attributes below)
    - experiment_count: number of free recall runs to simulate
    - first_recall_item: when not None, handed to `model.force_recall` before
        every call to `model.free_recall`

    **Required model attributes**:
    - item_count: specifies number of items encoded into memory
    - experience: accepts the matrix of item representations to encode
    - free_recall: returns the sequence of recalled items for one run
    - force_recall: only needed when `first_recall_item` is given

    **Returns**:
    - merged: output of `fr.merge_free_recall` for the simulated events
    """

    # Encode the study list. The identity matrix shifted one column to the
    # right is tried first; models that reject that shape with a ValueError
    # (CMR, per the original note) get a square identity matrix instead.
    try:
        size = model.item_count
        model.experience(np.eye(size, size + 1, 1))
    except ValueError:
        size = model.item_count
        model.experience(np.eye(size, size))

    # all study events come first, one block of rows per run
    list_size = model.item_count
    study_rows = [
        [run, 0, 'study', position + 1, position]
        for run in range(experiment_count)
        for position in range(list_size)
    ]

    # then the recall events, one simulated retrieval per run
    recall_rows = []
    for run in range(experiment_count):
        if first_recall_item is not None:
            model.force_recall(first_recall_item)
        recall_rows.extend(
            [run, 0, 'recall', output_position, recalled]
            for output_position, recalled
            in enumerate(model.free_recall(), start=1)
        )

    table = pd.DataFrame(
        study_rows + recall_rows,
        columns=['subject', 'list', 'trial_type', 'position', 'item'])
    return fr.merge_free_recall(table)

# NOTE(review): the PrototypeCMR instance built here is replaced by the
# InstanceCMR instance on the next line, so only InstanceCMR is simulated.
model = PrototypeCMR(**cmr_params)
model = InstanceCMR(**icmr_params)
# simulate 1000 free recall runs; each run is stored as its own `subject`
events = simulate_data(model, 1000)

In [None]:
# Serial position curve table: mean of `recall` for each study position
# (`input`), computed separately for every `subject`. One `reset_index()` is
# enough to turn both grouping keys into columns; resetting a second time
# would only add a meaningless `index` column.
spc = events.query('study').pivot_table(
    index=['subject', 'input'], values=['recall']).reset_index()
spc.head()

In [None]:
sns.set(style='darkgrid')
# no `palette` argument: without a `hue` variable there is nothing for a
# palette to colour, so passing one has no effect on the figure
g = sns.lineplot(data=spc, x='input', y='recall')
plt.xlabel('Study Position')
plt.ylabel('Probability Recall');

It's snappy enough that I don't have to try jit-compiling the data simulation function. Now to scale up.

## Approach
How am I gonna tackle this? Simulate data for each unique parameter configuration and then extract the relevant analysis dataframe. Add a new column identifying the varied value for each simulation so I can hue or facet based on the variable. 

Which analyses am I interested in doing? Uh, let's start with encoding drift rate and just one parameter varied per simulation.

In [None]:
import os

from tqdm import tqdm

# NOTE(review): these three values are not read anywhere in this cell; they
# are left in place in case other cells depend on them.
minimum = 0
maximum = .5
interval = .1

# parameter name -> values to simulate; every other parameter keeps the value
# from the model's parameter dictionary
score_ranges = {
    'item_support': np.arange(.01, .1, .02),
}

# plt.savefig does not create missing folders, so make sure the target exists
os.makedirs('results', exist_ok=True)

for model in [InstanceCMR]:

    # choose the parameter dictionary that belongs to the model class
    if model.__name__ == 'PrototypeCMR':
        parameters = cmr_params
    else:
        parameters = icmr_params

    for varied_parameter, parameter_values in score_ranges.items():
        crps = []
        spcs = []
        pfrs = []

        for parameter_value in tqdm(parameter_values):

            # simulate data with this parameter value modified
            sub_params = parameters.copy()
            sub_params[varied_parameter] = parameter_value
            subset = simulate_data(model(**sub_params), 200)

            # accumulate spcs, crps, pfrs, each tagged with the value used
            spc = subset.query('study').pivot_table(
                index=['subject', 'input'], values=['recall'])
            spc[varied_parameter] = parameter_value
            spcs.append(spc)

            crp = fr.lag_crp(subset)
            crp[varied_parameter] = parameter_value
            crps.append(crp)

            # copy so the new column is written to an independent frame
            # rather than to a filtered selection of the pnr table
            pfr = fr.pnr(subset).query('output <= 1').copy()
            pfr[varied_parameter] = parameter_value
            pfrs.append(pfr)

        # concatenate result into a single table per analysis
        spc = pd.concat(spcs).reset_index()
        crp = pd.concat(crps).reset_index()
        pfr = pd.concat(pfrs).reset_index()

        sns.set(style='darkgrid')
        fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)

        # left panel: serial position curve, one line per parameter value
        sns.lineplot(ax=axes[0], data=spc, x='input', y='recall',
                     hue=varied_parameter, ci=None)
        axes[0].set_title('SPC')
        axes[0].legend([], [])

        # middle panel: lag-CRP, drawn separately for negative and positive
        # lags so that no line segment is drawn across lag 0
        max_lag = 10
        filt_neg = f'{-max_lag} <= lag < 0'
        filt_pos = f'0 < lag <= {max_lag}'

        sns.lineplot(ax=axes[1], data=crp.query(filt_neg), x='lag', y='prob',
                     hue=varied_parameter, ci=None)
        sns.lineplot(ax=axes[1], data=crp.query(filt_pos), x='lag', y='prob',
                     hue=varied_parameter, ci=None)

        # the only populated legend in the figure: the swept values
        axes[1].legend(np.round(parameter_values, 5))
        axes[1].set_title('Lag-CRP')

        # right panel: probability of first recall by study position
        sns.lineplot(ax=axes[2], data=pfr, x='input', y='prob',
                     hue=varied_parameter, ci=None)
        axes[2].set_title('PFR')
        axes[2].legend([], [])

        fig.suptitle(varied_parameter.replace('_', ' ').upper())
        plt.savefig(f'results/{model.__name__}_{varied_parameter}.svg')
        plt.show()