In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline 

import glam

%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters


# Systematic parameter recovery (ADVI)

Here, we perform a structured and systematic parameter recovery study. We use the original model variant with 4 parameters ($v$, $\gamma$, $\sigma$, $\tau$). For each parameter, we define a range of sensible values and select a `low`, `medium` and `high` value, based on the individual parameter estimates we obtained by hierarchically fitting the GLAM to the data from Krajbich & Rangel (2011) in Thomas, Molter, Krajbich, Heekeren & Mohr (submitted).

We then pick one parameter and one constellation of values for the remaining three parameters (e.g., we pick $v$, and set $\gamma$, $\sigma$ and $\tau$ to their `low` values). Then, for 10 different values of $v$ along its range, we generate a synthetic dataset for a single participant with N=100 trials, random item values and gaze, fit the model and record the generating and estimated parameters. The procedure is repeated for all possible constellations of the other parameter values, and for each of the other parameters, resulting in a total of 1080 ($10 \times 4 \times 3^3$) simulated and fitted datasets.

## Define parameter ranges from 2011 estimates

In [2]:
# Lower and upper end of the range considered for each model parameter
bounds = {'v': [0.000015, 0.00015],
          'gamma': [-1, 1],
          's': [0.004, 0.011],
          'tau': [0.1, 1.25]}

# Ten equally spaced values per parameter, both ends of the range included
ranges = {name: np.linspace(lower, upper, 10)
          for name, (lower, upper) in bounds.items()}
ranges

{'v': array([1.50e-05, 3.00e-05, 4.50e-05, 6.00e-05, 7.50e-05, 9.00e-05,
        1.05e-04, 1.20e-04, 1.35e-04, 1.50e-04]),
 'gamma': array([-1.        , -0.77777778, -0.55555556, -0.33333333, -0.11111111,
         0.11111111,  0.33333333,  0.55555556,  0.77777778,  1.        ]),
 's': array([0.004     , 0.00477778, 0.00555556, 0.00633333, 0.00711111,
        0.00788889, 0.00866667, 0.00944444, 0.01022222, 0.011     ]),
 'tau': array([0.1       , 0.22777778, 0.35555556, 0.48333333, 0.61111111,
        0.73888889, 0.86666667, 0.99444444, 1.12222222, 1.25      ])}

In [3]:
# Low, medium and high level of every parameter: the 10th, 50th and 90th
# percentile of a 100-point grid spanning that parameter's range
values = {name: np.percentile(np.linspace(lower, upper, 100), [10, 50, 90])
          for name, (lower, upper) in bounds.items()}
values

{'v': array([2.850e-05, 8.250e-05, 1.365e-04]),
 'gamma': array([-8.00000000e-01,  5.55111512e-17,  8.00000000e-01]),
 's': array([0.0047, 0.0075, 0.0103]),
 'tau': array([0.215, 0.675, 1.135])}

# Recover a single GLAM

In [4]:
def recover_glam(parameters, n_trials=100, n_items=3, seed=None):
    """Simulate data for one individual from `parameters` and fit a GLAM to it.

    Parameters
    ----------
    parameters : dict
        Generating parameters, passed on unchanged to `GLAM.simulate_group`.
    n_trials : int, optional
        Number of trials to simulate (default 100).
    n_items : int, optional
        Number of items per trial (default 3).
    seed : int or None, optional
        Seed handed to `GLAM.simulate_group`.

    Returns
    -------
    glam.GLAM
        The model object after `simulate_group`, `make_model` and `fit`
        have been called on it.
    """
    # Settings of the synthetic dataset: one individual, no error component,
    # item values within [1, 10]
    simulation_settings = dict(kind='individual',
                               n_individuals=1,
                               n_trials=n_trials,
                               n_items=n_items,
                               parameters=parameters,
                               error_weight=0,
                               value_range=[1, 10],
                               seed=seed)

    model = glam.GLAM()
    model.simulate_group(**simulation_settings)

    # Individual-level model with t0 fixed to 0, fitted with method='VI'
    model.make_model('individual', t0_val=0, error_weight=0)
    model.fit(method='VI', n_samples=2000, n_vi=50000, progressbar=False)

    return model

In [None]:
# Single trial run: first value of the v grid, all other parameters at their
# low level, t0 fixed to 0
parameters = {'v': [ranges['v'][0]],
              'gamma': [values['gamma'][0]],
              's': [values['s'][0]],
              'tau': [values['tau'][0]],
              't0': [0]}

result = recover_glam(parameters=parameters, seed=1)

Generating single subject models for 1 subjects...


INFO (theano.gof.compilelock): Waiting for existing lock by process '35413' (I am process '36753')
INFO (theano.gof.compilelock): To manually release the lock, delete /Users/felixmolter/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.6.5-64/lock_dir
INFO (theano.gof.compilelock): Waiting for existing lock by process '35314' (I am process '36753')
INFO (theano.gof.compilelock): To manually release the lock, delete /Users/felixmolter/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.6.5-64/lock_dir
INFO (theano.gof.compilelock): Waiting for existing lock by process '35414' (I am process '36753')
INFO (theano.gof.compilelock): To manually release the lock, delete /Users/felixmolter/.theano/compiledir_Darwin-17.7.0-x86_64-i386-64bit-i386-3.6.5-64/lock_dir


# Run over multiple parameter sets

In [None]:
# Per parameter: its three fixed levels (low / medium / high) and the grid of
# values it runs through when it is the parameter being varied
parameter_info = {name: dict(zip(('low', 'medium', 'high'), values[name]),
                             variable=ranges[name])
                  for name in ['v', 'gamma', 's', 'tau']}

In [None]:
def generate_parameter_sets(parameter_info):
    """Yield every parameter set of the systematic recovery study.

    Each key of `parameter_info` takes a turn as the varied parameter. For
    every combination of low / medium / high levels of the remaining
    parameters, one set is produced per entry of the varied parameter's
    'variable' grid.

    Parameters
    ----------
    parameter_info : dict
        Maps a parameter name to a dict with the keys 'low', 'medium',
        'high' and 'variable'.

    Yields
    ------
    tuple
        `(index, level_set, parameter_set)`: a running counter starting at 1,
        a dict mapping each parameter to its level name ('variable' for the
        varied one), and a dict mapping each parameter (plus 't0', fixed to
        `[0]`) to a one-element list holding its value.
    """
    from itertools import product

    level_names = ('low', 'medium', 'high')
    parameter_names = ['v', 'gamma', 's', 'tau']

    # All 3 x 3 x 3 level combinations for the three parameters held fixed
    level_combinations = list(product(level_names, repeat=3))

    counter = 0

    for varied in parameter_info.keys():
        fixed = [name for name in parameter_names if name != varied]

        for combination in level_combinations:
            for varied_value in parameter_info[varied]['variable']:
                counter += 1

                level_set = {varied: 'variable'}
                parameter_set = {'t0': [0], varied: [varied_value]}

                for position, name in enumerate(fixed):
                    level = combination[position]
                    level_set[name] = level
                    parameter_set[name] = [parameter_info[name][level]]

                yield counter, level_set, parameter_set

In [None]:
# Collect all (index, level_set, parameter_set) triples in a list so that
# single entries can be looked up by position; show the first two
all_generated_inputs = [generated_input
                        for generated_input in generate_parameter_sets(parameter_info=parameter_info)]
all_generated_inputs[:2]

In [None]:
import os

# Create the folder for the per-dataset result files. Unlike a plain `mkdir`,
# os.makedirs also creates missing parent folders (e.g. `results`), and with
# exist_ok=True it does nothing when the folders already exist, so this cell
# can be re-run without errors.
os.makedirs(os.path.join('results', 'parameter_recovery', 'advi', 'partial'),
            exist_ok=True)

In [None]:
from os.path import join, isfile

In [None]:
def wrap_recover_glam(generated_input):
    """Run one parameter recovery and store its result as a one-row CSV file.

    The result file of each index doubles as a checkpoint: if it already
    exists, nothing is simulated or fitted.

    Parameters
    ----------
    generated_input : tuple
        `(index, level_set, parameter_set)` as yielded by
        `generate_parameter_sets`.

    Returns
    -------
    pandas.DataFrame or None
        One-row frame holding the index, the level of each parameter, the
        generating values (`*_gen`) and the recovered estimates (`*_rec`).
        None if a result file for this index was found and the recovery
        was skipped.
    """
    import os

    index, level_set, parameter_set = generated_input

    filename = join('results', 'parameter_recovery', 'advi', 'partial',
                    'parameter_recovery_advi_part{}.csv'.format(index))

    # check if already done:
    if isfile(filename):
        print("Found previous result for index {}. Skipping...".format(index))
        return None

    # Make sure the output folder exists *before* the expensive fit, so that a
    # missing folder cannot throw away a finished recovery. exist_ok=True keeps
    # this safe when several workers get here at the same time.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # perform actual recovery; the index also serves as the simulation seed
    result = recover_glam(parameter_set, n_trials=100, n_items=3, seed=index)

    # assemble output dataframe row
    output = pd.DataFrame(dict(index=[index],
                               v_level=[level_set['v']],
                               gamma_level=[level_set['gamma']],
                               s_level=[level_set['s']],
                               tau_level=[level_set['tau']],
                               v_gen=parameter_set['v'],
                               gamma_gen=parameter_set['gamma'],
                               s_gen=parameter_set['s'],
                               tau_gen=parameter_set['tau'],
                               v_rec=result.estimates['v'][0],
                               gamma_rec=result.estimates['gamma'][0],
                               s_rec=result.estimates['s'][0],
                               tau_rec=result.estimates['tau'][0],
                               # placeholder: no convergence information is recorded
                               converged=[np.nan]))

    output.to_csv(filename, index=False)

    return output

In [None]:
# Try the wrapper on the first generated parameter set
wrap_recover_glam(all_generated_inputs[0])

## Run the recovery in parallel

In [None]:
from multiprocessing import Pool

n_cores = 4

# Use the pool as a context manager so that its worker processes are shut
# down once the cell is done (also if a worker raises). `map` blocks until all
# parameter sets have been processed, so no work is cut short on exit.
with Pool(n_cores) as p:
    output = p.map(wrap_recover_glam, generate_parameter_sets(parameter_info))

### Combine outputs

In [None]:
from os import listdir

In [None]:
# Folder holding one CSV file per recovered dataset
partial_dir = join('results', 'parameter_recovery', 'advi', 'partial')

filenames = [file for file in listdir(partial_dir)
             if file.endswith('.csv')]

partial_recoveries = []

for file in filenames:
    # read the file of the current iteration (`file`, not `filename`)
    partial = pd.read_csv(join(partial_dir, file))
    partial_recoveries.append(partial)

# Sort by the 'index' column (the running number of the parameter set), so
# that the row order does not depend on the order of the directory listing
parameter_recovery = pd.concat(partial_recoveries).sort_values('index').reset_index(drop=True)
del partial_recoveries
parameter_recovery.to_csv(join('results', 'parameter_recovery', 'advi', 'parameter_recovery_advi.csv'))
parameter_recovery.head()

# Plot results

The above analysis was performed on a different multicore machine. We therefore load its results here: