# Recovery of parameter correlations

Here we evaluate whether we can recover correlations between the $k$ and $s$ parameters in simulated participants who discount according to the modified Rachlin discount function (see [Vincent & Stewart, 2020](https://doi.org/10.1016/j.cognition.2020.104203)).

$$
V(R, D, k) = R \cdot \frac{1}{1+(k \cdot D)^s}
$$

where $R$ is a reward, delivered at a delay $D$. The parameters are:
- $k$ is normally interpreted as the discount rate, although technically in this case it is the product of the discount rate and the constant term in Stevens' Power Law.
- $s$ is the exponent in Stevens' Power Law.

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm, bernoulli, uniform, multivariate_normal, pearsonr
import pymc3 as pm
import math

import matplotlib.pyplot as plt

%config InlineBackend.figure_format = 'retina'
plt.rcParams.update({"font.size": 14})
import matplotlib.ticker as ticker
from matplotlib.colors import hsv_to_rgb

# Initialize random number generator
np.random.seed(1234)

import sys

print(f"Python version: {sys.version}")
print(f"PyMC3 version: {pm.__version__}")

# Install Black autoformatter with: pip install nb-black
%load_ext lab_black

## Define options

In [None]:
# Number of parameters estimated per simulated participant (logs and logk)
N_PARAMETERS = 2

# Number of simulated participants in each group, i.e. per correlation value
N = 40

# Correlation coefficients to sweep over when generating the true parameters
r_vals = np.linspace(-0.8, 0.8, num=30)

# Plot the data together with the true and recovered discount functions for
# every single simulated participant?
should_visualise = False

# Save a figure of the true parameters and discount functions for each group?
export_group_plots = True

# Export options
export = False
out_dir = "output/"

# Keyword arguments that are passed on to `pm.sample`
sample_options = dict(
    tune=1000,
    draws=1000,
    chains=2,
    cores=2,
    nuts_kwargs=dict(target_accept=0.95),
)

## Define the high level code - what we want to do
We want to loop through a range of parameter correlations. For each one we create a set of true parameter values with the given correlation coefficient, and then infer those parameters from simulated data. Finally, we plot the actual parameter correlations against the recovered correlations.

In [None]:
def r_sweep(r_vals, N):
    """Run a parameter recovery simulation for each correlation in `r_vals`.

    For every value of r we generate true (logk, logs) parameters for a group
    of N simulated participants, run the parameter recovery on that group and
    compute the correlation between the two recovered parameters.

    Parameters
    ----------
    r_vals : sequence of float
        Correlation coefficients used to generate the true parameters.
    N : int
        Number of simulated participants in each group.

    Returns
    -------
    (actual, recovered) : tuple of two lists of float
        `actual[i]` is the empirical Pearson correlation of the true
        parameters of group i, and `recovered[i]` is the Pearson correlation
        of the recovered parameters of that group.

    Notes
    -----
    When the global `export_group_plots` is true, a figure of the true
    parameters and discount functions of each group is saved as a PDF and then
    closed, so that a long sweep does not accumulate open figures.
    """
    import os

    actual = []
    recovered = []

    for i, r in enumerate(r_vals):

        print(f"\n\nGroup number {i} of {len(r_vals)}\n")

        params = generate_true_params(r=r, N=N)

        # visualise the parameters + discount functions for this group
        if export_group_plots:
            fig_path = f"output/corr_recovery_group{i}.pdf"
            # savefig raises if the target directory is missing, so create it
            os.makedirs(os.path.dirname(fig_path), exist_ok=True)

            fig, ax = plt.subplots(1, 2, figsize=(11, 4))
            plot_params_and_dfs(ax, params)
            fig.suptitle(f"True correlation coefficient = {r}")
            fig.savefig(fig_path, bbox_inches="tight", dpi=300)
            # the figure is on disk now; release it rather than keeping one
            # open figure per group for the whole sweep
            plt.close(fig)

        recovered_params = do_parameter_recovery(params)

        r_recovered, _ = pearsonr(recovered_params[:, 0], recovered_params[:, 1])

        # We record the empirical correlation of the sampled true parameters.
        # The alternative would be to record the underlying coefficient `r`
        # of the multivariate normal that they were drawn from.
        r_actual, _ = pearsonr(params[:, 0], params[:, 1])

        actual.append(r_actual)
        recovered.append(r_recovered)

    return (actual, recovered)

## Define the low-level code to achieve this
### Generate true parameters

In [None]:
def generate_true_params(
    r=0.0, logk_mean=np.log(1 / 50), logk_sigma=1.0, logs_mean=0, logs_sigma=0.2, N=20,
):
    """Draw N pairs of (logk, logs) from a bivariate normal distribution.

    `r` is the correlation coefficient between logk and logs. The means and
    standard deviations of the two marginals are given by the `*_mean` and
    `*_sigma` arguments. Column 0 of the result holds logk, column 1 logs.
    """
    means = [logk_mean, logs_mean]

    # the off-diagonal covariance follows from the correlation coefficient
    off_diagonal = logk_sigma * logs_sigma * r
    sigma_matrix = [
        [logk_sigma ** 2, off_diagonal],
        [off_diagonal, logs_sigma ** 2],
    ]

    distribution = multivariate_normal(means, sigma_matrix)
    return distribution.rvs(N)

Visualise for a sanity check

In [None]:
# correlation coefficient used to generate the example parameters
r = +0.25
# draw 20 (logk, logs) pairs from the bivariate normal with that correlation
params = generate_true_params(r=r, N=20)

In [None]:
def plot_params_and_dfs(ax, params):
    """Plot a set of parameters alongside the discount functions they imply.

    ax     : indexable with (at least) 2 axes. ax[0] receives the scatter
             plot of the parameters, ax[1] the discount functions.
    params : array with one row per simulated participant, where column 0
             is log(k) and column 1 is log(s).
    """
    param_ax, df_ax = ax[0], ax[1]

    # parameters: log(k) on the x axis, s (not log(s)) on the y axis
    param_ax.scatter(params[:, 0], np.exp(params[:, 1]))
    param_ax.set(xlabel=r"$\log(k)$", ylabel=r"$s$", title="params")

    # one translucent discount function per row of parameters
    delays = np.linspace(0, 100, 1000)
    for row in params:
        k = np.exp(row[0])
        s = np.exp(row[1])
        discount_fraction = 1 / (1 + (k * delays) ** s)
        df_ax.plot(delays, discount_fraction, "k", lw=3, alpha=0.1)

    df_ax.set(
        title="discount functions", xlabel="delay [sec]", ylabel="discount fraction"
    )

In [None]:
# two panels side by side: the parameters and the resulting discount functions
fig, ax = plt.subplots(1, 2, figsize=(11, 4))

plot_params_and_dfs(ax, params)
# r is the coefficient used to generate the parameters, not the correlation
# measured in this particular sample
fig.suptitle(f"True correlation coefficient = {r}")

Note that the parameters are generated as $\log(k)$ and $\log(s)$; we simply plot $s$ (rather than $\log(s)$) on the y axis for interpretability.

### Parameter recovery code

In [None]:
def do_parameter_recovery(params):
    """Simulate an experiment and infer the parameters for each row of `params`.

    `params` holds one row of true (logk, logs) values per simulated
    participant. The returned array has the same number of rows and
    N_PARAMETERS columns, filled with whatever `infer_parameters` returns,
    which is the posterior means in the order (logs, logk).
    """
    n_participants = params.shape[0]
    estimates = np.empty([n_participants, N_PARAMETERS])

    for row, true_values in enumerate(params):
        # `params` stores (logk, logs), but the simulation code expects the
        # tuple in the order (logs, logk), so swap the two here
        data_generating_params = (true_values[1], true_values[0])

        expt_data = simulate_experiment(data_generating_params)
        estimates[row, :] = infer_parameters(expt_data)

        if should_visualise:
            visualise(expt_data, data_generating_params, estimates[row, :])

    return estimates

### Simulate experimental data

In [None]:
def simulate_experiment(params_true, ϵ=0.01):
    """Simulate one participant completing the experiment.

    `params_true` is the tuple (logs, logk) and `ϵ` is the lapse rate passed
    on to `generate_responses`. Returns a DataFrame with the design columns
    followed by the simulated response column.
    """
    designs = generate_designs()
    simulated = generate_responses(designs, params_true, ϵ)
    # only the responses are needed here, not the choice probabilities
    responses = simulated[0]
    return pd.concat([designs, responses], axis=1)


def generate_designs():
    """Build the table of questions (RA, DA, RB, DB) for one experiment.

    Option A is an immediate reward (DA = 0) that takes each of 10 values and
    option B is a reward of 60 at one of 5 delays, giving 50 questions. The
    delays are visited in a random order, and within each delay the immediate
    rewards are presented in ascending order. According to the original
    author this should precisely match the questions used in the actual
    experiment.
    """
    immediate_rewards = np.array([6, 12, 18, 24, 30, 36, 42, 48, 54, 60])
    delays = np.array([7, 15, 29, 56, 101])
    n_trials = immediate_rewards.size * delays.size

    # randomise the order in which the delays are visited
    delay_order = np.arange(len(delays))
    np.random.shuffle(delay_order)

    # every delay is held fixed while we run through all immediate rewards
    DB = np.repeat(delays[delay_order], immediate_rewards.size)
    RA = np.tile(immediate_rewards, delays.size)

    return pd.DataFrame(
        {
            "RA": RA,
            "DA": np.zeros(n_trials),
            "RB": np.full(n_trials, 60),
            "DB": DB,
        }
    )


def generate_responses(designs, params_true, ϵ):
    """Simulate the choices of a participant for a set of designs.

    designs     : DataFrame with the columns RA, DA, RB and DB.
    params_true : tuple (logs, logk) of the data generating parameters.
    ϵ           : lapse rate, which bounds the choice probability to the
                  interval [ϵ, 1 - ϵ].

    Returns a tuple of a DataFrame with the single column "R" (1 when
    option B was chosen) and the array of probabilities of choosing B.
    """
    logs, logk = params_true
    s = np.exp(logs)
    k = np.exp(logk)

    def present_value(reward, delay):
        # modified Rachlin discount function
        return reward * (1 / (1 + (k * delay) ** s))

    VA = present_value(designs["RA"].values, designs["DA"].values)
    VB = present_value(designs["RB"].values, designs["DB"].values)

    # logistic choice function of the value difference, with lapses
    logistic = 1 / (1 + np.exp(-1.7 * (VB - VA)))
    p_choose_B = ϵ + (1 - 2 * ϵ) * logistic

    choices = bernoulli.rvs(p_choose_B)
    return pd.DataFrame({"R": choices}), p_choose_B

Example...

In [None]:
# the tuple is (logs, logk), so this simulates a participant with s = 1 and
# log(k) = -2, and shows the first rows of the resulting data
simulate_experiment((np.log(1), -2.0)).head()

### Parameter estimation (inference) code

In [None]:
def infer_parameters(data):
    """Estimate the parameters of one participant from their response data.

    Builds the model for `data`, samples from it using the global
    `sample_options`, and returns the posterior means as an array in the
    order (logs, logk).
    """
    with generate_model(data):
        trace = pm.sample(**sample_options)

    posterior_means = [np.mean(trace[name]) for name in ("logs", "logk")]
    return np.array(posterior_means)


def generate_model(data):
    """Build the PyMC3 model of the choices for the observed `data`.

    `data` must provide the design columns RA, DA, RB and DB as well as the
    response column R. The model places normal priors on logk and logs and
    treats each response as a Bernoulli draw whose probability is given by
    the psychometric function of the difference in present subjective value.
    """
    responses = data["R"].values
    RA = data["RA"].values
    DA = data["DA"].values
    RB = data["RB"].values
    DB = data["DB"].values

    with pm.Model() as model:
        # priors over the parameters
        logk = pm.Normal("logk", mu=np.log(1 / 30), sd=3)
        logs = pm.Normal("logs", mu=0, sd=1)

        # present subjective value of each option
        value_a = pm.Deterministic("VA", value_function(RA, DA, logk, logs))
        value_b = pm.Deterministic("VB", value_function(RB, DB, logk, logs))

        # probability of choosing option B
        p_choose_b = pm.Deterministic(
            "P_chooseB", choice_psychometric(value_b - value_a)
        )

        # likelihood of the observed responses
        pm.Bernoulli("R", p=p_choose_b, observed=responses)

    return model


# helper functions for the model


def value_function(reward, delay, logk, logs):
    """Return the present subjective value of `reward` delivered at `delay`.

    Uses the modified Rachlin discount function reward / (1 + (k * delay)^s)
    with k = exp(logk) and s = exp(logs).
    """
    scaled_delay = pm.math.exp(logk) * delay
    exponent = pm.math.exp(logs)
    return reward / (1.0 + scaled_delay ** exponent)


def choice_psychometric(x, ϵ=0.01):
    """Map the decision variable `x` onto a choice probability.

    A logistic function with slope 1.7 is squeezed into the interval
    [ϵ, 1 - ϵ], where ϵ is the lapse rate.
    """
    logistic = 1 / (1 + pm.math.exp(-1.7 * (x)))
    return ϵ + (1.0 - 2.0 * ϵ) * logistic

## Code to visualise a simulated experiment

In [None]:
def visualise(data, data_generating_params, recovered_params):
    """Plot the data of one simulated experiment with its discount functions.

    data                   : DataFrame with the columns DB, RA, RB and R.
    data_generating_params : tuple (logs, logk) used to simulate the data.
    recovered_params       : indexable holding the inferred (logs, logk).

    Each question is drawn at (DB, RA / RB) and coloured by the response. The
    recovered discount function is overlaid in red, the true one in black.
    """
    fig, ax = plt.subplots(figsize=(9, 6))
    plt.scatter(data.DB, data.RA / data.RB, c=data.R)

    delays = np.linspace(0, 100, 1000)

    def discount_fraction(logs, logk):
        s, k = np.exp(logs), np.exp(logk)
        return 1 / (1 + (k * delays) ** s)

    # recovered discount function
    recovered_curve = discount_fraction(recovered_params[0], recovered_params[1])
    plt.plot(delays, recovered_curve, "r", alpha=0.5, lw=2, label="recovered")

    # true discount function
    true_logs, true_logk = data_generating_params
    true_curve = discount_fraction(true_logs, true_logk)
    plt.plot(delays, true_curve, "k", lw=2, label="true")

    plt.legend()
    plt.show()

## Visualise before the simulations

In [None]:
# Preview the true parameters and discount functions for every correlation
# coefficient in the sweep, saving one PDF per group.
# NOTE(review): r_sweep calls generate_true_params again, so the groups drawn
# here are not the ones used in the recovery itself. When export_group_plots
# is true, r_sweep also saves its figures under these same file names.
for i, r in enumerate(r_vals):

    params = generate_true_params(r=r, N=N)

    fig, ax = plt.subplots(1, 2, figsize=(11, 4))
    plot_params_and_dfs(ax, params)
    fig.suptitle(f"True correlation coefficient = {r}")
    plt.savefig(f"output/corr_recovery_group{i}.pdf", bbox_inches="tight", dpi=300)

## Now run the actual parameter sweep over _r_ values

In [None]:
# run the recovery for every correlation coefficient in r_vals; `actual` and
# `recovered` hold one correlation coefficient per group
actual, recovered = r_sweep(r_vals=r_vals, N=N)

In [None]:
# scatter the recovered against the actual correlation coefficients
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(actual, recovered)
# identity line: points on this line have recovered == actual
ax.plot([-1, 1], [-1, 1], "k")
ax.set(
    title=f"Recovery of parameter correlations\n(group size = {N})",
    xlabel="actual correlation coefficient",
    ylabel="recovered correlation coefficient",
)

# save the summary figure as a PDF
plt.savefig("output/r_recovery.pdf", bbox_inches="tight", dpi=300)

# References
- Vincent, B. T., & Stewart, N. (2020). The case of muddled units in temporal discounting. _Cognition_, 198, 1-11. https://doi.org/10.1016/j.cognition.2020.104203