In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.special import logsumexp
from scipy.stats import norm
import random

# Set some defaults
plt.rc("axes.spines", top=False, right=False)

sns.set_theme(context="paper", font_scale=1.2)
sns.set_style("ticks")

# Import python library/function for function optimization
import scipy.optimize

%config InlineBackend.figure_format = "retina"

## The Plan

### Parameter and model recovery (Can I do both at the same time?)
1) Simulate data using best-fit parameters from each model (or samples from within the range of best-fits). For example, 100 simulations of each model within range of best-fit parameters.
2) Re-estimate parameters.
3) Model comparison with AIC/BIC. 
4) Correlate parameters used for simulation with recovered parameters.


In [2]:
# Read in vmr data (later cells read its SN, TN, rotation, fbi and motor_sd columns)
df = pd.read_csv("../results/vmr_all.csv")

# Read in csv with MLEs from all models
# Need a function to convert string back to numpy array
def converter(input_str):
    """Parse the text form of a 1-D numeric array back into a float numpy array.

    Accepts the bracketed, whitespace-separated form (e.g. "[1. 2. 3.]"),
    including values wrapped over several lines, and also tolerates comma
    separators and surrounding whitespace.

    Parameters
    ----------
    input_str : str
        Text such as "[9.34 0.36 1.29]" or "[9.34, 0.36, 1.29]".

    Returns
    -------
    numpy.ndarray
        1-D float array; empty (shape (0,)) for "[]".

    Raises
    ------
    ValueError
        If any token cannot be converted to float (instead of silently
        returning a truncated array).
    """
    # Drop outer whitespace and brackets, then treat commas like whitespace
    tokens = input_str.strip().strip("[]").replace(",", " ").split()
    return np.array(tokens, dtype=float)

# Load the per-model fits; the "theta" column is passed through `converter`
# so each cell holds a numpy array rather than its string form.
fits = pd.read_csv("../results/params_mle_reducedmodels.csv", converters={"theta":converter})
# Preview the first rows
fits.head()

Unnamed: 0,subj_num,model,theta,loglik,bic,delta_bic
0,1,pea,"[9.34023228, 0.36206566, 1.29827665]",-2650.946357,5323.229696,-209.50344
1,1,premo,"[0.843862233, 16.7663819, 0.01, 25.0, 1.0, 1.0...",-2745.029585,5532.733135,0.0
2,1,rem,"[14.2432723, 3.81197959, 9.99980445, 1.29836274]",-2650.97343,5330.39617,-202.336966
3,1,piece,"[5.95716432, 0.01, 1.01037002]",-2526.204156,5073.745295,-458.98784
4,2,pea,"[3.76251428, 0.49892211, 0.89640008]",-2854.631264,5730.599509,-186.281992


In [3]:
# Negative log-likelihood wrappers, one per model.
# Each takes a parameter vector `x` and forwards it to `negloglik`. The task
# variables (motor_sd, num_trials, vis_fb, rotation, x_hand) are looked up as
# globals at call time, so they must be set before a wrapper is evaluated.
def _shared_task_kwargs():
    """Collect the task-level keyword arguments every model's likelihood receives."""
    return dict(
        sigma_motor=motor_sd,
        num_trials=num_trials,
        vis_fb=vis_fb,
        rotation=rotation,
        x_hand=x_hand,
    )


def nll_pea(x):
    """Negative log-likelihood of the PEA model; x = (sigma_int, B, bias)."""
    return negloglik(
        model="pea", sigma_int=x[0], B=x[1], bias=x[2], **_shared_task_kwargs()
    )


def nll_premo(x):
    """Negative log-likelihood of PReMo; x = (B, sigma_pred, sigma_v, sigma_p, eta_p, bias)."""
    return negloglik(
        model="premo",
        B=x[0],
        sigma_pred=x[1],
        sigma_v=x[2],
        sigma_p=x[3],
        eta_p=x[4],
        bias=x[5],
        **_shared_task_kwargs(),
    )


def nll_piece(x):
    """Negative log-likelihood of the PIECE model; x = (sigma_pert, sigma_comb, bias)."""
    return negloglik(
        model="piece", sigma_pert=x[0], sigma_comb=x[1], bias=x[2], **_shared_task_kwargs()
    )


def nll_rem(x):
    """Negative log-likelihood of the REM model; x = (sigma_comb, s, c, bias)."""
    return negloglik(
        model="rem", sigma_comb=x[0], s=x[1], c=x[2], bias=x[3], **_shared_task_kwargs()
    )

---
## Model recovery analysis (i.e., build confusion matrix)

In [100]:
# Candidate models
models = ["piece", "pea", "premo", "rem"]

# Accumulators filled in by the model-recovery loop
simulation_params, recovered_params, model, winner = [], [], [], []

# Stack every subject's best-fit theta into one 2d array per model
params = fits.groupby("model")["theta"].apply(np.stack)
params_piece, params_pea, params_premo, params_rem = (
    params[name] for name in ("piece", "pea", "premo", "rem")
)

# Per-parameter range of the best fits: lower/upper bounds for simulated params
piece_lb, piece_ub = params_piece.min(axis=0), params_piece.max(axis=0)
pea_lb, pea_ub = params_pea.min(axis=0), params_pea.max(axis=0)
premo_lb, premo_ub = params_premo.min(axis=0), params_premo.max(axis=0)
rem_lb, rem_ub = params_rem.min(axis=0), params_rem.max(axis=0)

# Experimental parameters, taken from subject 1 (same for all 16 subjects)
first_subject = df["SN"] == 1
rotation = df.loc[first_subject, "rotation"].values
vis_fb = df.loc[first_subject, "fbi"].values
num_trials = np.unique(df["TN"]).size

In [None]:
# Model recovery analysis
#
# For each repetition: draw one motor noise level and one parameter vector per
# model, simulate a data set from every model in turn, fit all four models to
# each simulated data set, and record which model wins on BIC together with the
# generating and recovered parameters of the simulated model.

N_SIMULATIONS = 100  # number of simulated data sets per generating model


def _fit_from_random_start(nll, bounds):
    """Minimise `nll` inside the box `bounds`, starting from a uniformly random point."""
    x0 = np.array([np.random.uniform(low=lo, high=hi) for lo, hi in bounds])
    return scipy.optimize.minimize(fun=nll, x0=x0, bounds=bounds)


# Box constraints used when re-fitting each model. Parameter order follows the
# indexing used by the corresponding nll_* function.
piece_bounds = ((0.01, 30), (0.01, 25), (-5, 5))  # sigma_pert, sigma_comb, bias
pea_bounds = ((0.01, 25), (0, 1), (-5, 5))  # sigma_int, B, bias
premo_bounds = (  # B, sigma_pred, sigma_v, sigma_p, eta_p, bias
    (0, 1), (0.01, 25), (0.01, 25), (0.01, 25), (0, 1), (-5, 5)
)
rem_bounds = ((0.01, 25), (0, 10), (0, 10), (-5, 5))  # sigma_comb, s, c, bias

# Order in which the BICs are compared when picking the winner
fit_order = ("piece", "pea", "premo", "rem")

# Start from empty accumulators so re-running this cell does not stack a new
# run on top of the previous one.
simulation_params = []
recovered_params = []
model = []
winner = []

# Range of motor noise seen in the data (does not change across repetitions)
motor_sd_lb = df["motor_sd"].min()
motor_sd_ub = df["motor_sd"].max()

for i in range(N_SIMULATIONS):
    print(i)

    # Sample from range of reasonable values.
    # motor_sd is read as a global by the nll_* functions when they are called.
    motor_sd = np.random.uniform(low=motor_sd_lb, high=motor_sd_ub)

    # Generate parameter values for each model through random sampling
    piece_simparams = np.random.uniform(low=piece_lb, high=piece_ub)
    pea_simparams = np.random.uniform(low=pea_lb, high=pea_ub)
    premo_simparams = np.random.uniform(low=premo_lb, high=premo_ub)
    rem_simparams = np.random.uniform(low=rem_lb, high=rem_ub)
    simparams_by_model = {
        "piece": piece_simparams,
        "pea": pea_simparams,
        "premo": premo_simparams,
        "rem": rem_simparams,
    }

    # Loop through generating models
    for simulated_model in models:
        theta_sim = simparams_by_model[simulated_model]

        # Simulate a data set from the generating model.
        # NOTE(review): piece receives all three sampled values, while pea,
        # premo and rem are called without the last sampled element (the one
        # the nll_* functions pass as `bias`) -- confirm against the
        # simulators' signatures.
        if simulated_model == "piece":
            _, xhat = piece(
                theta_sim[0],
                theta_sim[1],
                theta_sim[2],
                motor_sd, len(rotation),
                vis_fb,
                rotation,
                fit=False
            )
        elif simulated_model == "pea":
            _, xhat = pea(
                theta_sim[0],
                theta_sim[1],
                motor_sd, len(rotation),
                vis_fb,
                rotation,
                fit=False
            )
        elif simulated_model == "premo":
            _, xhat = premo(
                theta_sim[0],
                theta_sim[1],
                theta_sim[2],
                theta_sim[3],
                theta_sim[4],
                motor_sd, len(rotation),
                vis_fb,
                rotation,
                fit=False
            )
        elif simulated_model == "rem":
            _, xhat = rem(
                theta_sim[0],
                theta_sim[1],
                theta_sim[2],
                theta_sim[3],
                motor_sd, len(rotation),
                vis_fb,
                rotation,
                fit=False
            )
        else:
            raise ValueError(f"no simulator for model {simulated_model!r}")

        # The nll_* functions read the data to fit from the global x_hand
        x_hand = xhat

        # Fit every candidate model to the simulated data and score it with BIC
        piece_results = _fit_from_random_start(nll_piece, piece_bounds)
        bic_piece = calc_bic(piece_results.fun * -1, len(piece_results.x), len(rotation))

        pea_results = _fit_from_random_start(nll_pea, pea_bounds)
        bic_pea = calc_bic(pea_results.fun * -1, len(pea_results.x), len(rotation))

        premo_results = _fit_from_random_start(nll_premo, premo_bounds)
        bic_premo = calc_bic(premo_results.fun * -1, len(premo_results.x), len(rotation))

        rem_results = _fit_from_random_start(nll_rem, rem_bounds)
        bic_rem = calc_bic(rem_results.fun * -1, len(rem_results.x), len(rotation))

        # Keep the parameters recovered by the model that generated the data
        results_by_model = {
            "piece": piece_results,
            "pea": pea_results,
            "premo": premo_results,
            "rem": rem_results,
        }
        recovered_params.append(results_by_model[simulated_model].x)

        # Store winning model (lowest BIC)
        winner.append(fit_order[np.argmin([bic_piece, bic_pea, bic_premo, bic_rem])])

        # Store the parameters that generated this data set, so they can be
        # compared with the recovered ones, and the generating model's name.
        simulation_params.append(theta_sim)
        model.append(simulated_model)

df_model = pd.DataFrame({
    "model":model, 
    "winner":winner, 
    "simulation_params": simulation_params,
    "recovered_params": recovered_params
})

In [None]:
from pandas.api.types import CategoricalDtype

# Ordered categorical shared by the simulated and the winning model columns,
# so both are encoded with the same integer codes.
cat_dtype = CategoricalDtype(
    categories=["piece", "premo", "pea", "rem"],
    ordered=True,
)

# Convert each label column to the categorical dtype and add its integer codes
for col in ("model", "winner"):
    df_model[col] = df_model[col].astype(cat_dtype)
    df_model[f"{col}_codes"] = df_model[col].cat.codes

In [None]:
# Toy example: true vs. predicted model labels
y_actu = ["piece", "premo", "pea", "rem"]
y_pred = ["piece", "premo", "pea", "pea"]

labels = ["piece", "premo", "pea", "rem"]

# Map integer index -> label. np.unique returns the labels sorted
# alphabetically, so the indices follow that order, not the order of `labels`.
labels_dict = dict(enumerate(np.unique(labels)))
labels_dict

In [None]:
# Confusion matrix: rows index the simulated model, columns the winning model,
# both via the integer category codes stored in df_model.
K = len(np.unique(df_model["model"])) # Number of classes 
confusion_mat = np.zeros((K, K))

# Count one (simulated, winner) pair per row of df_model
for true_code, pred_code in zip(df_model["model_codes"], df_model["winner_codes"]):
    confusion_mat[true_code][pred_code] += 1

print(confusion_mat)

In [None]:
labels = ["piece", "premo", "pea", "rem"]

# Index -> label lookup; np.unique sorts the labels alphabetically first
test = dict(enumerate(np.unique(labels)))
test

In [None]:
def compute_confusion_matrix(true, pred, labels=None):
  '''Computes a confusion matrix using numpy for two label sequences
  true and pred.

  Rows index the true class and columns the predicted class. Labels may be
  of any hashable type (e.g. strings or integers); they are mapped to
  row/column positions internally rather than used as indices directly.

  This function avoids the dependency on sklearn.

  Parameters
  ----------
  true, pred : array-like
      True and predicted labels, of equal length.
  labels : array-like, optional
      Labels in the order they should appear along the rows/columns.
      Defaults to the sorted unique labels found in true and pred
      combined, so a class that only shows up in pred still gets a row.

  Returns
  -------
  numpy.ndarray
      Float array of shape (K, K), where K is the number of labels;
      entry [i, j] counts items with true label i and predicted label j.

  Raises
  ------
  ValueError
      If true and pred differ in length, or if a label is encountered
      that is not listed in labels.
  '''
  # Work with plain Python values so numpy scalars and builtins hash alike
  true_list = np.asarray(true).ravel().tolist()
  pred_list = np.asarray(pred).ravel().tolist()
  if len(true_list) != len(pred_list):
    raise ValueError("true and pred must have the same length")

  if labels is None:
    labels = np.unique(true_list + pred_list).tolist()
  else:
    labels = np.asarray(labels).ravel().tolist()

  # Label -> row/column position
  position = {label: k for k, label in enumerate(labels)}

  K = len(labels) # Number of classes
  result = np.zeros((K, K))

  for t, p in zip(true_list, pred_list):
    if t not in position or p not in position:
      missing = t if t not in position else p
      raise ValueError(f"label {missing!r} is not in labels")
    result[position[t], position[p]] += 1

  return result

# Try the helper on the toy string labels (y_actu / y_pred) defined in an earlier cell
compute_confusion_matrix(y_actu, y_pred)