### Linear model
- Miller et al. 2021

In [177]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view
import matplotlib.pyplot as plt
from neuropy import plotting
from scipy import stats


basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig = plotting.Fig(6, 3, size=(12, 5), num=1)

npast = 10
params_pooled = []
task_type_bool = []

for i, file in enumerate(files):
    data_df = pd.read_csv(basepath / file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    task_type = "unstructured" if prob_corr < 0.2 else "structured"
    task_type_bool.append(prob_corr)

    choices = data_df["port"].to_numpy()
    choices[choices == 2] = -1
    outcomes = data_df["reward"].to_numpy()
    outcomes[outcomes == 0] = -1
    n_trials = choices.size

    past_choices = sliding_window_view(choices, npast)[:-1, :]
    past_outcomes = sliding_window_view(outcomes, npast)[:-1, :]
    actual_choices = choices[npast:]

    x = np.hstack(
        (
            past_choices * past_outcomes,
            past_choices,
            past_outcomes,
        )
    )
    clf = LogisticRegression(random_state=0).fit(x, actual_choices)

    params = np.fliplr(clf.coef_.squeeze().reshape(3, npast))
    params_pooled.append(params)

    subfig = fig.add_subfigure(fig.gs[i])
    subfig.suptitle(f"{files[i].name[:-4]}, {task_type}")
    sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

    colors = ["orange", "purple", "blue"]
    titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
    for _, ax in enumerate(sub_axs):

        ax.plot(np.arange(1, 11), params[_], ".-", color=colors[_], zorder=1)
        ax.set_title(titles[_])
        ax.axhline(0, color="gray", zorder=0, lw=0.8)
        ax.set_xticks([1, 5, 10])

    if i == 0:
        sub_axs[0].set_xlabel("Trials in the past")
        sub_axs[0].set_ylabel("Influence on current choice")

task_type_bool = np.array(task_type_bool)
params_pooled = np.array(params_pooled)
mean_struc = params_pooled[task_type_bool < 0.2, :, :].mean(axis=0)
mean_unstruc = params_pooled[task_type_bool > 0.2, :, :].mean(axis=0)

subfig = fig.add_subfigure(fig.gs[4:, 0:2])
subfig.suptitle(f"Mean across animals by task type")
sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

# colors = ["orange", "purple", "blue"]
colors = ["#5040BF", "#AFBF40"]


titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
for _, ax in enumerate(sub_axs):

    ax.plot(np.arange(1, 11), mean_struc[_], ".-", color=colors[0], alpha=0.7, zorder=1)
    ax.plot(
        np.arange(1, 11), mean_unstruc[_], ".-", color=colors[1], alpha=0.7, zorder=1
    )
    ax.legend(["Struc", "Unstruc"])
    ax.set_title(titles[_])
    ax.axhline(0, color="gray", zorder=0, lw=0.8)
    ax.set_xticks([1, 5, 10])

### Cognitive model

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view
import matplotlib.pyplot as plt
from neuropy import plotting
from scipy import stats


basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig = plotting.Fig(6, 3, size=(12, 5), num=1)

npast = 10
params_pooled = []
task_type_bool = []

for i, file in enumerate(files):
    data_df = pd.read_csv(basepath / file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    task_type = "unstructured" if prob_corr < 0.2 else "structured"
    task_type_bool.append(prob_corr)

    choices = data_df["port"].to_numpy()
    choices[choices == 2] = -1
    outcomes = data_df["reward"].to_numpy()
    outcomes[outcomes == 0] = -1
    n_trials = choices.size

    past_choices = sliding_window_view(choices, npast)[:-1, :]
    past_outcomes = sliding_window_view(outcomes, npast)[:-1, :]
    actual_choices = choices[npast:]

    x = np.hstack(
        (
            past_choices * past_outcomes,
            past_choices,
            past_outcomes,
        )
    )
    clf = LogisticRegression(random_state=0).fit(x, actual_choices)

    params = np.fliplr(clf.coef_.squeeze().reshape(3, npast))
    params_pooled.append(params)

    subfig = fig.add_subfigure(fig.gs[i])
    subfig.suptitle(f"{files[i].name[:-4]}, {task_type}")
    sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

    colors = ["orange", "purple", "blue"]
    titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
    for _, ax in enumerate(sub_axs):

        ax.plot(np.arange(1, 11), params[_], ".-", color=colors[_], zorder=1)
        ax.set_title(titles[_])
        ax.axhline(0, color="gray", zorder=0, lw=0.8)
        ax.set_xticks([1, 5, 10])

    if i == 0:
        sub_axs[0].set_xlabel("Trials in the past")
        sub_axs[0].set_ylabel("Influence on current choice")

task_type_bool = np.array(task_type_bool)
params_pooled = np.array(params_pooled)
mean_struc = params_pooled[task_type_bool < 0.2, :, :].mean(axis=0)
mean_unstruc = params_pooled[task_type_bool > 0.2, :, :].mean(axis=0)

subfig = fig.add_subfigure(fig.gs[4:, 0:2])
subfig.suptitle(f"Mean across animals by task type")
sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

colors = ["orange", "purple", "blue"]
titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
for _, ax in enumerate(sub_axs):

    ax.plot(np.arange(1, 11), mean_struc[_], ".-", color=colors[_], zorder=1)
    ax.plot(
        np.arange(1, 11), mean_unstruc[_], ".-", color=colors[_], alpha=0.5, zorder=1
    )
    ax.set_title(titles[_])
    ax.axhline(0, color="gray", zorder=0, lw=0.8)
    ax.set_xticks([1, 5, 10])

In [131]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize

# Simulated Example Data (Each row: [choice, reward])
# data = np.array([[0, 1], [1, 0], [1, 1], [0, 0], [0, 1], [1, 0], [1, 1], [0, 0]])

# choices = data[:, 0]  # 0 or 1 (action taken)
# rewards = data[:, 1]  # 0 or 1 (reward received)


basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig = plotting.Fig(6, 3, size=(12, 5), num=1)

npast = 10
params_pooled = []
task_type_bool = []

for i, file in enumerate(files[:1]):
    data_df = pd.read_csv(basepath / file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    task_type = "unstructured" if prob_corr < 0.2 else "structured"
    task_type_bool.append(prob_corr)

    choices = data_df["port"].to_numpy().astype(int)
    choices[choices == 2] = 0
    rewards = data_df["reward"].to_numpy().astype(int)
    # rewards[rewards == 0] = -1
    n_trials = choices.size

    # Q-learning function with given alpha
    def compute_q_values(alpha):
        Q = np.zeros(2)  # Initialize Q-values for two actions
        q_values = []

        for choice, reward in zip(choices, rewards):
            Q[choice] = Q[choice] + alpha * (reward - Q[choice])
            q_values.append(Q.copy())

        return np.array(q_values)

    # Loss function to optimize alpha (maximize log-likelihood)
    def log_likelihood(alpha):
        Q_values = compute_q_values(alpha)
        X = (Q_values[:, 0] - Q_values[:, 1]).reshape(-1, 1)  # Difference in Q-values

        model = LogisticRegression()
        model.fit(X, choices)  # Fit logistic regression on choice data

        # Compute log-likelihood
        probs = model.predict_proba(X)[:, 1]  # Probability of choosing action 1
        ll = np.sum(choices * np.log(probs) + (1 - choices) * np.log(1 - probs))
        return -ll  # Negative for minimization

    # Optimize alpha using a bounded method
    result = minimize(log_likelihood, x0=0.5, bounds=[(0, 1)], method="L-BFGS-B")

    alpha_estimated = result.x[0]
    print(f"Estimated alpha: {alpha_estimated:.4f}")

  fig = plt.figure(num=num, figsize=(8.5, 11), clear=True)
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[choice] = Q[choice] + alpha * (reward - Q[choice])
  Q[ch

Estimated alpha: 0.1180


In [140]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize

basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

# fig = plotting.Fig(6, 3, size=(12, 5), num=1)
estimated_params = []
task_type = []

for i, file in enumerate(files):
    data_df = pd.read_csv(basepath / file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    task_type.append("unstructured" if prob_corr < 0.2 else "structured")
    task_type_bool.append(prob_corr)

    choices = data_df["port"].to_numpy().astype(int)
    choices[choices == 2] = 0
    rewards = data_df["reward"].to_numpy().astype(int)
    # rewards[rewards == 0] = -1
    n_trials = choices.size

    # Q-learning function with different learning rates for left and right
    def compute_q_values(alpha_L, alpha_R):
        Q = np.zeros(2)  # Q-values: Q[0] for Left, Q[1] for Right
        q_values = []

        for choice, reward in zip(choices, rewards):
            if choice == 0:
                Q[0] += alpha_L * (reward - Q[0])
            else:
                Q[1] += alpha_R * (reward - Q[1])
            q_values.append(Q.copy())

        return np.array(q_values)

    # Log-likelihood function to optimize alpha_L and alpha_R
    def log_likelihood(params):
        alpha_L, alpha_R, beta = params
        Q_values = compute_q_values(alpha_L, alpha_R)

        # Predictor for logistic regression: difference in Q-values (Q_right - Q_left)
        # X = (Q_values[:, 1] - Q_values[:, 0]).reshape(-1, 1)
        # model = LogisticRegression()
        # model.fit(X, choices)  # Fit logistic regression on choice data
        # probs = model.predict_proba(X)[:, 1]  # Probability of choosing right (1)

        Q_diff = Q_values[:, 1] - Q_values[:, 0]
        probs = 1 / (1 + np.exp(-beta * Q_diff))  # Softmax choice probability

        # Compute log-likelihood
        ll = np.sum(choices * np.log(probs) + (1 - choices) * np.log(1 - probs))
        return -ll  # Negative for minimization

    # Optimize alpha_L and alpha_R using a bounded method
    result = minimize(
        log_likelihood,
        x0=[0.5, 0.5, 1],
        bounds=[(0, 1), (0, 1), (0, 10)],
        method="L-BFGS-B",
    )

    estimated_params.append(result.x)
    alpha_L_est, alpha_R_est, beta = result.x
    print(
        f"Estimated alpha_L: {alpha_L_est:.4f}, Estimated alpha_R: {alpha_R_est:.4f}, Estimated: beta: {beta}"
    )

Estimated alpha_L: 0.0921, Estimated alpha_R: 0.1707, Estimated: beta: 5.483222442460307
Estimated alpha_L: 0.1736, Estimated alpha_R: 0.9315, Estimated: beta: 2.796233073343137
Estimated alpha_L: 0.1640, Estimated alpha_R: 0.3838, Estimated: beta: 3.178154749857273
Estimated alpha_L: 0.0547, Estimated alpha_R: 0.0645, Estimated: beta: 8.093633697850429
Estimated alpha_L: 0.2819, Estimated alpha_R: 0.0004, Estimated: beta: 10.0
Estimated alpha_L: 0.1663, Estimated alpha_R: 0.1569, Estimated: beta: 7.502534759056712
Estimated alpha_L: 0.1316, Estimated alpha_R: 0.0834, Estimated: beta: 5.057157500537784
Estimated alpha_L: 0.0745, Estimated alpha_R: 0.1509, Estimated: beta: 7.607211627021995
Estimated alpha_L: 0.2599, Estimated alpha_R: 0.0607, Estimated: beta: 4.530078434819735
Estimated alpha_L: 0.2299, Estimated alpha_R: 0.1507, Estimated: beta: 4.972490685682155
Estimated alpha_L: 0.1678, Estimated alpha_R: 0.1235, Estimated: beta: 4.498972310981454
Estimated alpha_L: 0.2979, Estimat

In [173]:
fig = plotting.Fig(1, 2, size=(4, 3), num=1)
estimated_params = np.array(estimated_params)
ax1 = fig.subplot(fig.gs[0])
ax2 = fig.subplot(fig.gs[1])

for i in range(estimated_params.shape[0]):
    if task_type[i] == "structured":
        color = "#5040BF"
    else:
        color = "#AFBF40"

    x1 = np.array([1, 2]) + 0.1 * np.random.randn(2)

    ax1.plot(x1, estimated_params[i, :2], ".", color=color, alpha=0.6)
    ax1.set_xlim(0, 3)
    ax2.plot(
        1 + 0.1 * np.random.randn(1),
        estimated_params[i, 2],
        ".",
        color=color,
        alpha=0.6,
    )
    ax2.set_xlim(0, 2)

# ax1.legend(["struc", "unstruc"])
ax1.set_xticks([1, 2], ["Alpha_L", "Alpha_R"])
ax2.set_xticks([1], ["Beta"])
ax1.set_ylabel("Estimated alpha values")
ax2.set_ylabel("Estimated beta values")
fig.fig.suptitle("Q-learning in two-armed bandit task")

  fig = plt.figure(num=num, figsize=(8.5, 11), clear=True)


Text(0.5, 0.98, 'Q-learning in two-armed bandit task')