### Linear model
- Miller et al. 2021

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view
import matplotlib.pyplot as plt
from neuropy import plotting
from scipy import stats


# Fit, for every animal, a logistic regression that predicts the current
# choice from the previous `npast` trials (linear model, Miller et al. 2021).
basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig = plotting.Fig(6, 3, size=(12, 5), num=1)

npast = 10  # number of previous trials used as predictors
params_pooled = []  # one (3, npast) coefficient array per file
task_type_bool = []  # despite the name: |Pearson r| per file, thresholded later

colors = ["orange", "purple", "blue"]
titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
lags = np.arange(1, npast + 1)  # x-axis: 1 = most recent trial

for i, file in enumerate(files):
    # glob() already returns paths that include `basepath`; joining it again
    # would duplicate the directory whenever `basepath` is relative.
    data_df = pd.read_csv(file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    # Weakly correlated reward probabilities are labelled "unstructured".
    task_type = "unstructured" if prob_corr < 0.2 else "structured"
    task_type_bool.append(prob_corr)

    # Recode to +1/-1 without writing into the arrays returned by to_numpy():
    # those may share memory with the DataFrame or be read-only.
    port = data_df["port"].to_numpy()
    choices = np.where(port == 2, -1, port)
    reward = data_df["reward"].to_numpy()
    outcomes = np.where(reward == 0, -1, reward)

    # Row t holds trials t .. t+npast-1 and is paired with the choice made on
    # trial t+npast; the last window has no following trial and is dropped.
    past_choices = sliding_window_view(choices, npast)[:-1, :]
    past_outcomes = sliding_window_view(outcomes, npast)[:-1, :]
    actual_choices = choices[npast:]

    # Three predictor groups, each npast columns wide:
    # choice x outcome interaction, choice, outcome.
    x = np.hstack(
        (
            past_choices * past_outcomes,
            past_choices,
            past_outcomes,
        )
    )
    clf = LogisticRegression(random_state=0).fit(x, actual_choices)

    # One row per predictor group; flipped so column 0 is the most recent trial.
    params = np.fliplr(clf.coef_.squeeze().reshape(3, npast))
    params_pooled.append(params)

    subfig = fig.add_subfigure(fig.gs[i])
    subfig.suptitle(f"{file.stem}, {task_type}")
    sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

    for row, (ax, color, title) in enumerate(zip(sub_axs, colors, titles)):
        ax.plot(lags, params[row], ".-", color=color, zorder=1)
        ax.set_title(title)
        ax.axhline(0, color="gray", zorder=0, lw=0.8)
        ax.set_xticks([1, npast // 2, npast])

    # Axis labels only on the first animal's panel to reduce clutter.
    if i == 0:
        sub_axs[0].set_xlabel("Trials in the past")
        sub_axs[0].set_ylabel("Influence on current choice")

# Average the fitted coefficients across animals, separately per task type,
# and draw both averages in the bottom-left region of the figure.
task_type_bool = np.array(task_type_bool)
params_pooled = np.array(params_pooled)
# Use the same rule as the per-animal labels: |r| < 0.2 is "unstructured",
# everything else (including exactly 0.2) is "structured".
is_unstructured = task_type_bool < 0.2
mean_struc = params_pooled[~is_unstructured, :, :].mean(axis=0)
mean_unstruc = params_pooled[is_unstructured, :, :].mean(axis=0)

subfig = fig.add_subfigure(fig.gs[4:, 0:2])
subfig.suptitle("Mean across animals by task type")
sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

# colors = ["orange", "purple", "blue"]
colors = ["#5040BF", "#AFBF40"]  # [structured, unstructured]


titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
lags = np.arange(1, npast + 1)  # x-axis: 1 = most recent trial
for row, (ax, title) in enumerate(zip(sub_axs, titles)):
    ax.plot(lags, mean_struc[row], ".-", color=colors[0], alpha=0.7, zorder=1)
    ax.plot(lags, mean_unstruc[row], ".-", color=colors[1], alpha=0.7, zorder=1)
    # Legend entries follow the plotting order above.
    ax.legend(["Struc", "Unstruc"])
    ax.set_title(title)
    ax.axhline(0, color="gray", zorder=0, lw=0.8)
    ax.set_xticks([1, npast // 2, npast])

### Cognitive model

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view
import matplotlib.pyplot as plt
from neuropy import plotting
from scipy import stats


# Per-animal logistic regression of the current choice on the previous
# `npast` trials, with one three-panel subfigure per data file.
basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig = plotting.Fig(6, 3, size=(12, 5), num=1)

npast = 10  # number of previous trials used as predictors
params_pooled = []  # one (3, npast) coefficient array per file
task_type_bool = []  # despite the name: |Pearson r| per file, thresholded later

colors = ["orange", "purple", "blue"]
titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
lags = np.arange(1, npast + 1)  # x-axis: 1 = most recent trial

for i, file in enumerate(files):
    # glob() already returns paths that include `basepath`; joining it again
    # would duplicate the directory whenever `basepath` is relative.
    data_df = pd.read_csv(file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    # Weakly correlated reward probabilities are labelled "unstructured".
    task_type = "unstructured" if prob_corr < 0.2 else "structured"
    task_type_bool.append(prob_corr)

    # Recode to +1/-1 without writing into the arrays returned by to_numpy():
    # those may share memory with the DataFrame or be read-only.
    port = data_df["port"].to_numpy()
    choices = np.where(port == 2, -1, port)
    reward = data_df["reward"].to_numpy()
    outcomes = np.where(reward == 0, -1, reward)

    # Row t holds trials t .. t+npast-1 and is paired with the choice made on
    # trial t+npast; the last window has no following trial and is dropped.
    past_choices = sliding_window_view(choices, npast)[:-1, :]
    past_outcomes = sliding_window_view(outcomes, npast)[:-1, :]
    actual_choices = choices[npast:]

    # Three predictor groups, each npast columns wide:
    # choice x outcome interaction, choice, outcome.
    x = np.hstack(
        (
            past_choices * past_outcomes,
            past_choices,
            past_outcomes,
        )
    )
    clf = LogisticRegression(random_state=0).fit(x, actual_choices)

    # One row per predictor group; flipped so column 0 is the most recent trial.
    params = np.fliplr(clf.coef_.squeeze().reshape(3, npast))
    params_pooled.append(params)

    subfig = fig.add_subfigure(fig.gs[i])
    subfig.suptitle(f"{file.stem}, {task_type}")
    sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

    for row, (ax, color, title) in enumerate(zip(sub_axs, colors, titles)):
        ax.plot(lags, params[row], ".-", color=color, zorder=1)
        ax.set_title(title)
        ax.axhline(0, color="gray", zorder=0, lw=0.8)
        ax.set_xticks([1, npast // 2, npast])

    # Axis labels only on the first animal's panel to reduce clutter.
    if i == 0:
        sub_axs[0].set_xlabel("Trials in the past")
        sub_axs[0].set_ylabel("Influence on current choice")

# Average the fitted coefficients across animals, separately per task type.
task_type_bool = np.array(task_type_bool)
params_pooled = np.array(params_pooled)
# Use the same rule as the per-animal labels: |r| < 0.2 is "unstructured",
# everything else (including exactly 0.2) is "structured".
is_unstructured = task_type_bool < 0.2
mean_struc = params_pooled[~is_unstructured, :, :].mean(axis=0)
mean_unstruc = params_pooled[is_unstructured, :, :].mean(axis=0)

subfig = fig.add_subfigure(fig.gs[4:, 0:2])
subfig.suptitle("Mean across animals by task type")
sub_axs = subfig.subplots(1, 3, width_ratios=[1, 1, 1], sharey=True, sharex=True)

colors = ["orange", "purple", "blue"]
titles = ["Reward Seeking", "Choice Preservation", "Main effect of Outcome"]
lags = np.arange(1, npast + 1)  # x-axis: 1 = most recent trial
for row, (ax, color, title) in enumerate(zip(sub_axs, colors, titles)):
    # Structured mean at full opacity, unstructured mean in the same colour
    # at half opacity.
    ax.plot(lags, mean_struc[row], ".-", color=color, zorder=1)
    ax.plot(lags, mean_unstruc[row], ".-", color=color, alpha=0.5, zorder=1)
    ax.set_title(title)
    ax.axhline(0, color="gray", zorder=0, lw=0.8)
    ax.set_xticks([1, npast // 2, npast])

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize

# Simulated Example Data (Each row: [choice, reward])
# data = np.array([[0, 1], [1, 0], [1, 1], [0, 0], [0, 1], [1, 0], [1, 1], [0, 0]])

# choices = data[:, 0]  # 0 or 1 (action taken)
# rewards = data[:, 1]  # 0 or 1 (reward received)


# Estimate a single Q-learning rate for the first data file by maximising the
# likelihood of a logistic regression of choice on the Q-value difference.
basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig = plotting.Fig(6, 3, size=(12, 5), num=1)

npast = 10
params_pooled = []
task_type_bool = []  # despite the name: |Pearson r| per file

for file in files[:1]:
    # glob() already returns paths that include `basepath`.
    data_df = pd.read_csv(file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    task_type = "unstructured" if prob_corr < 0.2 else "structured"
    task_type_bool.append(prob_corr)

    # astype() returns a copy, so the in-place recoding below cannot touch
    # the DataFrame. Port 2 becomes action 0; port 1 stays action 1.
    choices = data_df["port"].to_numpy().astype(int)
    choices[choices == 2] = 0
    rewards = data_df["reward"].to_numpy().astype(int)
    # rewards[rewards == 0] = -1

    def compute_q_values(alpha):
        """Return the (n_trials, 2) Q-values held *before* each trial.

        Row t is the value estimate available when choice t was made; the
        update driven by trial t only shows up from row t+1 onwards, so the
        values never contain the outcome of the choice they are used to
        explain.
        """
        Q = np.zeros(2)  # Initialize Q-values for two actions
        q_values = np.empty((choices.size, 2))

        for t, (choice, reward) in enumerate(zip(choices, rewards)):
            q_values[t] = Q
            Q[choice] += alpha * (reward - Q[choice])

        return q_values

    def log_likelihood(alpha):
        """Negative log-likelihood of the observed choices for a given alpha."""
        # The optimiser passes a length-1 array; unwrap it so that Q-value
        # updates assign plain scalars.
        alpha = float(np.ravel(alpha)[0])
        Q_values = compute_q_values(alpha)
        X = (Q_values[:, 0] - Q_values[:, 1]).reshape(-1, 1)  # Difference in Q-values

        model = LogisticRegression()
        model.fit(X, choices)  # Fit logistic regression on choice data

        probs = model.predict_proba(X)[:, 1]  # Probability of choosing action 1
        # Keep the logarithms finite when a probability saturates at 0 or 1.
        eps = 1e-9
        probs = np.clip(probs, eps, 1 - eps)
        ll = np.sum(choices * np.log(probs) + (1 - choices) * np.log(1 - probs))
        return -ll  # Negative for minimization

    # Optimize alpha using a bounded method
    result = minimize(log_likelihood, x0=0.5, bounds=[(0, 1)], method="L-BFGS-B")

    alpha_estimated = result.x[0]
    print(f"Estimated alpha: {alpha_estimated:.4f}")

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize
from pathlib import Path
import pandas as pd
from scipy import stats
from pybads import BADS

# Fit a Q-learning model with separate learning rates for the chosen and the
# unchosen arm plus a softmax inverse temperature, one fit per data file.
# Earlier variants kept here as commented-out code (separate left/right
# learning rates, and scipy.optimize.minimize with L-BFGS-B) were replaced by
# the chosen/unchosen model fitted with BADS below.
basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

# fig = plotting.Fig(6, 3, size=(12, 5), num=1)
estimated_params = []  # one [alpha_c, alpha_u, beta] row per fitted file
task_type = []  # "structured" / "unstructured", aligned with estimated_params

for file in files[3:]:
    # glob() already returns paths that include `basepath`.
    data_df = pd.read_csv(file)
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )

    task_type.append("unstructured" if prob_corr < 0.2 else "structured")

    # astype() returns a copy, so the in-place recoding cannot touch the
    # DataFrame. Port 2 becomes action 0; port 1 stays action 1.
    choices = data_df["port"].to_numpy().astype(int)
    choices[choices == 2] = 0
    rewards = data_df["reward"].to_numpy().astype(int)
    session_id = data_df["session#"].to_numpy()
    # Zero within a session, non-zero on the trial where the session id changes.
    session_starts = np.diff(session_id, prepend=session_id[0])
    # rewards[rewards == 0] = -1

    def compute_q_values(alpha_c, alpha_u):
        """Return the (n_trials, 2) Q-values held *before* each trial.

        Q-values are reset to zero on trials where ``session_starts > 0``.
        Row t is recorded after that reset but before the update driven by
        trial t, so it never contains the outcome of the choice it is used to
        explain.
        """
        Q = np.zeros(2)
        q_values = np.empty((choices.size, 2))

        trials = zip(choices, rewards, session_starts)
        for t, (choice, reward, start) in enumerate(trials):
            if start > 0:
                Q[:] = 0.0  # Reset Q-values at session start

            q_values[t] = Q

            unchosen = 1 - choice

            # Q-learning update
            Q[choice] += alpha_c * (reward - Q[choice])
            # Q[unchosen] += alpha_u * ((1 - reward) - Q[unchosen])
            # NOTE(review): the unchosen arm moves by alpha_u times the chosen
            # arm's *already updated* prediction error with flipped sign;
            # confirm this is the intended rule.
            Q[unchosen] += alpha_u * (Q[choice] - reward)

        return q_values

    def log_likelihood(params):
        """Negative log-likelihood of the choices under a softmax policy.

        ``params`` is ``(alpha_c, alpha_u, beta)``: chosen-arm learning rate,
        unchosen-arm learning rate and softmax inverse temperature.
        """
        alpha_c, alpha_u, beta = params
        Q_values = compute_q_values(alpha_c, alpha_u)
        # Compute softmax probabilities
        betaQ = beta * Q_values
        betaQ = np.clip(betaQ, -500, 500)  # Prevent overflow
        exp_Q = np.exp(betaQ)
        probs = exp_Q / np.sum(exp_Q, axis=1, keepdims=True)
        # Get the probability of the chosen action
        chosen_probs = probs[np.arange(len(choices)), choices]

        # Numerical stability
        eps = 1e-9
        chosen_probs = np.clip(chosen_probs, eps, 1 - eps)

        # Log-likelihood
        ll = np.sum(np.log(chosen_probs))
        return -ll  # For minimization

    # Optimize (alpha_c, alpha_u, beta) with BADS inside the given bounds.
    bads = BADS(
        log_likelihood,
        x0=np.array([0.3, 0.2, 1.2]),
        lower_bounds=np.array([-0.9, -0.9, 1]),
        upper_bounds=np.array([0.9, 0.9, 10]),
        plausible_lower_bounds=np.array([0, -0.5, 3]),
        plausible_upper_bounds=np.array([0.5, 0.1, 5]),
    )
    result = bads.optimize()

    estimated_params.append(result.x)
    alpha_c_est, alpha_u_est, beta = result.x
    print(
        f"Chosen alpha: {alpha_c_est:.4f}, Unchosen alpha: {alpha_u_est:.4f}, Estimated: beta: {beta}"
    )

In [None]:
# Display the lengths of `choices` and `session_starts` side by side.
choices.size, session_starts.size

In [None]:
# Show only the non-zero entries of `session_starts`.
session_starts[session_starts != 0]

In [None]:
# Toy sequence: check what np.diff returns when the first element is prepended.
a = np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3])

np.diff(a, prepend=a[0])

In [None]:
# Rebind both `a` and `b` to 0.
a, b = 0, 0

In [None]:
# Display the current `result` object.
result

In [None]:
from neuropy import plotting

# Scatter the fitted parameters of every file: the two learning rates on the
# left axis and the inverse temperature on the right, coloured by task type.
fig = plotting.Fig(1, 2, size=(4, 3), num=1)
estimated_params = np.array(estimated_params)
ax1 = fig.subplot(fig.gs[0])
ax2 = fig.subplot(fig.gs[1])

for i, params in enumerate(estimated_params):
    color = "#5040BF" if task_type[i] == "structured" else "#AFBF40"

    # Horizontal jitter so points from different files do not overlap.
    x1 = np.array([1, 2]) + 0.1 * np.random.randn(2)

    ax1.plot(x1, params[:2], ".", color=color, alpha=0.6)
    ax2.plot(
        1 + 0.1 * np.random.randn(1),
        params[2],
        ".",
        color=color,
        alpha=0.6,
    )

# Axis limits do not depend on the data, so set them once.
ax1.set_xlim(0, 3)
ax2.set_xlim(0, 2)

# ax1.legend(["struc", "unstruc"])
# Columns 0 and 1 are the chosen-arm and unchosen-arm learning rates, matching
# the parameter order printed when the fits were run.
ax1.set_xticks([1, 2], ["Alpha chosen", "Alpha unchosen"])
ax2.set_xticks([1], ["Beta"])
ax1.set_ylabel("Estimated alpha values")
ax2.set_ylabel("Estimated beta values")
fig.fig.suptitle("Q-learning in two-armed bandit task")

In [None]:
from pybads import BADS
import numpy as np


def noisy_sphere(x, sigma=1.0):
    """Sum of squares of each input row plus Gaussian noise scaled by ``sigma``.

    ``x`` is promoted to at least two dimensions, so a single point and a
    batch of points (one per row) are both accepted. Returns a 1-D array with
    one noisy value per row.
    """
    points = np.atleast_2d(x)
    n_points = points.shape[0]
    clean_values = (points**2).sum(axis=1)
    return clean_values + sigma * np.random.normal(size=n_points)


# Minimise the noisy sphere with BADS on a two-dimensional box.
x0 = np.array([-3, -3])  # starting point

# Hard bounds of the search box.
lower_bounds = np.array([-5, -5])
upper_bounds = np.array([5, 5])

# Narrower box passed as the plausible range.
plausible_lower_bounds = np.array([-2, -2])
plausible_upper_bounds = np.array([2, 2])

options = {
    "uncertainty_handling": True,
    "max_fun_evals": 300,
    "noise_final_samples": 100,
}

bads = BADS(
    noisy_sphere,
    x0=x0,
    lower_bounds=lower_bounds,
    upper_bounds=upper_bounds,
    plausible_lower_bounds=plausible_lower_bounds,
    plausible_upper_bounds=plausible_upper_bounds,
    options=options,
)
optimize_result = bads.optimize()

In [None]:
# Builds the statement "D=val" from two string pieces and runs it with exec().
val = 2
exec("D" + "=val")

In [None]:
def f(x):
    """Return half of the ``D`` entry of a fixed parameter table.

    ``x`` is accepted for call compatibility but is not used.
    """
    evaluation_parameters = {"D": 2}
    # exec() cannot create local variables inside a function in Python 3, so
    # the value is read from the mapping instead of being injected by name.
    D = evaluation_parameters["D"]
    return D / 2

In [None]:
# Call f with an arbitrary argument and display the result.
f(3)

In [None]:
# Bind each key of the dict as a variable via exec(), then read one back.
a = {"M": 3}
for k, val in a.items():
    print(k, val)
    # Runs the statement "<key>=val", here "M=val".
    exec(k + "=val")
print(M)

In [None]:
val = 2
# Same assignment as a single string literal; D is printed afterwards to check
# that the name was bound.
exec("D=val")

print(D)