In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from scipy.ndimage import gaussian_filter1d
from pathlib import Path
from sklearn.linear_model import LogisticRegression
import bandit_utils
from neuropy import plotting
import matplotlib as mpl

# Folder that is globbed for the CSV files analysed below (absolute path on a Windows drive).
basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
# Every *.csv directly under basepath. sorted() makes the order stable, so a given file keeps
# the same subplot position across runs. Entries are full paths, not bare file names.
files = sorted(basepath.glob("*.csv"))

### Trials and reward raw data plot

In [None]:
# One stacked panel per file (first three files only); panels share the y-axis.
fig, axs = plt.subplots(3, 1, sharey=True)
axs = axs.ravel()

for i, file in enumerate(files[:3]):
    ax = axs[i]
    data_df = pd.read_csv(basepath / file)

    # NOTE(review): here from_csv receives a DataFrame and is unpacked into six arrays, while
    # the session-performance cell passes it a path and uses the result as a DataFrame.
    # Confirm which signature bandit_utils currently exposes.
    (
        choice_arr,
        is_choice_high,
        session_id,
        is_reward,
        rwd_corr,
        dt_time,
    ) = bandit_utils.from_csv(data_df)

    # Keep only trials time-stamped more than 10 days after the first trial.
    cutoff = dt_time[0] + pd.Timedelta(days=10)
    good_trials = dt_time > cutoff
    choice_arr = choice_arr[good_trials, :]
    dt_time = dt_time[good_trials]
    is_choice_high = is_choice_high[good_trials]
    session_id = session_id[good_trials]
    is_reward = is_reward[good_trials].astype(bool)
    # is_reward_bool = is_reward.astype(bool)

    trials = np.arange(len(choice_arr))
    # True where column 0 of choice_arr exceeds column 1.
    # presumably these are the two ports' reward probabilities - TODO confirm
    port1_high = choice_arr[:, 0] > choice_arr[:, 1]
    # Position of the first retained trial of each distinct session id.
    session_start_indx = np.unique(session_id, return_index=True)[1]

    # Rewarded trials: large translucent markers. Unrewarded trials: small solid dots.
    rewarded_style = dict(color="k", s=20, alpha=0.5, edgecolors="w")
    ax.scatter(trials[is_reward], choice_arr[is_reward, 3], **rewarded_style)
    ax.vlines(session_start_indx, 0, 3, color="k", linestyle="--")
    unrewarded = ~is_reward
    ax.scatter(trials[unrewarded], choice_arr[unrewarded, 3], color="k", s=2)
    # Red band wherever port1_high is True.
    ax.fill_between(trials, 0, 3, where=port1_high, color="r", alpha=0.3, ec=None)
    ax.set_ylim(0, 3)

In [None]:
# Scratch check: `.day` of the first "datetime" entry of whichever data_df was assigned last.
# NOTE(review): depends on kernel state left by a previous cell, and `.day` presumes the
# column holds datetime-like values (the pd.read_csv call above passes no parse_dates) - confirm.
data_df["datetime"][0].day

### Assess learning within sessions and compare early to late
- Also based on this, find a systematic way to choose good sessions.

In [30]:
plt.close()
fig = plotting.Fig(4, 3, size=(13, 6))
# Registry lookup instead of mpl.cm.get_cmap, which was deprecated in Matplotlib 3.7 and
# removed in 3.9.
cmap = mpl.colormaps["Reds"]

# One sub-figure per file, each with four panels: trials-per-session histogram, two
# performance panels, and port bias.
for i, file in enumerate(files):
    data_df = bandit_utils.from_csv(basepath / file)

    # Label the task by the absolute Pearson correlation between the two reward-probability
    # columns: below 0.2 is called "unstructured", otherwise "structured".
    prob_corr = np.abs(
        stats.pearsonr(data_df["rewprobfull1"], data_df["rewprobfull2"])[0]
    )
    task_type = "unstructured" if prob_corr < 0.2 else "structured"

    ntrials_by_session, mean_ntrials, std_ntrials = bandit_utils.get_trial_metrics(
        data_df
    )

    # Same helper and settings twice; the second call additionally passes delta_prob=40.
    sess_div_perf_arr = bandit_utils.get_performance_2ab(
        data_df, min_trials_per_sess=20, roll_window=30, smooth=5
    )
    sess_div_perf_high_diff_arr = bandit_utils.get_performance_2ab(
        data_df, min_trials_per_sess=20, roll_window=30, smooth=5, delta_prob=40
    )

    port_bias, centers = bandit_utils.get_port_bias_2ab(data_df)
    nan_idx = np.isnan(port_bias)

    # Upper x-limit for the trial axes: mean + 2 SD of trials per session.
    x_cuttoff = mean_ntrials + 2 * std_ntrials

    subfig = fig.add_subfigure(fig.gs[i])
    subfig.suptitle(f"{file.stem}, {task_type}")
    sub_axs = subfig.subplots(1, 4, width_ratios=[1, 2, 2, 2])

    # ===== trials histogram =======
    hist_ax = sub_axs[0]
    n_trials_hist, trial_edges = np.histogram(
        ntrials_by_session, bins=range(0, 1500, 10)
    )
    hist_ax.stairs(
        values=n_trials_hist, edges=trial_edges, fill=True, color="#9E9E9E", alpha=0.7
    )
    hist_ax.axvline(mean_ntrials, color="k", ls="--")
    hist_ax.set_xlim(0, x_cuttoff + 10)
    hist_ax.set_title("Trials histogram")

    # ==== performance panels ======
    perf_arrays = [sess_div_perf_arr, sess_div_perf_high_diff_arr]
    perf_titles = [
        "Performance\n(All sessions)",
        "Performance\n" r"($\Delta$P$\geq$40)",
    ]

    for perf_ax, arr, perf_title in zip(sub_axs[1:3], perf_arrays, perf_titles):
        # One curve per row of arr, shaded from light to dark red in row order.
        colors = [cmap(frac) for frac in np.linspace(0.2, 0.8, arr.shape[0])]
        n_trials = arr.shape[1]
        trial_idx = np.arange(n_trials)

        for row, color in zip(arr, colors):
            perf_ax.plot(trial_idx, row, color=color, lw=0.7)

        perf_ax.set_xlim(0, x_cuttoff)
        perf_ax.set_ylim(0, 1.2)
        perf_ax.set_yticks([0, 0.5, 1])
        perf_ax.set_title(perf_title)

    # ====== port bias=========
    bias_ax = sub_axs[3]
    bias_ax.plot(centers[~nan_idx], port_bias[~nan_idx], color="g")
    # Move both spines to the origin so the curve is read against crossing axes.
    bias_ax.spines["bottom"].set_position("zero")
    bias_ax.spines["left"].set_position("zero")
    bias_ax.set_ylim(-1, 1)
    bias_ax.set_xticks([-80, 80])
    bias_ax.set_yticks([-1, 1])
    bias_ax.set_title("Port bias")

    # Axis labels are written once, on the first sub-figure only.
    if i == 0:
        sub_axs[0].set_xlabel("nTrials")
        sub_axs[0].set_ylabel("Counts")
        sub_axs[1].set_xlabel("Trials")
        sub_axs[1].set_ylabel("Choice P(high)")
        sub_axs[2].set_xlabel("Trials")
        sub_axs[2].set_ylabel("Choice P(high)")
        sub_axs[3].set_xlabel("Reward P(A)-P(B)")
        sub_axs[3].set_ylabel("Choice P(A)")

# NOTE(review): absolute, user-specific output location; consider moving it to a
# configurable figures directory.
figpath = Path(
    "C:/Users/asheshlab/OneDrive/academia/analyses/adlab/anirudh_data/figures/fig1"
)
fig.savefig(figpath)

  cmap = mpl.cm.get_cmap("Reds")


In [None]:
# Raw port-bias points for the last file processed by the summary cell.
# `bias` is not assigned anywhere in the visible notebook; `port_bias` is the array that
# get_port_bias_2ab returns together with `centers`, so that is what gets plotted here.
_, ax = plt.subplots()
ax.plot(centers, port_bias, ".")

In [67]:
# Toy single-column frame with one missing value, used to see how rolling sums treat NaN.
df = pd.Series([0, 1, 2, np.nan, 4, 6, 3, 1, 2], name="B").to_frame()

# Trailing window of 4 rows (at least 2 non-NaN values required), then every 4th row
# starting at position 3, i.e. the sums of consecutive non-overlapping blocks of four.
window_sums = df.rolling(window=4, min_periods=2, closed="right").sum()
window_sums.iloc[3::4]

Unnamed: 0,B
3,3.0
7,14.0


In [49]:
# Display the toy frame built in the rolling-sum cell above, for reference.
df

Unnamed: 0,B
0,0.0
1,1.0
2,2.0
3,
4,4.0
5,6.0
6,3.0
7,1.0
8,2.0


### High port probability with respect to start of session

In [None]:
fig, axs = plt.subplots(4, 3, sharey=True)
axs = axs.reshape(-1)

# Trial offsets relative to each session's first trial: 10 trials before to 19 after.
offsets = np.arange(-10, 20)

for i, file in enumerate(files):
    data_df = pd.read_csv(basepath / file)
    # NOTE(review): get_params is neither defined nor imported in the visible notebook, so
    # this raises NameError on a fresh kernel. The raw-data cell unpacks the same six values
    # from bandit_utils.from_csv(data_df) - confirm which helper is current.
    choice_arr, is_choice_high, session_id, is_reward, rwd_corr, dt_time = get_params(
        data_df
    )

    # Keep only trials time-stamped more than 10 days after the first trial.
    good_trials = dt_time > (dt_time[0] + pd.Timedelta(days=10))
    choice_arr = choice_arr[good_trials, :]
    dt_time = dt_time[good_trials]
    is_choice_high = is_choice_high[good_trials]
    session_id = session_id[good_trials]
    is_reward = is_reward[good_trials].astype(bool)
    # is_reward_bool = is_reward.astype(bool)
    trials = np.arange(choice_arr.shape[0])
    port1_high = choice_arr[:, 0] > choice_arr[:, 1]
    # Position of the first retained trial of each distinct session id.
    # assumes each session's trials are contiguous - TODO confirm
    session_start_indx = np.unique(session_id, return_index=True)[1]

    # Keep only session starts whose whole window lies inside the retained trials. The
    # earlier `[:-20]` slice dropped the last 20 *sessions*, yet still let early starts
    # produce negative indices that silently wrapped around to the end of the array.
    n_kept = choice_arr.shape[0]
    fits = (session_start_indx + offsets[0] >= 0) & (
        session_start_indx + offsets[-1] < n_kept
    )
    starts = session_start_indx[fits]
    if starts.size == 0:
        # Nothing to average for this file; leave the panel empty.
        continue

    # Rows are sessions and columns follow `offsets`. The offset is ADDED so that column k
    # really is trial start + offsets[k]. Subtracting it, as before, mirrored the window
    # relative to the x-axis.
    choice_around_session = np.vstack(
        [is_choice_high[starts + d] for d in offsets]
    ).T

    prob_high_choice = np.mean(choice_around_session, axis=0)
    x = offsets

    axs[i].plot(x, prob_high_choice)
    axs[i].set_ylim(0, 1)
    axs[i].set_xticks([-10, 0, 10, 20])

In [None]:
# Sanity check: number of trial offsets spanned by np.arange(-10, 20), i.e. 30.
np.arange(-10, 20).size

In [None]:
fig, axs = plt.subplots(3, 4, sharey=True)
axs = axs.reshape(-1)

# Which session to show per file: the 12th (0-based rank 11) among sessions with more than
# 150 retained trials.
min_session_trials = 150
long_session_rank = 11

for i, file in enumerate(files):
    data_df = pd.read_csv(basepath / file)
    # NOTE(review): get_params is neither defined nor imported in the visible notebook, so
    # this raises NameError on a fresh kernel - confirm the current helper name.
    choice_arr, is_choice_high, session_id, *_, rwd_corr, dt_time = get_params(data_df)

    # Keep only trials time-stamped more than 10 days after the first trial.
    good_trials = dt_time > (dt_time[0] + pd.Timedelta(days=10))
    choice_arr = choice_arr[good_trials, :]
    dt_time = dt_time[good_trials]
    is_choice_high = is_choice_high[good_trials]
    session_id = session_id[good_trials]

    session_id_unq, n_trials = np.unique(session_id, return_counts=True)

    ax = axs[i]
    long_sessions = session_id_unq[n_trials > min_session_trials]
    if long_sessions.size <= long_session_rank:
        # Too few long sessions for this file. Leave the panel empty rather than raise
        # IndexError and abort the remaining files.
        continue
    high_trial_id = long_sessions[long_session_rank]

    indx = session_id == high_trial_id

    # Left axis: columns 0 (red) and 1 (black) of choice_arr for the chosen session.
    ax.plot(choice_arr[indx, 0], color="r")
    ax.plot(choice_arr[indx, 1], color="k")
    # Right axis: is_choice_high for the same trials, padded so 0 and 1 sit off the frame.
    ax2 = ax.twinx()
    ax2.plot(is_choice_high[indx], color="gray")
    ax2.set_ylim(-0.2, 1.2)

    # alpha_var = np.absolute(session_id)

    # is_choice_high_per_session = np.split(is_choice_high, np.cumsum(n_trials)[:-1])
    # perf = [np.mean(arr) for arr in is_choice_high_per_session]

    # perf_slide_view = np.lib.stride_tricks.sliding_window_view(perf, 10)[::5, :]
    # perf_mean = np.mean(perf_slide_view, axis=1)
    # perf_std = np.std(perf_slide_view, axis=1)

    # whr_reward_trial = np.where(is_reward == 1)[0]
    # rwd_mov_avg = np.convolve(is_reward, np.ones(100) / 100, mode="same")
    # rwd_mov_avg_smth = gaussian_filter1d(rwd_mov_avg, 20)

    # axs[i].fill_between(
    #     np.arange(len(perf_mean)), perf_mean - perf_std, perf_mean + perf_std, alpha=0.3
    # )
    # axs[i].plot(np.arange(len(perf_mean)), perf_mean, "k")
    # axs[i].set_ylim(0.3)
    # axs[i].set_title(np.round(rwd_corr[0], 2))
    # axs[0, i].plot(rwd_prob1, color="g")
    # axs[0].vlines(whr_reward_trial,10,20,alpha=0.1)
    # axs[0, i].plot(rwd_prob2, color="r")
    # axs[1, i].plot(rwd_mov_avg_smth, "k")
    # axs[1, i].vlines(np.where(port_choice == 1), 0.1, 0.2, alpha=0.5, color="g")
    # axs[1, i].vlines(np.where(port_choice == 2), 0.3, 0.4, alpha=0.5, color="r")

In [None]:
# NOTE(review): high_trial_indx is not assigned anywhere in the visible notebook (the loop
# above assigns high_trial_id and indx), so this raises NameError on a fresh kernel.
# Confirm which of the two was meant, or delete this scratch cell.
high_trial_indx

In [None]:
# Two stacked panels for the hard-coded session id 61, using whatever choice_arr,
# session_id and is_choice_high the previous loop left in the kernel (its last file).
fig, axs = plt.subplots(2, 1)
indx = session_id == 61

# Top: columns 0 and 1 of choice_arr. Bottom: is_choice_high. Both restricted to that session.
for col in (0, 1):
    axs[0].plot(choice_arr[indx, col])
axs[1].plot(is_choice_high[indx])

In [None]:
# NOTE(review): `performance` is not assigned anywhere in the visible notebook, so this
# raises NameError on a fresh kernel - restore its definition or delete this scratch cell.
performance

In [None]:
# Scratch check of splitting at cumulative offsets: cut points 3, 6 and 9 on a length-10
# vector give four pieces of sizes 3, 3, 3 and 1 (the remainder).
a = np.ones(10)
split_arr = np.array([3, 3, 3])

cut_points = np.cumsum(split_arr)
b = np.split(a, cut_points)

# Mean of every piece.
[piece.mean() for piece in b]

In [None]:
# np.dstack takes no `axis` argument (it always stacks along the third axis), and it needs
# equally shaped inputs. Only the chunks matching the first chunk's shape are stacked,
# which drops the short remainder piece produced by the split above.
np.dstack([piece for piece in b if piece.shape == b[0].shape])

In [None]:
# Distinct session ids and the number of trials in each.
# `session` is not assigned anywhere in the visible notebook; `session_id` is the array the
# cells above pass to np.unique in exactly this way.
np.unique(session_id, return_counts=True)

In [None]:
# Display whichever data_df was assigned last; which cell that was depends on execution
# order. Consider data_df.head() to keep the output short.
data_df

In [None]:
# NOTE(review): rwd_prob1 only appears in commented-out code in the visible notebook, so
# this raises NameError on a fresh kernel - restore its definition or delete this scratch cell.
plt.plot(rwd_prob1, ".")

In [None]:
# Self-contained demo: fit a logistic regression of choice on reward/choice history using
# purely synthetic (random) data. Imports stay local so the cell runs on its own;
# train_test_split and StandardScaler are not imported in the notebook's first cell.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Simulate some synthetic data for 1000 trials
np.random.seed(42)
n_trials = 1000
n_history = 5  # Number of past trials considered (5 as in the equation)

# Simulate reward history (1 for reward, 0 for no reward)
reward_left = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Left rewards history
reward_right = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Right rewards history

# Simulate choice history (1 for chosen, 0 for unchosen)
choice_left = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Left choices history
choice_right = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Right choices history

# Simulate the actual choice (1 for left, 0 for right, as the target variable)
# The choices are drawn independently of the features, so test accuracy is expected to sit
# near chance (about 0.5).
actual_choice = np.random.randint(0, 2, size=n_trials)  # 1 for left, 0 for right

# Prepare the feature matrix (X) for the logistic regression model:
# first n_history columns = reward-history difference, last n_history = choice-history difference.
X = np.hstack(
    [
        reward_left - reward_right,  # reward history difference
        choice_left - choice_right,  # choice history difference
    ]
)

# Standardize the data (important for logistic regression).
# The scaler was never instantiated before, so the next line raised NameError.
# NOTE(review): scaling is fitted on all trials before the split; for a real analysis fit
# the scaler on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, actual_choice, test_size=0.2, random_state=42
)

# Logistic Regression Model
log_reg = LogisticRegression(solver="lbfgs", max_iter=1000)
log_reg.fit(X_train, y_train)

# Print model coefficients (βr, βc, and βbias)
print("Model coefficients:")
print("Beta (Reward history weights):", log_reg.coef_[:, :n_history])
print("Beta (Choice history weights):", log_reg.coef_[:, n_history:])
print("Intercept (Bias term):", log_reg.intercept_)

# Predict on test data
y_pred = log_reg.predict(X_test)

# Evaluate model performance
accuracy = np.mean(y_pred == y_test)
print("Test accuracy:", accuracy)

In [None]:
# Display the simulated 0/1 choice vector from the demo cell above (1000 entries).
# Consider actual_choice[:10] to keep the output short.
actual_choice