In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from scipy.ndimage import gaussian_filter1d
from pathlib import Path
from sklearn.linear_model import LogisticRegression

In [None]:
def get_params(df):
    port1_prob = df.loc[:, "rewprobfull1"].to_numpy()
    port2_prob = df.loc[:, "rewprobfull2"].to_numpy()
    port_choice = df.loc[:, "port"].to_numpy().astype(int)
    prob_choice = df.loc[:, "rw"].to_numpy()
    is_choice_high = np.max(np.vstack((port1_prob, port2_prob)), axis=0) == prob_choice

    is_reward = df.loc[:, "reward"].to_numpy()
    whr_reward_trial = np.where(is_reward == 1)[0]
    rwd_mov_avg = np.convolve(is_reward, np.ones(150) / 150, mode="same")
    # rwd_mov_avg_smth = gaussian_filter1d(rwd_mov_avg, 20)
    rwd_prob_corr = stats.pearsonr(port1_prob, port2_prob)
    dt_time = pd.to_datetime(df["eptime"].to_numpy(), unit="s")
    session = df.loc[:, "session#"].to_numpy()

    choice_arr = np.vstack((port1_prob, port2_prob, prob_choice)).T

    return choice_arr, is_choice_high, session, is_reward, rwd_prob_corr, dt_time

In [135]:
f = basepath / "gronckle.csv"
df = pd.read_csv(f)
arr, *_, is_choice_high = get_params(df)

In [None]:
np.max(arr[:, :2], axis=1) == arr[:, 2]

array([ True,  True,  True, ...,  True,  True,  True], shape=(119127,))

In [None]:
eptime = df["eptime"].to_numpy()
dt_time = pd.to_datetime(eptime, unit="s")

In [None]:
plt.plot(dt_time.hour, ".")

[<matplotlib.lines.Line2D at 0x2188b03f390>]

In [None]:
bad_choice = arr[np.where(~is_choice_high)]
bad_choice

array([[30, 70, 70],
       [30, 70, 70],
       [30, 70, 70],
       ...,
       [20, 80, 80],
       [20, 80, 80],
       [20, 80, 80]], shape=(107711, 3))

In [133]:
basepath = Path("D:\\Data")
# files = ["gronckle.csv", "grump.csv"]
files = sorted(basepath.glob("*.csv"))

fig, axs = plt.subplots(3, 4, sharey=True)
axs = axs.reshape(-1)

for i, file in enumerate(files):
    data_df = pd.read_csv(basepath / file)
    choice_arr, is_choice_high, session_id, *_, rwd_corr, dt_time = get_params(data_df)

    good_trials = dt_time > (dt_time[0] + pd.Timedelta(days=10))
    choice_arr = choice_arr[good_trials, :]
    dt_time = dt_time[good_trials]
    is_choice_high = is_choice_high[good_trials]
    session_id = session_id[good_trials]

    _, n_trials = np.unique(session_id, return_counts=True)

    is_choice_high_per_session = np.split(is_choice_high, np.cumsum(n_trials)[:-1])
    perf = [np.mean(arr) for arr in is_choice_high_per_session]

    perf_slide_view = np.lib.stride_tricks.sliding_window_view(perf, 10)[::5, :]
    perf_mean = np.mean(perf_slide_view, axis=1)
    perf_std = np.std(perf_slide_view, axis=1)

    # whr_reward_trial = np.where(is_reward == 1)[0]
    # rwd_mov_avg = np.convolve(is_reward, np.ones(100) / 100, mode="same")
    # rwd_mov_avg_smth = gaussian_filter1d(rwd_mov_avg, 20)

    axs[i].fill_between(
        np.arange(len(perf_mean)), perf_mean - perf_std, perf_mean + perf_std, alpha=0.3
    )
    axs[i].plot(np.arange(len(perf_mean)), perf_mean, "k")
    axs[i].set_ylim(0.3)
    axs[i].set_title(np.round(rwd_corr[0], 2))
    # axs[0, i].plot(rwd_prob1, color="g")
    # axs[0].vlines(whr_reward_trial,10,20,alpha=0.1)
    # axs[0, i].plot(rwd_prob2, color="r")
    # axs[1, i].plot(rwd_mov_avg_smth, "k")
    # axs[1, i].vlines(np.where(port_choice == 1), 0.1, 0.2, alpha=0.5, color="g")
    # axs[1, i].vlines(np.where(port_choice == 2), 0.3, 0.4, alpha=0.5, color="r")

array([0, 1, 2])

In [99]:
performance

[np.float64(0.5),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.005780346820809248),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.05319148936170213),
 np.float64(0.006622516556291391),
 np.float64(0.0),
 np.float64(0.001694915254237288),
 np.float64(0.002527379949452401),
 np.float64(1.0),
 np.float64(0.0025526483726866626),
 np.float64(0.009900990099009901),
 np.float64(0.0),
 np.float64(0.001288659793814433),
 np.float64(0.002890173410404624),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.4175824175824176),
 np.float64(0.0),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(1.0),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.00624512099921936),
 np.float64(0.9220404234841193),
 np.float64(0.1423841059602649),
 np.float64(0.019981834695731154),
 np.float64(0.4152542372881356),
 np.float64(0.9909470752089137),
 np.float64(0.045454545454545456),
 np.float64(0.09203980099502487),
 np.

In [None]:
a = np.ones(10)
split_arr = np.array([3, 3, 3])

b = np.hsplit(a, np.cumsum(split_arr))

[np.mean(_) for _ in b]

[np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0)]

In [None]:
np.dstack(b, axis=2)

TypeError: dstack() got an unexpected keyword argument 'axis'

In [None]:
np.unique(session, return_counts=True)

(array([ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
         74,  75,  76,  77,  78,  79,  80,  81,  82,  84,  85,  86,  87,
         88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
        114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
        127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
        140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
        153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,
        166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178,
        179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 191, 192,
        193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205,
        206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
        219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231,
        232, 233, 234, 235, 236, 237, 238, 239, 240

In [None]:
data_df

Unnamed: 0.1,Unnamed: 0,trial#,trialstart,port,reward,trialend,session#,eptime,task,rewprobfull1,rewprobfull2,rw,animal,datetime
0,0,0,4092,2.0,1,4119,1,1696509517,13,40,70,70,Grump,2023-10-05 12:38:37
1,1,1,5721,2.0,0,5721,1,1696509519,13,40,70,70,Grump,2023-10-05 12:38:39
2,2,2,6790,2.0,1,6873,1,1696509520,13,40,70,70,Grump,2023-10-05 12:38:40
3,3,3,10714,2.0,1,10744,1,1696509524,13,40,70,70,Grump,2023-10-05 12:38:44
4,4,4,12221,2.0,1,12221,1,1696509526,13,40,70,70,Grump,2023-10-05 12:38:46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76104,76104,76104,1162679,2.0,1,1163022,167,1702264773,13,80,90,90,Grump,2023-12-11 03:19:33
76105,76105,76105,1164634,2.0,1,1164880,167,1702264775,13,80,90,90,Grump,2023-12-11 03:19:35
76106,76106,76106,1166300,2.0,1,1166421,167,1702264777,13,80,90,90,Grump,2023-12-11 03:19:37
76107,76107,76107,1167738,2.0,1,1167836,167,1702264779,13,80,90,90,Grump,2023-12-11 03:19:39


In [None]:
plt.plot(rwd_prob1, ".")

[<matplotlib.lines.Line2D at 0x1c114055950>]

In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Simulate some synthetic data for 1000 trials
np.random.seed(42)
n_trials = 1000
n_history = 5  # Number of past trials considered (5 as in the equation)

# Simulate reward history (1 for reward, 0 for no reward)
reward_left = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Left rewards history
reward_right = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Right rewards history

# Simulate choice history (1 for chosen, 0 for unchosen)
choice_left = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Left choices history
choice_right = np.random.randint(
    0, 2, size=(n_trials, n_history)
)  # Right choices history

# Simulate the actual choice (1 for left, 0 for right, as the target variable)
# The actual choice depends on some hidden logic; we'll just generate random choices for this example
actual_choice = np.random.randint(0, 2, size=n_trials)  # 1 for left, 0 for right

# Prepare the feature matrix (X) for the logistic regression model
# We combine reward and choice histories for both left and right
X = np.hstack(
    [
        reward_left - reward_right,  # reward history difference
        choice_left - choice_right,  # choice history difference
    ]
)

# Standardize the data (important for logistic regression)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, actual_choice, test_size=0.2, random_state=42
)

# Logistic Regression Model
log_reg = LogisticRegression(solver="lbfgs", max_iter=1000)
log_reg.fit(X_train, y_train)

# Print model coefficients (βr, βc, and βbias)
print("Model coefficients:")
print("Beta (Reward history weights):", log_reg.coef_[:, :n_history])
print("Beta (Choice history weights):", log_reg.coef_[:, n_history:])
print("Intercept (Bias term):", log_reg.intercept_)

# Predict on test data
y_pred = log_reg.predict(X_test)

# Evaluate model performance
accuracy = np.mean(y_pred == y_test)
print("Test accuracy:", accuracy)

Model coefficients:
Beta (Reward history weights): [[-0.03594947 -0.09515691  0.04508803 -0.08742038 -0.02923884]]
Beta (Choice history weights): [[ 0.0133873   0.0166049   0.07716208 -0.04840661 -0.0341045 ]]
Intercept (Bias term): [0.07619375]
Test accuracy: 0.515


In [6]:
actual_choice

array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,