# In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat


def mat_to_pd(mat):
    """Load a MATLAB file and return its per-trial variables as a DataFrame.

    A variable is kept only when it is a NumPy array whose shape equals
    the shape of the file's ``RT`` variable; every other entry returned
    by ``loadmat`` is dropped.

    Parameters
    ----------
    mat : str
        Path of the ``.mat`` file (handed unchanged to
        ``scipy.io.loadmat``).

    Returns
    -------
    pandas.DataFrame
        One column per kept variable.

    Raises
    ------
    KeyError
        If the file contains no ``RT`` variable.
    """
    contents = loadmat(mat)
    # RT defines the reference shape; look it up once rather than per key.
    trial_shape = contents["RT"].shape
    columns = {
        # squeeze() drops singleton axes; atleast_1d() keeps a file with a
        # single trial from collapsing to 0-d scalars, which DataFrame
        # rejects ("If using all scalar values, you must pass an index").
        name: np.atleast_1d(value.squeeze())
        for name, value in contents.items()
        if isinstance(value, np.ndarray) and value.shape == trial_shape
    }
    return pd.DataFrame(columns)


"""
variable descriptions
---------------------
coherence : float
    motion coherence of the trial, between 0-1
correct : 0 or 1
    0 if correct
    1 if error
RT : float
    response time to L/R from stimulus onset
dotDirection : 0 or pi
    direction of the motion, in radians
selectedDuration : float
    duration of the dot display, in seconds
    NaN = unlimited duration
waterSoFar : int
    amount of water consumed from start of session, in a.u.
proposedReward : float
    amount of water rewarded for correct trial, in a.u.
correctionTrial : 0 or 1
    0 if the motion direction was chosen randomly
    1 if the motion direction was the same as the previous failed trial
selectedDotSize : float
    size of dots, in stixels
selectedContrast : float
    contrast of the dots against the background, between 0-1
selectedSpeed : float
    speed of dots, in stixels per frame
proposedPenalty : float
    duration of timeout if error
pctCorrectionTrials : float
    probability of entering correction mode after error, between 0-1
isLoop : 0 or 1
    0 if movie does not loop until response
    1 if movie loops until rat responds
sessionNumber : int
    session is the span of time when rat had access to the game
"""


# Load each rat's trial vectors and tag every row with the animal's ID.
_rat_files = {
    195: "datasets/Rat195Vectors_241025.mat",
    196: "datasets/Rat196Vectors_241025.mat",
}
df195, df196 = (
    mat_to_pd(path).assign(rat=rat_id) for rat_id, path in _rat_files.items()
)

# Stack both animals, then keep only the trials flagged as valid
# (disqualified trials are removed for now).
_all_trials = pd.concat([df195, df196])
_valid_trials = _all_trials.query("Valid == 1")

# 719529 is the MATLAB datenum of 1970-01-01, so subtracting it gives days
# since the Unix epoch (presumably trialDate is stored as a MATLAB datenum
# -- confirm against the recording code).
_trial_dates = pd.to_datetime(_valid_trials["trialDate"] - 719529, unit="D")
df = _valid_trials.assign(trialDate=_trial_dates).set_index("trialDate")