In [1]:
import json
#Pandas for saving datasets
import pandas as pd
#matplotlib for rendering
import matplotlib.pyplot as plt
#numpy for handeling matrix operations
import numpy as np
#time, to, well... keep track of time
import time
#iPython display for making sure we can render the frames
from IPython import display
#seaborn for rendering
import seaborn
import math
import os
import statistics as stat
import glob
import seaborn as sns

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
#code from scipy documentation
from scipy.stats import t
from scipy.stats import norm
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf


# Load Data

In [3]:
# Get data file names
cur_path = os.getcwd()
filenames = glob.glob(cur_path + "/data/*.csv")
# Read dframes into a list
dfs = []
for filename in filenames:
    dfs.append(pd.read_csv(filename))

# Filter - mixed effects

In [24]:
# get first dataframe and filter columns

filt_dfs = []
# concat all dframes into one dframe
for sub_num, d in enumerate(dfs):
    my_df = d.filter(items=['congruent', 'Rand Tim', 'key_resp.rt','block_num'])
    my_df['subject_number'] = [sub_num] * len(my_df)  # Add subject number to the DataFrame

    # drop na, only 10s
    filt_dfs.append(my_df.dropna())

## concat #
full_dat = pd.concat(filt_dfs)


In [25]:
print(f'num subs: {len(dfs)}')

num subs: 26


In [26]:
# Filter 
dat= full_dat[full_dat['congruent']==0]
dat_inc = full_dat[full_dat['congruent']==1]

In [27]:
data = full_dat[full_dat['block_num'] == '5s']

In [28]:
full_dat

Unnamed: 0,congruent,Rand Tim,key_resp.rt,block_num,subject_number
1,0.0,1.3132,0.636110,5s,0
2,1.0,0.7037,0.709031,5s,0
3,0.0,0.8309,0.434464,5s,0
4,1.0,4.6764,0.553009,5s,0
5,0.0,4.6441,0.518856,5s,0
...,...,...,...,...,...
416,1.0,4.1302,0.473883,5s,25
419,0.0,4.1825,0.864461,5s,25
420,0.0,3.5785,0.386026,5s,25
421,0.0,1.9596,0.395578,5s,25


# Mixed effects model

In [29]:
# Create a DataFrame
df = pd.DataFrame(full_dat)

# Rename columns for clarity
df.rename(columns={
    'congruent': 'condition_type',
    'Rand Tim': 'stimulus_onset',
    'key_resp.rt': 'reaction_time',
    'block_num': 'task'
}, inplace=True)

# Convert categorical variables to the appropriate type
df['condition_type'] = df['condition_type'].astype('category')
#df['task'] = df['task'].astype('category')
df['task'] = pd.Categorical(df['task'], categories=['1s', '3s', '5s', '10s'], ordered=True)  # Set reference condition here


# Build the fixed effects model
model = smf.mixedlm('reaction_time ~ C(task) * C(condition_type) * stimulus_onset', 
                    data=df, 
                    groups=df['subject_number']).fit()
# Display the model summary
print(model.summary())

                              Mixed Linear Model Regression Results
Model:                          MixedLM             Dependent Variable:             reaction_time
No. Observations:               9350                Method:                         REML         
No. Groups:                     26                  Scale:                          0.0098       
Min. group size:                250                 Log-Likelihood:                 8223.9148    
Max. group size:                394                 Converged:                      Yes          
Mean group size:                359.6                                                            
-------------------------------------------------------------------------------------------------
                                                       Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------------------------------
Intercept                                         



# Normalize

In [33]:
# Create a DataFrame
df = pd.DataFrame(full_dat)

# Rename columns for clarity
df.rename(columns={
    'congruent': 'condition_type',
    'Rand Tim': 'stimulus_onset',
    'key_resp.rt': 'reaction_time',
    'block_num': 'task'
}, inplace=True)

# Convert categorical variables to the appropriate type
df['condition_type'] = df['condition_type'].astype('category')
#df['task'] = df['task'].astype('category')
df['task'] = pd.Categorical(df['task'], categories=['1s', '3s', '5s', '10s'], ordered=True)  # Set reference condition here
df['task'] = df['task'].str.replace('s', '', regex=False).astype(float)
df['norm_onset'] = df['stimulus_onset'] / df['task']


# Build the fixed effects model
model = smf.mixedlm('reaction_time ~ C(task) * C(condition_type) * norm_onset', 
                    data=df, 
                    groups=df['subject_number']).fit()
# Display the model summary
print(model.summary())

                            Mixed Linear Model Regression Results
Model:                         MixedLM            Dependent Variable:            reaction_time
No. Observations:              9350               Method:                        REML         
No. Groups:                    26                 Scale:                         0.0098       
Min. group size:               250                Log-Likelihood:                8233.9361    
Max. group size:               394                Converged:                     Yes          
Mean group size:               359.6                                                          
----------------------------------------------------------------------------------------------
                                                    Coef.  Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------------------------
Intercept                                            0.401    0.010 41.722 0.00



## last half

### complete model

In [37]:
# Create a DataFrame
df = pd.DataFrame(full_dat)

# Rename columns for clarity
df.rename(columns={
    'congruent': 'condition_type',
    'Rand Tim': 'stimulus_onset',
    'key_resp.rt': 'reaction_time',
    'block_num': 'task'
}, inplace=True)

# Convert categorical variables to the appropriate type
df['condition_type'] = df['condition_type'].astype('category')
#df['task'] = df['task'].astype('category')
df['task'] = pd.Categorical(df['task'], categories=['1s', '3s', '5s', '10s'], ordered=True)  # Set reference condition here


# Define the cutoff times for each task duration
cutoff_times = {
    '1s': 0.6,
    '3s': 1.5,
    '5s': 2.5,
    '10s': 5.0
}

# Filter the DataFrame for stimulus onsets in the last half of the task
# < less than cut off 
# > greater than cut off
df_filtered = df[df.apply(lambda row: row['stimulus_onset'] > cutoff_times[row['task']], axis=1)]

'''
# Build the fixed effects model on the filtered DataFrame
model_filtered = smf.ols('reaction_time ~ C(task) * C(condition_type) * stimulus_onset + (1|subject_number)', data=df_filtered).fit()
'''
model_filtered = smf.mixedlm('reaction_time ~ C(task) * C(condition_type) * stimulus_onset', 
                    data=df_filtered, 
                    groups=df_filtered['subject_number']).fit()

# Display the model summary for the filtered data
print(model_filtered.summary())

                              Mixed Linear Model Regression Results
Model:                          MixedLM             Dependent Variable:             reaction_time
No. Observations:               4698                Method:                         REML         
No. Groups:                     26                  Scale:                          0.0089       
Min. group size:                130                 Log-Likelihood:                 4315.9352    
Max. group size:                211                 Converged:                      Yes          
Mean group size:                180.7                                                            
-------------------------------------------------------------------------------------------------
                                                       Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-------------------------------------------------------------------------------------------------
Intercept                                         



### partial model (last half or first half of task)

In [132]:
## cond
cond_type = 0

# Filter the DataFrame for stimulus onsets in the last half of the task
df_filtered = df[df.apply(lambda row: row['stimulus_onset'] > cutoff_times[row['task']], axis=1)]

df_filtered = df_filtered[df_filtered['condition_type'] == cond_type]

# Build the fixed effects model on the filtered DataFrame
model_filtered = smf.ols('reaction_time ~ C(task) * stimulus_onset', data=df_filtered).fit()

# Display the model summary for the filtered data
print(model_filtered.summary())

                            OLS Regression Results                            
Dep. Variable:          reaction_time   R-squared:                       0.070
Model:                            OLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     42.67
Date:                Fri, 27 Sep 2024   Prob (F-statistic):           2.08e-58
Time:                        14:01:02   Log-Likelihood:                 3567.1
No. Observations:                3964   AIC:                            -7118.
Df Residuals:                    3956   BIC:                            -7068.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
Intercept     

## Sequential effect

In [12]:
df = full_dat

# Group by subject and compare Rand Tim to previous trial
df['previous Rand Tim'] = df.groupby('subject_number')['Rand Tim'].shift(1)
df['previous_trial'] = (df['Rand Tim'] >= df['previous Rand Tim']).astype(int) ## 0 is longer before, 1 is shorter before

# Drop the first trial per subject (no previous trial)
df = df.dropna(subset=['previous Rand Tim']).reset_index(drop=True)

# Optional: drop the helper column if you no longer need it
df = df.drop(columns='previous Rand Tim')
df

Unnamed: 0,congruent,Rand Tim,key_resp.rt,block_num,subject_number,previous_trial
0,1.0,0.7037,0.709031,5s,0,0
1,0.0,0.8309,0.434464,5s,0,1
2,1.0,4.6764,0.553009,5s,0,1
3,0.0,4.6441,0.518856,5s,0,0
4,0.0,3.2655,0.552214,5s,0,0
...,...,...,...,...,...,...
9319,1.0,4.1302,0.473883,5s,25,1
9320,0.0,4.1825,0.864461,5s,25,1
9321,0.0,3.5785,0.386026,5s,25,0
9322,0.0,1.9596,0.395578,5s,25,0


## one model

In [14]:
# Create a DataFrame
df = pd.DataFrame(full_dat)

# Rename columns for clarity
df.rename(columns={
    'congruent': 'condition_type',
    'Rand Tim': 'stimulus_onset',
    'key_resp.rt': 'reaction_time',
    'block_num': 'task'
}, inplace=True)

# Convert categorical variables to the appropriate type
df['condition_type'] = df['condition_type'].astype('category')
df['previous_trial'] = df['previous_trial'].astype('category')

#df['task'] = df['task'].astype('category')
df['task'] = pd.Categorical(df['task'], categories=['1s', '3s', '5s', '10s'], ordered=True)  # Set reference condition here

## set to congruent trials ##
df = df[df['condition_type'] == 0]


# Build the mixed effects model
## 
model = smf.mixedlm('reaction_time ~ C(task) * previous_trial * stimulus_onset', 
                    data=df, 
                    groups=df['subject_number']).fit()
# Display the model summary
print(model.summary())

                           Mixed Linear Model Regression Results
Model:                       MixedLM            Dependent Variable:            reaction_time
No. Observations:            7485               Method:                        REML         
No. Groups:                  26                 Scale:                         0.0088       
Min. group size:             197                Log-Likelihood:                6969.1913    
Max. group size:             318                Converged:                     Yes          
Mean group size:             287.9                                                          
--------------------------------------------------------------------------------------------
                                                  Coef.  Std.Err.   z    P>|z| [0.025 0.975]
--------------------------------------------------------------------------------------------
Intercept                                          0.415    0.010 39.993 0.000  0.395  0.435
C(tas



## separate model for longer before and shorter before

In [16]:
# Create a DataFrame
df = pd.DataFrame(full_dat)

# Rename columns for clarity
df.rename(columns={
    'congruent': 'condition_type',
    'Rand Tim': 'stimulus_onset',
    'key_resp.rt': 'reaction_time',
    'block_num': 'task'
}, inplace=True)

# Convert categorical variables to the appropriate type
df['condition_type'] = df['condition_type'].astype('category')
df['previous_trial'] = df['previous_trial'].astype('category')

#df['task'] = df['task'].astype('category')
df['task'] = pd.Categorical(df['task'], categories=['1s', '3s', '5s', '10s'], ordered=True)  # Set reference condition here

## set to congruent trials ##
df = df[df['condition_type'] == 0]
df = df[df['previous_trial'] == 1] ## CHANGE ME -- 0 is longer before, 1 is shorter before


# Build the mixed effects model
## 
model = smf.mixedlm('reaction_time ~ C(task) * stimulus_onset', 
                    data=df, 
                    groups=df['subject_number']).fit()
# Display the model summary
print(model.summary())

                 Mixed Linear Model Regression Results
Model:                 MixedLM     Dependent Variable:     reaction_time
No. Observations:      3719        Method:                 REML         
No. Groups:            26          Scale:                  0.0079       
Min. group size:       103         Log-Likelihood:         3641.3048    
Max. group size:       155         Converged:              Yes          
Mean group size:       143.0                                            
------------------------------------------------------------------------
                              Coef.  Std.Err.   z    P>|z| [0.025 0.975]
------------------------------------------------------------------------
Intercept                      0.348    0.013 26.833 0.000  0.323  0.373
C(task)[T.3s]                  0.017    0.014  1.209 0.227 -0.011  0.045
C(task)[T.5s]                  0.036    0.014  2.586 0.010  0.009  0.063
C(task)[T.10s]                 0.084    0.014  6.035 0.000  0.057  0.

