In [1]:
import os
import sys 

# Put the directory one level above the current working directory at the very
# front of the module search path. The later `from src.stat_utils import *`
# presumably relies on `src` being found there -- confirm against the repo layout.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
sys.path[:0] = [parent_dir]

In [2]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt

from src.stat_utils import *

In [3]:
# Data directory (relative to the notebook's working directory) and the
# experimental data table read from it.
data_path = '../data/'
experimental_data_csv = data_path + 'experimental_data.csv'
experimental_data = pd.read_csv(experimental_data_csv)

In [4]:
# GEE settings used by the model cells below:
#   - working covariance structure: autoregressive, grid variant
#   - distribution family of the response: binomial
covariance_structure = sm.cov_struct.Autoregressive(grid=True)
family = sm.families.Binomial()

#### Generalized Estimating Equations (GEE)
##### Full Model

In [5]:
# ignore transfer (final) session
data = experimental_data[experimental_data['SessionID'] != 9]

# Fit the full model, clustering observations by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity + GridCoarseness + SessionID"
    " + SessionID*ContrastHeterogeneity + SessionID*GridCoarseness",
    "SubjectID",
    data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                48000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                6000
                      Estimating Equations   Max. cluster size:                6000
Family:                           Binomial   Mean cluster size:              6000.0
Dependence structure:       Autoregressive   Num. iterations:                     5
Date:                     Thu, 23 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         15:44:53
                                      coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
Intercept                           2.9204  

##### Simple Effects
##### Session 1

In [18]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 1]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 23 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         15:55:56
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 1.3686      0.264      5.178      0.00



<class 'statsmodels.stats.contrast.WaldTestResults'>
                                         chi2                  P>chi2  df constraint
Intercept               [[26.81624672533115]]  2.2374832923151046e-07              1
ContrastHeterogeneity  [[25.929093113090833]]   3.541906563073527e-07              1

##### Session 2

In [7]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 2]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     5
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 1.9408      0.336      5.783      0.00

##### Session 3

In [8]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 3]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.1601      0.301      7.179      0.00

##### Session 4

In [9]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 4]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.1708      0.337      6.444      0.00

##### Session 5

In [10]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 5]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.4108      0.456      5.287      0.00

##### Session 6

In [11]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 6]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     5
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:46
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.6448      0.402      6.585      0.00

##### Session 7

In [12]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 7]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:46
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.5531      0.283      9.021      0.00

##### Session 8

In [13]:
# select session
session_data = experimental_data[experimental_data['SessionID'] == 8]

# Fit the simple-effect model for this session, clustering by subject.
# The fit gets its own covariance-structure instance: a statsmodels covariance
# structure keeps its current state in `dep_params`, so an instance shared
# between cells would make this fit start from whatever a previously executed
# cell left behind instead of from the default state of a new instance.
model = smf.gee(
    "Correct ~ ContrastHeterogeneity",
    "SubjectID",
    session_data,
    cov_struct=sm.cov_struct.Autoregressive(grid=True),
    family=family,
)
results = model.fit()

# print the regression table, then the Wald chi-square tests
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:46
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.8654      0.364      7.881      0.00

#### Equivalence Tests

In [15]:
# Per-subject accuracy in session 1: the mean of `Correct` within each subject.
session_data = experimental_data[experimental_data['SessionID'] == 1]
# Select `Correct` *before* aggregating so only that column is averaged.
# Averaging every column first is wasted work and raises a TypeError on
# pandas >= 2.0 as soon as the frame contains a non-numeric column.
accuracy = session_data.groupby(['SubjectID'])['Correct'].mean()
# NOTE(review): no random seed is set in the visible cells; if `bootstrap`
# draws random resamples, the bound will vary slightly between runs -- confirm.
equivalence_bound = bootstrap(accuracy, num_repeats=100000, percentile=95, abs_diff=True)

0.06450000000000002
