# Preparation

In [1]:
# Make the project's `src` package importable: the notebook's working
# directory is one level below the directory that contains `src`, so that
# parent directory is put at the front of the module search path.
import os
import sys

current_dir = os.getcwd()
# os.path.split(...)[0] is the directory component, i.e. the parent folder.
parent_dir = os.path.split(current_dir)[0]
# Slice assignment at position 0 prepends, giving this path top priority.
sys.path[:0] = [parent_dir]

In [2]:
# Import the required packages.
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

from src.stat_utils import *

In [7]:
# Locations of the input files, relative to the notebook's working directory.
data_path = '../data'
# Trial-level data first, participant metadata second.
data_file, metadata_file = (
    os.path.join(data_path, file_name) for file_name in ('main.csv', 'meta.csv')
)

In [8]:
# Read both CSV files into DataFrames: `metadata` (used below for the sample
# description) and `experimental_data` (used below for the GEE models).
metadata, experimental_data = (
    pd.read_csv(csv_path) for csv_path in (metadata_file, data_file)
)

# Hypotheses

**Hypothesis 1**: Discrimination accuracy in a figure-ground segregation task will vary systematically with changes in contrast heterogeneity and grid coarseness.

**Hypothesis 2**: As participants undergo perceptual learning, their ability to segregate figure from ground will improve. Therefore, discrimination accuracy should increase over sessions.

#### Expected Main Effects
- ContrastHeterogeneity
- GridCoarseness
- SessionID

#### Expected Interaction Effects
- SessionID x ContrastHeterogeneity
- SessionID x GridCoarseness

# Sample Information

In [9]:
# Describe the sample: participant count, sex breakdown and mean age.
#
# NOTE(review): `value_counts()` orders categories by descending frequency,
# so `.iloc[0]` is the size of the *largest* `Sex` category. It equals the
# number of females only while females are the majority. Select the female
# label explicitly once the coding of the `Sex` column is confirmed.
print(
    f'Number of participants: {len(metadata)}',
    f'Number of females: {metadata["Sex"].value_counts().iloc[0]}',
    f'Mean age: {metadata["Age"].mean():.2f}',
    sep='\n',
)

Number of participants: 8
Number of females: 6
Mean age: 23.75


# GEE Analyses

In [10]:
# Settings shared by every GEE fit in this notebook.
#
# Working correlation: exchangeable.
# NOTE(review): this single instance is passed to every `smf.gee` call below;
# covariance-structure objects may carry state from a previous fit, so a fresh
# instance per model may be safer - confirm against the statsmodels docs.
# NOTE(review): the stored outputs of the per-session cells report
# "Dependence structure: Autoregressive", which does not match this setting;
# those cells need to be re-run.
covariance_structure = sm.cov_struct.Exchangeable()

# Outcome distribution: binomial, matching the `Correct` response modelled below.
family = sm.families.Binomial()

#### Full Model

In [11]:
# Exclude the transfer (final) session, SessionID 9, from the full model.
is_training_session = experimental_data['SessionID'].ne(9)
data = experimental_data.loc[is_training_session]

In [12]:
# Full model: main effects of contrast heterogeneity, grid coarseness and
# session, plus the session-by-stimulus interactions, clustered by subject.
# In the formula language `a*b` already means `a + b + a:b`, so the main
# effects are listed redundantly; the fitted terms are unaffected.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity + GridCoarseness + SessionID + SessionID*ContrastHeterogeneity + SessionID*GridCoarseness",
    groups="SubjectID",
    data=data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report from src.stat_utils.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                48000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                6000
                      Estimating Equations   Max. cluster size:                6000
Family:                           Binomial   Mean cluster size:              6000.0
Dependence structure:         Exchangeable   Num. iterations:                    56
Date:                     Mon, 05 Feb 2024   Scale:                           1.000
Covariance type:                    robust   Time:                         10:46:55
                                      coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
Intercept                           1.6459  

#### Simple Effects

##### Session 1

In [None]:
# Simple effect of contrast heterogeneity within session 1.
in_session = experimental_data['SessionID'].eq(1)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Wed, 24 Jan 2024   Scale:                           1.000
Covariance type:                    robust   Time:                         15:03:40
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 1.3686      0.264      5.178      0.00

##### Session 2

In [None]:
# Simple effect of contrast heterogeneity within session 2.
in_session = experimental_data['SessionID'].eq(2)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     5
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 1.9408      0.336      5.783      0.00

##### Session 3

In [None]:
# Simple effect of contrast heterogeneity within session 3.
in_session = experimental_data['SessionID'].eq(3)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.1601      0.301      7.179      0.00

##### Session 4

In [None]:
# Simple effect of contrast heterogeneity within session 4.
in_session = experimental_data['SessionID'].eq(4)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.1708      0.337      6.444      0.00

##### Session 5

In [None]:
# Simple effect of contrast heterogeneity within session 5.
in_session = experimental_data['SessionID'].eq(5)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:45
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.4108      0.456      5.287      0.00

##### Session 6

In [None]:
# Simple effect of contrast heterogeneity within session 6.
in_session = experimental_data['SessionID'].eq(6)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     5
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:46
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.6448      0.402      6.585      0.00

##### Session 7

In [None]:
# Simple effect of contrast heterogeneity within session 7.
in_session = experimental_data['SessionID'].eq(7)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:46
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.5531      0.283      9.021      0.00

##### Session 8

In [None]:
# Simple effect of contrast heterogeneity within session 8.
in_session = experimental_data['SessionID'].eq(8)
session_data = experimental_data.loc[in_session]

# Same family and working correlation as the full model, clustered by subject.
model = smf.gee(
    formula="Correct ~ ContrastHeterogeneity",
    groups="SubjectID",
    data=session_data,
    cov_struct=covariance_structure,
    family=family,
)
results = model.fit()

# Coefficient table first, then the Wald chi-square report.
print(results.summary())
print('\n\n')
print_wald_chi_square(results)

                               GEE Regression Results                              
Dep. Variable:                     Correct   No. Observations:                 6000
Model:                                 GEE   No. clusters:                        8
Method:                        Generalized   Min. cluster size:                 750
                      Estimating Equations   Max. cluster size:                 750
Family:                           Binomial   Mean cluster size:               750.0
Dependence structure:       Autoregressive   Num. iterations:                     4
Date:                     Thu, 09 Nov 2023   Scale:                           1.000
Covariance type:                    robust   Time:                         17:28:46
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 2.8654      0.364      7.881      0.00

In [None]:
# Per-subject accuracy in session 1, passed to the bootstrap helper.
session_data = experimental_data[experimental_data['SessionID'] == 1]

# Select `Correct` *before* aggregating. Averaging every column first and then
# picking one is wasted work, and on pandas >= 2.0 it raises a TypeError as
# soon as the frame contains a non-numeric column.
accuracy = session_data.groupby('SubjectID')['Correct'].mean()

# `bootstrap` comes from src.stat_utils; presumably it derives the equivalence
# bound from resampled absolute differences in accuracy - verify there.
# NOTE(review): no random seed is set anywhere in this notebook; confirm
# whether `bootstrap` seeds its own generator, otherwise the bound will vary
# between runs.
equivalence_bound = bootstrap(accuracy, num_repeats=100000, percentile=95, abs_diff=True)

0.06450000000000002
