# Notebook 04: Reading Cost vs KEC Models
This notebook merges the KEC metrics (from Notebook 02) with the ZuCo reading data (from Notebook 03), and fits statistical models to evaluate how well KEC predicts neurophysiological reading costs.
- **Response Variables:** Word-level eye-tracking measures (e.g., first fixation duration, FFD; gaze duration, GD) and EEG features (e.g., theta power).
- **Predictors:** KEC metrics (entropy, curvature, coherence) for the word, plus control covariates (word length, frequency, etc.).
- **Model:** Linear mixed-effects regression with subjects and sentences as random effects, to account for repeated measures; cluster-robust standard errors (clustered by subject) are also computed. The example cell below fits the subject random intercept only.
- **Output:** Model summary tables, effect size estimates, and diagnostic plots (partial residual plots saved to `figures/`).

In [1]:
# Merge KEC metrics with aligned ZuCo data
import pandas as pd

kec_df = pd.read_csv('data/processed/kec/metrics_en.csv')  # assuming English KEC metrics
zuco_df = pd.read_csv('data/processed/zuco_aligned.csv')

# Left merge keeps every ZuCo word instance; words with no KEC entry get NaN in
# the KEC columns. validate='many_to_one' makes pandas raise MergeError if a
# word occurs more than once in kec_df, which would otherwise silently
# duplicate ZuCo rows and inflate the sample the models are fitted on.
# NOTE(review): keys are matched exactly ('Word' vs 'word'); confirm both files
# use the same casing/punctuation conventions.
data = pd.merge(zuco_df, kec_df, how='left', left_on='Word', right_on='word',
                validate='many_to_one')
print(f"Merged dataset: {len(data)} word instances, columns: {list(data.columns)[:10]}...")

# Report coverage so missing KEC metrics are visible rather than silently
# turning into NaN predictors downstream.
n_unmatched = int((~zuco_df['Word'].isin(kec_df['word'])).sum())
print(f"Word instances without KEC metrics: {n_unmatched} of {len(zuco_df)}")

In [2]:
# Fit a linear mixed model for First Fixation Duration as example
import statsmodels.formula.api as smf

# Restrict to complete cases explicitly. The left merge can leave NaN in the
# KEC predictors; filtering here keeps the design matrix and the grouping
# labels aligned row-for-row and makes the sample size actually used visible.
model_cols = ["FFD", "entropy", "curvature", "coherence", "WordLength", "LogFreq", "Subject"]
model_df = data.dropna(subset=model_cols)
print(f"Fitting on {len(model_df)} of {len(data)} rows (complete cases only)")
if model_df.empty:
    raise ValueError("No complete rows available for the FFD model; check the KEC/ZuCo merge.")

# Random intercept per subject (re_formula="~1").
# NOTE(review): the notebook overview also mentions sentence random effects and
# subject-clustered robust standard errors; neither is fitted in this cell.
model = smf.mixedlm("FFD ~ entropy + curvature + coherence + WordLength + LogFreq",
                     model_df, groups=model_df["Subject"], re_formula="~1")
result = model.fit()
print(result.summary())

In [3]:
# Plot partial effect of KEC entropy on FFD (for example)
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# Other fixed-effect covariates from the FFD model formula. They are held at
# their sample means so the fitted line sits at a typical word rather than at
# "all covariates = 0" (e.g. a zero-length word), which is what
# Intercept + beta * entropy alone would draw.
# Assumes these are numeric columns with same-named coefficients — TODO confirm.
covariates = ['curvature', 'coherence', 'WordLength', 'LogFreq']
plot_df = data.dropna(subset=['FFD', 'entropy', *covariates])

# Index the coefficients directly: a missing name should raise KeyError rather
# than silently fall back to 0 and draw a meaningless line.
beta_entropy = result.params['entropy']
intercept = result.params['Intercept']
baseline = intercept + sum(result.params[name] * plot_df[name].mean() for name in covariates)

entropy_vals = np.linspace(plot_df['entropy'].min(), plot_df['entropy'].max(), 100)
y_pred = baseline + beta_entropy * entropy_vals

fig, ax = plt.subplots()
ax.scatter(plot_df['entropy'], plot_df['FFD'], alpha=0.2, label='Observed')
ax.plot(entropy_vals, y_pred, color='C1', label='Predicted FFD (covariates at mean)')
ax.set_xlabel('Transition Entropy')
ax.set_ylabel('First Fixation Duration (ms)')
ax.set_title('Partial effect of entropy on FFD')
ax.legend()

# Create the output directory first so savefig does not fail on a fresh checkout.
out_path = Path('figures/F2_entropy_vs_FFD.png')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)
plt.show()