[a]

In [24]:
# Notebook-wide setup: display behaviour, warning policy, and library imports.
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

import warnings
# Silence all Python warnings for the rest of the session.
# NOTE(review): this blanket filter presumably also hides any warnings raised
# while the mixed models are being optimised -- confirm that is acceptable.
warnings.filterwarnings("ignore")
# `np` must be in scope because the model formulas call np.power(...) by name.
import numpy as np
import pandas as pd

# `sm` is not referenced in the cells visible here; `smf` provides mixedlm.
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [26]:
# Read the autism data and derive the two modelling covariates in one chain:
#   Age - the recorded age shifted down by 2.
#   Grp - sicdegp recoded (3 -> 0, 1 -> 1, 2 -> 2) and stored as a category.
#         Codes are kept as floats so the category levels read 0.0 / 1.0 / 2.0;
#         any sicdegp value outside {1, 2, 3} becomes NaN.
# Rows with a missing value in any column are dropped at the end.
df = (
    pd.read_csv("autism.csv")
    .assign(
        Age=lambda frame: frame['age'] - 2,
        Grp=lambda frame: frame['sicdegp'].map({3: 0.0, 1: 1.0, 2: 2.0}).astype('category'),
    )
    .dropna()
)

# Preview the first rows of the prepared frame.
df.head()

Unnamed: 0,age,vsae,sicdegp,childid,Age,Grp
0,2,6.0,3,1,0,0.0
1,3,7.0,3,1,1,0.0
2,5,18.0,3,1,3,0.0
3,9,25.0,3,1,7,0.0
4,13,27.0,3,1,11,0.0


In [38]:
# Fixed-effects formulas.
# A and B use the same mean structure and differ only in the random-effects
# arguments passed to mixedlm below; C omits the np.power(Age, 2)*Grp term.
model_a_formula = "vsae ~ Age + np.power(Age, 2) + Grp + Age*Grp + np.power(Age, 2)*Grp"
model_b_formula = "vsae ~ Age + np.power(Age, 2) + Grp + Age*Grp + np.power(Age, 2)*Grp"
model_c_formula = "vsae ~ Age + np.power(Age, 2) + Grp + Age*Grp"

# Pieces shared by several models: observations are grouped by child, and the
# "~0+..." random-effects formulas carry no random intercept.
child_ids = df['childid']
quadratic_slopes = "~0+Age+np.power(Age, 2)"

# Model specifications.
model_a = smf.mixedlm(
    model_a_formula,
    data=df,
    groups=child_ids,
    re_formula=quadratic_slopes,
)
model_b = smf.mixedlm(
    model_b_formula,
    data=df,
    groups=child_ids,
    re_formula="~0+Age",
    # NOTE(review): the 'Age' variance component repeats the Age slope that
    # re_formula already supplies -- confirm this specification is intended.
    vc_formula={'Age': '0 + Age', 'Grp': '0 + Grp'},
)
model_c = smf.mixedlm(
    model_c_formula,
    data=df,
    groups=child_ids,
    re_formula=quadratic_slopes,
)

# Maximum-likelihood fits (reml=False). A and C request the 'lbfgs' optimiser;
# B is fitted with the default optimiser settings.
fit_modelA = model_a.fit(method=['lbfgs'], reml=False)
fit_modelB = model_b.fit(reml=False)
fit_modelC = model_c.fit(method=['lbfgs'], reml=False)

# Report the maximised log-likelihood of each fit.
for label, fitted in (("A", fit_modelA), ("B", fit_modelB), ("C", fit_modelC)):
    print(f"Model {label} log-likelihood:", fitted.llf)

Model A log-likelihood: -2305.2220473509706
Model B log-likelihood: -2346.8311556068556
Model C log-likelihood: -2306.156899480725


[b]

In [39]:
# Perform model selection
# Compare the candidate models by AIC (smaller is better) using the ML fits
# already computed in the previous cell. Refitting here would repeat all three
# optimisations and would fit A and C with the default optimiser settings
# rather than the 'lbfgs' fits whose log-likelihoods were reported, so the
# comparison could refer to different fits than the ones shown.
models = {'A': model_a, 'B': model_b, 'C': model_c}  # unfitted specs, kept for later reference
fitted_models = {'A': fit_modelA, 'B': fit_modelB, 'C': fit_modelC}
aic_values = {label: result.aic for label, result in fitted_models.items()}

# Label of the model with the smallest AIC.
best_model = min(aic_values, key=aic_values.get)
print("Best model according to AIC:", best_model)

Best model according to AIC: C


In [41]:
# Report the test statistics and the corresponding p-values
# for Model C, the model reported as best by AIC. The summary lists each
# fixed-effect coefficient with its standard error, z statistic, p-value and
# 95% confidence bounds, followed by the random-effects (co)variance estimates.
# The model is referenced by name, so update this line if the selection changes.
fit_modelC.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,vsae
No. Observations:,610,Method:,ML
No. Groups:,158,Scale:,38.4864
Min. group size:,1,Log-Likelihood:,-2306.1569
Max. group size:,5,Converged:,Yes
Mean group size:,3.9,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,13.463,0.779,17.273,0.000,11.935,14.990
Grp[T.1.0],-4.987,1.035,-4.818,0.000,-7.015,-2.958
Grp[T.2.0],-3.622,0.974,-3.717,0.000,-5.532,-1.712
Age,6.149,0.686,8.963,0.000,4.805,7.494
Age:Grp[T.1.0],-4.069,0.876,-4.644,0.000,-5.787,-2.352
Age:Grp[T.2.0],-3.496,0.825,-4.236,0.000,-5.114,-1.879
"np.power(Age, 2)",0.109,0.043,2.562,0.010,0.026,0.193
Age Var,14.199,0.477,,,,
"Age x np.power(Age, 2) Cov",-0.409,0.034,,,,
