In [1]:
import warnings

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

<h1 style="text-align:center">Dataset Loading</h1>

In [2]:
# Load the raw study workbook into `data`. The path is relative to the
# notebook's working directory, so the kernel must be started from this folder.
data = pd.read_excel("../data/Telomere MDD.xlsx")

In [48]:
# Build the modelling frame in one pass: pick the two response outcomes and the
# candidate predictors out of the raw sheet, then recode both outcomes from
# the sheet's 1/2 coding to 1/0 (code 1 -> 1, code 2 -> 0).
# Any value other than 1 or 2, including a missing one, comes out of .map()
# as NaN. Presumably 1 = responder and 2 = non-responder -- confirm against
# the study codebook.
# Column selection plus .assign() yields a new frame, so `data` is not modified.
log_res_temp = data[
    [
        "hamd_response",
        "phq9_response",
        "sex",
        "age",
        "telomere_length",
        "physical_activity_level",
        "bmi_asian",
        "ace_score",
        "average_sleep",
        "treatment_mode",
    ]
].assign(
    hamd_response=lambda frame: frame["hamd_response"].map({2: 0, 1: 1}),
    phq9_response=lambda frame: frame["phq9_response"].map({2: 0, 1: 1}),
)

<h1 style="text-align:center">Multiple Logistic Regression</h1>

<h2>HAMD Response</h2>

In [28]:
# Specify (but do not yet fit) the HAMD response logit.
# Continuous terms: age, telomere_length, ace_score, average_sleep.
# Categorical terms: sex (default reference level), treatment_mode with
# level 3 as reference, bmi_asian with level 2 as reference.
# The formula is split across adjacent literals purely for readability; Python
# joins them into the same single string at compile time.
hamd_log_res_model = smf.logit(
    data=log_res_temp,
    formula=(
        "hamd_response ~ age + telomere_length + ace_score + average_sleep"
        " + C(sex)"
        " + C(treatment_mode, Treatment(reference=3))"
        " + C(bmi_asian, Treatment(reference=2))"
    ),
)

In [29]:
# Fit the HAMD model with the optimizer's default settings; the convergence
# message and iteration count are printed as this cell's output.
hamd_result = hamd_log_res_model.fit()

Optimization terminated successfully.
         Current function value: 0.447645
         Iterations 7


In [30]:
# Show the fitted HAMD model. Coefficients are on the log-odds scale; each
# categorical row is a contrast against the reference level set in the formula
# (treatment_mode = 3, bmi_asian = 2).
hamd_result.summary()

0,1,2,3
Dep. Variable:,hamd_response,No. Observations:,64.0
Model:,Logit,Df Residuals:,53.0
Method:,MLE,Df Model:,10.0
Date:,"Mon, 09 Jun 2025",Pseudo R-squ.:,0.3309
Time:,08:11:54,Log-Likelihood:,-28.649
converged:,True,LL-Null:,-42.818
Covariance Type:,nonrobust,LLR p-value:,0.001594

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0577,2.239,-0.026,0.979,-4.446,4.331
C(sex)[T.2],-0.8779,0.790,-1.112,0.266,-2.426,0.670
"C(treatment_mode, Treatment(reference=3))[T.1]",-3.4940,1.249,-2.797,0.005,-5.942,-1.046
"C(treatment_mode, Treatment(reference=3))[T.2]",1.6452,0.942,1.746,0.081,-0.202,3.492
"C(bmi_asian, Treatment(reference=2))[T.1]",0.1469,1.565,0.094,0.925,-2.920,3.214
"C(bmi_asian, Treatment(reference=2))[T.3]",0.4096,1.068,0.384,0.701,-1.683,2.503
"C(bmi_asian, Treatment(reference=2))[T.4]",-2.5581,1.451,-1.763,0.078,-5.403,0.287
age,-0.0150,0.040,-0.371,0.710,-0.094,0.064
telomere_length,-0.0293,0.043,-0.684,0.494,-0.113,0.055


In [31]:
# Exponentiate the log-odds coefficients to get odds ratios. The Intercept
# entry is the baseline odds, not a ratio. These are point estimates only:
# no confidence bounds are computed here.
hamd_odds_ratio = np.exp(hamd_result.params)

In [32]:
# Display the HAMD odds ratios (one entry per model term).
hamd_odds_ratio

Intercept                                         0.943952
C(sex)[T.2]                                       0.415667
C(treatment_mode, Treatment(reference=3))[T.1]    0.030380
C(treatment_mode, Treatment(reference=3))[T.2]    5.182066
C(bmi_asian, Treatment(reference=2))[T.1]         1.158288
C(bmi_asian, Treatment(reference=2))[T.3]         1.506207
C(bmi_asian, Treatment(reference=2))[T.4]         0.077455
age                                               0.985081
telomere_length                                   0.971170
ace_score                                         1.123791
average_sleep                                     1.223302
dtype: float64

<h2>PHQ-9 Response</h2>

In [51]:
# Specify (but do not yet fit) the PHQ-9 response logit. The right-hand side is
# the same as for the HAMD model: age, telomere_length, ace_score and
# average_sleep as continuous terms; sex, treatment_mode (reference level 3)
# and bmi_asian (reference level 2) as categorical terms.
# The formula is split across adjacent literals purely for readability; Python
# joins them into the same single string at compile time.
phq9_log_res_model = smf.logit(
    data=log_res_temp,
    formula=(
        "phq9_response ~ age + telomere_length + ace_score + average_sleep"
        " + C(sex)"
        " + C(treatment_mode, Treatment(reference=3))"
        " + C(bmi_asian, Treatment(reference=2))"
    ),
)

In [59]:
# Fit the PHQ-9 model with BFGS and an explicit iteration cap of 100 (the
# recorded run used 77 iterations). Presumably BFGS was chosen because the
# default optimizer did not converge on this outcome -- confirm.
phq9_result = phq9_log_res_model.fit(method="bfgs", maxiter=100)

# "Optimization terminated successfully" is not enough to trust this fit. In
# the recorded run the bmi_asian[T.1] term has coef ~17.3 with std err ~6469,
# the signature of (quasi-)complete separation: the likelihood keeps improving
# as the coefficient grows, so no finite estimate exists and the reported
# value, p-value and odds ratio for that term are artefacts of where the
# optimizer stopped. Surface such terms explicitly instead of letting them
# pass silently. The cut-off of 100 is a deliberately loose heuristic: a
# log-odds standard error that large is never a usable estimate.
phq9_std_err = phq9_result.bse
phq9_unstable_terms = phq9_std_err[
    (phq9_std_err > 100) | ~np.isfinite(phq9_std_err)
].index.tolist()
if phq9_unstable_terms:
    warnings.warn(
        "PHQ-9 logit: standard errors are huge or non-finite for "
        f"{phq9_unstable_terms}. This indicates (quasi-)complete separation; "
        "do not interpret these coefficients or their odds ratios. Consider "
        "merging sparse categories or using a penalized fit.",
        RuntimeWarning,
    )

Optimization terminated successfully.
         Current function value: 0.535265
         Iterations: 77
         Function evaluations: 81
         Gradient evaluations: 81


In [60]:
# Show the fitted PHQ-9 model. Coefficients are on the log-odds scale; each
# categorical row is a contrast against the reference level set in the formula
# (treatment_mode = 3, bmi_asian = 2).
# NOTE(review): check the std err column before reading any row. In the
# recorded output the bmi_asian[T.1] row has std err ~6469, i.e. that term is
# not estimable from these data.
phq9_result.summary()

0,1,2,3
Dep. Variable:,phq9_response,No. Observations:,64.0
Model:,Logit,Df Residuals:,53.0
Method:,MLE,Df Model:,10.0
Date:,"Mon, 09 Jun 2025",Pseudo R-squ.:,0.1682
Time:,08:27:17,Log-Likelihood:,-34.257
converged:,True,LL-Null:,-41.183
Covariance Type:,nonrobust,LLR p-value:,0.1798

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.9371,2.029,0.955,0.340,-2.039,5.914
C(sex)[T.2],-0.2869,0.679,-0.423,0.672,-1.617,1.043
"C(treatment_mode, Treatment(reference=3))[T.1]",-0.9356,0.670,-1.396,0.163,-2.249,0.378
"C(treatment_mode, Treatment(reference=3))[T.2]",1.7165,1.181,1.453,0.146,-0.599,4.032
"C(bmi_asian, Treatment(reference=2))[T.1]",17.2688,6469.274,0.003,0.998,-1.27e+04,1.27e+04
"C(bmi_asian, Treatment(reference=2))[T.3]",0.4875,1.006,0.485,0.628,-1.483,2.459
"C(bmi_asian, Treatment(reference=2))[T.4]",-0.6798,1.393,-0.488,0.626,-3.410,2.050
age,-0.0294,0.039,-0.753,0.451,-0.106,0.047
telomere_length,0.0131,0.034,0.383,0.702,-0.054,0.080


In [61]:
# Exponentiate the log-odds coefficients to get odds ratios. The Intercept
# entry is the baseline odds, not a ratio. These are point estimates only:
# no confidence bounds are computed here.
# NOTE(review): in the recorded output the bmi_asian[T.1] entry is ~3.2e7. It
# comes from a coefficient whose std err is ~6469, so treat it as "not
# estimable" rather than as an effect size.
phq9_odds_ratio = np.exp(phq9_result.params)

In [62]:
# Display the PHQ-9 odds ratios (one entry per model term).
phq9_odds_ratio

Intercept                                         6.938655e+00
C(sex)[T.2]                                       7.505788e-01
C(treatment_mode, Treatment(reference=3))[T.1]    3.923596e-01
C(treatment_mode, Treatment(reference=3))[T.2]    5.564837e+00
C(bmi_asian, Treatment(reference=2))[T.1]         3.160433e+07
C(bmi_asian, Treatment(reference=2))[T.3]         1.628278e+00
C(bmi_asian, Treatment(reference=2))[T.4]         5.067279e-01
age                                               9.710071e-01
telomere_length                                   1.013137e+00
ace_score                                         1.077503e+00
average_sleep                                     9.422597e-01
dtype: float64