In [1]:
import pandas as pd
import numpy as np
from typing import List, Optional

import statsmodels.api as sm

# plt.style.use('dark_background')
# Seed constant for reproducibility; none of the cells visible here reads it.
random_state = 42

In [2]:
# Load the longitudinal dataset and append a boolean flag that is True on rows
# whose Adherencia_Total equals exactly 1.
tesis = pd.read_csv("../Datos/tesis_final.csv").assign(
    Adherencia_Perfecta=lambda frame: frame["Adherencia_Total"] == 1
)
tesis.head()

Unnamed: 0,idPaciente,Mes,TAS,Adherencia,Peso,Altura,IMC,DBT,Sexo,Edad,Fuma,ant_HTA,tas_basal,ICC,Adherencia_Acumulada,Adherencia_Total,TAS_Media_Acumulada,Adherencia_Perfecta
0,4026,1,119.0,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,119.0,True
1,4026,2,127.0,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,123.0,True
2,4026,3,140.0,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,128.666667,True
3,4026,4,146.71271,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,133.178178,True
4,4026,5,177.708084,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,142.084159,True


In [3]:
# Shift Mes down by one (the preview above shows it starting at 1, so it starts at 0 afterwards).
# Not idempotent: re-running this cell shifts the column again.
tesis["Mes"] = tesis["Mes"].sub(1)

In [4]:
# Count missing values per column.
tesis.isna().sum()

idPaciente                0
Mes                       0
TAS                       0
Adherencia                0
Peso                     63
Altura                   63
IMC                      63
DBT                     126
Sexo                      0
Edad                    119
Fuma                    126
ant_HTA                   0
tas_basal                 0
ICC                     266
Adherencia_Acumulada      0
Adherencia_Total          0
TAS_Media_Acumulada       0
Adherencia_Perfecta       0
dtype: int64

### Imputar valores faltantes

In [5]:
# Columns kept for modelling, split by how their missing values are imputed
# further down (mode for `categorical`, mean for `numerical`).
categorical = [
    "Adherencia",
    "Sexo",
    "Adherencia_Perfecta",
]
numerical = [
    "Mes",
    "TAS",
    "Edad",
    "tas_basal",
    "Adherencia_Acumulada",
    "Adherencia_Total",
]

In [6]:
# Keep only the modelling columns plus the patient identifier; every other
# column of the loaded frame is dropped from `tesis` here.
selected_columns = categorical + numerical + ["idPaciente"]
tesis = tesis[selected_columns]

In [7]:
from statistics import mean, median, mode

# Impute missing values: mode for the categorical columns, mean for the
# numerical ones, both computed on the rows where Mes == 1.
# NOTE(review): Mes was shifted by -1 in an earlier cell, so Mes == 1 selects the
# second visit rather than the first one; confirm whether Mes == 0 was intended.
reference = tesis[tesis["Mes"] == 1]

fill_values = {cat: mode(reference[cat]) for cat in categorical}
fill_values.update({num: reference[num].mean() for num in numerical})

# The result is assigned back instead of calling `tesis[col].fillna(..., inplace=True)`:
# that form fills a column selection in place, and under pandas Copy-on-Write it
# leaves `tesis` itself unchanged (chained assignment).
tesis = tesis.fillna(value=fill_values)

In [8]:
# Per-patient lagged predictors. Each entry is
# (new column, source column, lag in rows, fill for rows without enough history).
# Adherence lags fall back to 0; TAS lags fall back to the row's tas_basal.
# NOTE(review): shift() follows row order, so this assumes rows are sorted by Mes
# within each idPaciente - confirm against the input file.
lag_specs = [
    ("Adherencia_lag1", "Adherencia", 1, 0),
    ("Adherencia_lag2", "Adherencia", 2, 0),
    ("Adherencia_Acumulada_lag1", "Adherencia_Acumulada", 1, 0),
    ("TAS_lag1", "TAS", 1, tesis["tas_basal"]),
    ("TAS_lag2", "TAS", 2, tesis["tas_basal"]),
]

for lag_name, source_column, periods, filler in lag_specs:
    tesis[lag_name] = tesis.groupby("idPaciente")[source_column].shift(periods).fillna(filler)

In [9]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the frame with every pairwise product of its columns (degree-2,
# interaction-only terms) and name each product "A*B" instead of "A B".
columns = tesis.columns

poly = PolynomialFeatures(degree=2, interaction_only=True)
interaction_matrix = poly.fit_transform(tesis)
new_columns = [
    feature_name.replace(' ', '*')
    for feature_name in poly.get_feature_names_out(columns)
]

# fit_transform returns a plain array, so the frame is rebuilt with the new names.
tesis = pd.DataFrame(interaction_matrix, columns=new_columns)

# Constant column referenced by name as "Intercept" in the model cells.
tesis['Intercept'] = 1

In [10]:
def model(data, effects):
    """Fit a linear mixed model for TAS by maximum likelihood (reml=False).

    Parameters
    ----------
    data : DataFrame-like
        Must provide the columns 'TAS', 'Intercept', 'Mes', 'idPaciente' and
        every name listed in ``effects``.
    effects : list of str
        Column names used as the fixed-effects design.

    Returns
    -------
    The object returned by ``MixedLM(...).fit(reml=False)``.

    The random-effects design is always ['Intercept', 'Mes'], grouped by
    'idPaciente'.
    """
    mixed_model = sm.MixedLM(
        endog=data['TAS'],
        exog=data[effects],
        exog_re=data[['Intercept', "Mes"]],
        groups=data['idPaciente'],
    )
    return mixed_model.fit(reml=False)

def model_gee(data, effects):
    """Fit a GEE model for TAS grouped by patient, print its summary, return the fit.

    Parameters
    ----------
    data : DataFrame-like
        Must provide the columns 'TAS', 'idPaciente' and every name listed in
        ``effects``.
    effects : list of str
        Column names used as the regression design.

    Returns
    -------
    The object returned by ``GEE(...).fit()``.

    Notes
    -----
    GEE has no random-effects design, so no ``exog_re`` is passed: the argument
    is not part of the GEE signature and only suggested that random effects
    were being fitted. Family and working covariance are left at their
    statsmodels defaults.
    """
    gee_model = sm.GEE(
        endog=data['TAS'],
        exog=data[effects],
        groups=data['idPaciente'],
    )
    gee_result = gee_model.fit()
    print(gee_result.summary())
    return gee_result

### Modelo base

In [11]:
# Baseline fixed-effects design shared by the models below; later cells append
# the adherence terms to this list.
fixed_effects = ["Intercept", "Sexo", "Edad", "Mes"]

### Probar efectos aleatorios

In [12]:
# Compare three random-effects structures under one fixed-effects design.
# All three fits use REML: with identical fixed effects the REML log-likelihoods
# are comparable, whereas mixing an ML fit with REML fits (as the earlier
# reml=False fit of mixed_time did) makes the -2*logLik difference meaningless.
random_effects_design = fixed_effects + ["Adherencia", "Adherencia*Mes"]


def _fit_with_random_effects(re_columns):
    """Fit the TAS mixed model by REML using ``re_columns`` as the random-effects design."""
    return sm.MixedLM(
        endog=tesis['TAS'],
        exog=tesis[random_effects_design],
        exog_re=tesis[re_columns],
        groups=tesis['idPaciente'],
    ).fit(reml=True)


mixed_both = _fit_with_random_effects(['Intercept', 'Mes'])
mixed_intercept = _fit_with_random_effects(['Intercept'])
mixed_time = _fit_with_random_effects(['Mes'])

deviance_both = -2 * mixed_both.llf
deviance_intercept = -2 * mixed_intercept.llf
deviance_time = -2 * mixed_time.llf

print(mixed_both.summary())
# Each line: reduced-model deviance, full-model deviance, and their difference.
print(deviance_time, deviance_both, deviance_time - deviance_both)
print(deviance_intercept, deviance_both, deviance_intercept - deviance_both)
# Conclusion recorded by the author: both random effects are significant.
# Re-check the first comparison on rerun, since mixed_time is fitted by REML here.

              Mixed Linear Model Regression Results
Model:                MixedLM   Dependent Variable:   TAS        
No. Observations:     3920      Method:               REML       
No. Groups:           560       Scale:                113.2872   
Min. group size:      7         Log-Likelihood:       -15393.9298
Max. group size:      7         Converged:            Yes        
Mean group size:      7.0                                        
-----------------------------------------------------------------
                     Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------
Intercept           122.641    2.432 50.422 0.000 117.874 127.408
Sexo                  3.760    0.749  5.022 0.000   2.292   5.227
Edad                  0.167    0.039  4.344 0.000   0.092   0.243
Mes                   0.371    0.240  1.545 0.122  -0.100   0.841
Adherencia           -2.317    0.924 -2.508 0.012  -4.127  -0.506
Adherencia*Mes       -0.

### Selección de modelos

In [30]:
# Candidate model: baseline fixed effects plus adherence and its interaction
# with Mes, fitted through model() so AIC/BIC come from a maximum-likelihood fit.
selection_effects = fixed_effects + ["Adherencia", "Adherencia*Mes"]
mixed = model(tesis, selection_effects)
print(mixed.summary())
tuple(round(criterion, 2) for criterion in (mixed.aic, mixed.bic))

              Mixed Linear Model Regression Results
Model:                MixedLM   Dependent Variable:   TAS        
No. Observations:     3920      Method:               ML         
No. Groups:           560       Scale:                113.2361   
Min. group size:      7         Log-Likelihood:       -15390.7343
Max. group size:      7         Converged:            Yes        
Mean group size:      7.0                                        
-----------------------------------------------------------------
                     Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------
Intercept           122.641    2.426 50.555 0.000 117.886 127.395
Sexo                  3.760    0.747  5.036 0.000   2.297   5.223
Edad                  0.167    0.038  4.357 0.000   0.092   0.243
Mes                   0.371    0.240  1.547 0.122  -0.099   0.841
Adherencia           -2.316    0.923 -2.509 0.012  -4.125  -0.506
Adherencia*Mes       -0.

(30801.47, 30864.21)