In [31]:
cd "c:\Users\ecometto001\Documents\Personal\Tesis"

c:\Users\ecometto001\Documents\Personal\Tesis


In [32]:
import pandas as pd
import numpy as np

import statsmodels.api as sm

# plt.style.use('dark_background')
# NOTE(review): not referenced in any of the visible cells; presumably a seed for
# stochastic steps later in the notebook — confirm it is used, or remove it.
random_state=42

In [33]:
# Load the dataset; the path is relative, so it resolves against the working
# directory selected by the `cd` cell at the top of the notebook.
tesis = pd.read_csv("Datos/tesis_final.csv")
# Preview the first rows.
tesis.head()

Unnamed: 0,idPaciente,tpo_programa,TAS,Adherencia,Peso,Altura,IMC,DBT,Sexo,Edad,Fuma,ant_HTA,tas_basal,ICC,Adherencia_Acumulada,Adherencia_Total,TAS_Media_Acumulada
0,4026,0,119.0,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,119.0
1,4026,1,127.0,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,123.0
2,4026,2,133.811536,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,126.603845
3,4026,3,144.377175,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,131.047178
4,4026,4,188.0,1,82.0,152.0,35.0,0.0,0,76.0,0.0,1,116,0.852459,1.0,1.0,142.437742


In [34]:
# Number of missing values in each column.
tesis.isnull().sum()

idPaciente                0
tpo_programa              0
TAS                       0
Adherencia                0
Peso                     63
Altura                   63
IMC                      63
DBT                     126
Sexo                      0
Edad                    119
Fuma                    126
ant_HTA                   0
tas_basal                 0
ICC                     266
Adherencia_Acumulada      0
Adherencia_Total          0
TAS_Media_Acumulada       0
dtype: int64

### Imputar valores faltantes

In [35]:
# Columns whose missing values are filled with a mode in the imputation cell.
categorical = ["Adherencia", "DBT", "Sexo", "Fuma"]
# Columns whose missing values are filled with a mean in the imputation cell.
numerical = ["tpo_programa", "TAS", "Peso", "Altura", "IMC", "Edad", "ICC", "tas_basal", "Adherencia_Acumulada", "Adherencia_Total"]

In [36]:
from statistics import mean, median, mode

# Fill values are computed from the rows with tpo_programa == 0 only, and then
# applied to every row of the corresponding column.
baseline = tesis[tesis["tpo_programa"] == 0]

# The result of fillna is assigned back to the column. Calling
# `tesis[col].fillna(..., inplace=True)` acts on an intermediate Series: it emits a
# FutureWarning on pandas 2.x and leaves `tesis` unchanged under Copy-on-Write.
for cat in categorical:
    # NaNs are dropped first so a missing value can never be picked as the mode.
    tesis[cat] = tesis[cat].fillna(mode(baseline[cat].dropna()))

for num in numerical:
    # Series.mean skips NaN, matching what np.mean does when given a Series.
    tesis[num] = tesis[num].fillna(baseline[num].mean())

In [37]:
# Per-patient lagged copies of Adherencia and TAS (1 and 2 rows back within each
# idPaciente group). Rows with no earlier observation get 0 for Adherencia and the
# patient's tas_basal for TAS.
# NOTE(review): shift() works on row order, so this presumably relies on rows being
# sorted by tpo_programa within each patient — confirm.
for source, fill in (("Adherencia", 0), ("TAS", tesis["tas_basal"])):
    per_patient = tesis.groupby("idPaciente")[source]
    for lag in (1, 2):
        tesis[f"{source}_lag{lag}"] = per_patient.shift(lag).fillna(fill)

In [38]:
from sklearn.preprocessing import PolynomialFeatures

# Keep the column labels: fit_transform returns a bare array.
columns = tesis.columns

# Products of up to three distinct columns (no powers of a single column).
poly = PolynomialFeatures(interaction_only=True, degree=3)
tesis = poly.fit_transform(tesis)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. Fall back only on old versions.
if hasattr(poly, "get_feature_names_out"):
    new_columns = poly.get_feature_names_out(columns)
else:
    new_columns = poly.get_feature_names(columns)
# scikit-learn joins interacting columns with a space; use '*' so an interaction
# is addressed as e.g. 'tpo_programa*Sexo'.
new_columns = [column.replace(' ', '*') for column in new_columns]

tesis = pd.DataFrame(tesis, columns=new_columns)

# Explicit constant column used as the intercept term in the models below.
tesis['Intercept'] = 1



### Probar efectos aleatorios

In [39]:
# Fixed-effects design shared by the three fits below.
fixed_effects = [
    'Intercept', 'Sexo', 'Edad', 'DBT', 'IMC', 'tpo_programa', "Adherencia",
    'tpo_programa*Sexo', 'tpo_programa*Edad', 'tpo_programa*DBT', 'tpo_programa*IMC', 'tpo_programa*Adherencia'
]


def _fit_reml(random_effects):
    """Fit TAS on `fixed_effects` by REML, grouped by idPaciente, using the given random-effect columns."""
    return sm.MixedLM(
        endog=tesis['TAS'],
        exog=tesis[fixed_effects],
        exog_re=tesis[random_effects],
        groups=tesis['idPaciente'],
    ).fit(reml=True)


def _deviance(fit):
    """Return -2 times the log-likelihood of a fitted model."""
    return -2*fit.llf


# Same fixed effects, three random-effect structures.
mixed_both = _fit_reml(['Intercept', 'tpo_programa'])
mixed_intercept = _fit_reml(['Intercept'])
mixed_time = _fit_reml(['tpo_programa'])

print(mixed_intercept.summary())
# Deviance difference of each single-random-effect model against the model with both.
print(_deviance(mixed_intercept) - _deviance(mixed_both))
print(_deviance(mixed_time) - _deviance(mixed_both))
# Both random effects are significant

                Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     TAS        
No. Observations:     3920        Method:                 REML       
No. Groups:           560         Scale:                  123.4577   
Min. group size:      7           Log-Likelihood:         -15426.6674
Max. group size:      7           Converged:              Yes        
Mean group size:      7.0                                            
---------------------------------------------------------------------
                         Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
---------------------------------------------------------------------
Intercept               116.953    4.389 26.644 0.000 108.350 125.556
Sexo                      3.884    0.924  4.203 0.000   2.073   5.695
Edad                      0.158    0.048  3.266 0.001   0.063   0.253
DBT                       2.564    1.369  1.873 0.061  -0.119   5.246
IMC                       0.175    0

### Selección de modelos

In [40]:
def contrast(fixed_effects, contrast, inplace=True, alpha=0.05):
    """Likelihood-ratio test of a set of fixed effects in the mixed model for TAS.

    Two models are fitted by maximum likelihood (reml=False), both with a random
    'Intercept' and 'tpo_programa' term grouped by 'idPaciente': a complete model
    containing `fixed_effects` plus any `contrast` terms not already in it, and a
    reduced model without the `contrast` terms. The summary of the complete model
    and the chi-square p-value (df = len(contrast)) are printed.

    The caller's `fixed_effects` list is never modified; a new list is returned.

    Parameters
    ----------
    fixed_effects : list of str
        Column names of `tesis` that form the current fixed-effects design.
    contrast : list of str
        Column names whose joint contribution is tested.
    inplace : bool, default True
        If False, only report the test and return the complete list of effects.
        If True, return the reduced list when the test is not significant.
    alpha : float, default 0.05
        Significance level used to decide whether the contrast terms are removed.

    Returns
    -------
    list of str
        The reduced list when `inplace` is true and p-value > `alpha`;
        otherwise the complete list (including any `contrast` terms that were
        missing from `fixed_effects`).
    """
    from scipy.stats import chi2

    # Work on a copy: appending to the argument would silently change the caller's list.
    complete_effects = list(fixed_effects)
    # Add the contrast terms that are not in the model yet
    for term in contrast:
        if term not in complete_effects:
            complete_effects.append(term)

    reduced_effects = [x for x in complete_effects if x not in contrast]

    random_effects = tesis[['Intercept', 'tpo_programa']]
    mixed_complete = sm.MixedLM(endog=tesis['TAS'], exog=tesis[complete_effects], exog_re=random_effects, groups=tesis['idPaciente']).fit(reml=False)
    mixed_reduced = sm.MixedLM(endog=tesis['TAS'], exog=tesis[reduced_effects], exog_re=random_effects, groups=tesis['idPaciente']).fit(reml=False)
    print(mixed_complete.summary())

    # Likelihood-ratio statistic: difference of the two deviances.
    chi_value = (-2*mixed_reduced.llf)-(-2*mixed_complete.llf)
    p_value = chi2.sf(chi_value, len(contrast))
    print('\n', contrast, 'p-value =', p_value)

    if not inplace:
        # NOTE(review): as before, this still includes contrast terms that were not in
        # the input list — confirm that is the intended result of inplace=False.
        return complete_effects
    if p_value > alpha:
        print('\nSe remueven los efectos de', contrast)
        return reduced_effects
    print('\nEl modelo continua igual')
    return complete_effects

In [41]:
def model(effects, returned=False):
    """Fit and print a mixed model of TAS on the given fixed effects.

    The model is fitted by maximum likelihood (reml=False) with 'Intercept' and
    'tpo_programa' as random effects, grouped by 'idPaciente'.

    Parameters
    ----------
    effects : list of str
        Columns of `tesis` used as fixed effects.
    returned : bool, default False
        When true, the fitted results object is returned; otherwise None.
    """
    fitted = sm.MixedLM(
        endog=tesis['TAS'],
        exog=tesis[effects],
        exog_re=tesis[['Intercept', 'tpo_programa']],
        groups=tesis['idPaciente'],
    ).fit(reml=False)
    print(fitted.summary())
    return fitted if returned else None

def model_gee(effects, returned=False):
    """Fit and print a GEE model of TAS on the given covariates, clustered by idPaciente.

    Parameters
    ----------
    effects : list of str
        Columns of `tesis` used as covariates.
    returned : bool, default False
        When true, the fitted results object is returned; otherwise None.

    Notes
    -----
    The model must be fitted before summarising: `summary()` lives on the results
    object, not on the unfitted `sm.GEE` model. `exog_re` is not passed because GEE
    has no random-effects design; within-patient correlation is handled through
    `cov_struct` (left at the statsmodels default here).
    """
    fitted = sm.GEE(endog=tesis['TAS'], exog=tesis[effects], groups=tesis['idPaciente']).fit()
    print(fitted.summary())
    if returned:
        return fitted

In [42]:
# Candidate fixed effects: every baseline covariate, time in programme, and the
# interaction of time with each covariate and with the two adherence measures.
baseline_covariates = ["Sexo", "Edad", "DBT", "Fuma", "IMC", "ICC", "tas_basal"]
time_interactions = [
    f"tpo_programa*{term}"
    for term in baseline_covariates + ["Adherencia", "Adherencia_Total"]
]
fixed_effects = ["Intercept", *baseline_covariates, "tpo_programa", *time_interactions]

In [43]:
# Time, adherence and their interaction only. The `model` helper defined above fits
# exactly this specification (ML, random Intercept and tpo_programa per idPaciente)
# and prints the summary.
mixed = model(
    ["Intercept", "tpo_programa", "Adherencia", "tpo_programa*Adherencia"],
    returned=True,
)

                   Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       TAS        
No. Observations:       3920          Method:                   ML         
No. Groups:             560           Scale:                    112.2067   
Min. group size:        7             Log-Likelihood:           -15410.9553
Max. group size:        7             Converged:                Yes        
Mean group size:        7.0                                                
---------------------------------------------------------------------------
                              Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
---------------------------------------------------------------------------
Intercept                    134.307    0.903 148.662 0.000 132.537 136.078
tpo_programa                   0.381    0.242   1.575 0.115  -0.093   0.855
Adherencia                    -2.275    0.925  -2.458 0.014  -4.088  -0.461
tpo_programa*Adherencia       -

In [44]:
# Formula-interface version of the previous fit; the printed summary reports the
# same log-likelihood and coefficients. In the formula, `tpo_programa*Adherencia`
# already yields both main effects and their interaction (see the coefficient
# table), so the explicit `tpo_programa +` term is redundant but harmless.
# re_formula="~tpo_programa" gives each idPaciente a random intercept and a random
# tpo_programa slope.
mixed = sm.MixedLM.from_formula(data=tesis, formula="TAS ~ tpo_programa + tpo_programa*Adherencia", groups="idPaciente", re_formula="~tpo_programa").fit(reml=False)
print(mixed.summary())

                   Mixed Linear Model Regression Results
Model:                   MixedLM       Dependent Variable:       TAS        
No. Observations:        3920          Method:                   ML         
No. Groups:              560           Scale:                    112.2067   
Min. group size:         7             Log-Likelihood:           -15410.9553
Max. group size:         7             Converged:                Yes        
Mean group size:         7.0                                                
----------------------------------------------------------------------------
                               Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
----------------------------------------------------------------------------
Intercept                     134.307    0.903 148.662 0.000 132.537 136.078
tpo_programa                    0.381    0.242   1.575 0.115  -0.093   0.855
Adherencia                     -2.275    0.925  -2.458 0.014  -4.088  -0.461
tpo_programa:Adhere

In [45]:
# Estimated covariance matrix of the random effects from the fit above.
mixed.cov_re

Unnamed: 0,idPaciente,tpo_programa
idPaciente,100.070186,-8.805123
tpo_programa,-8.805123,2.401242
