In [3]:
# IPython automagic `cd`: switches the kernel's working directory so that the
# relative path "Datos/tesis_final.csv" read further down resolves.
# NOTE(review): this is a hard-coded, user-specific absolute Windows path; the
# notebook will not run on another machine without editing this line.
cd "c:\Users\ecometto001\Documents\Personal\Tesis"

c:\Users\ecometto001\Documents\Personal\Tesis


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.regression.mixed_linear_model import MixedLM

# plt.style.use('dark_background')
# Seed constant. It is not referenced by any of the cells visible in this part
# of the notebook — presumably consumed by later cells (TODO confirm).
random_state=42

In [5]:
# Read the thesis dataset (path is relative to the current working directory)
# and preview its first five rows as the cell output.
tesis = pd.read_csv(filepath_or_buffer="Datos/tesis_final.csv")
tesis.head(n=5)

Unnamed: 0,idPaciente,tpo_programa,TAS,Adherencia,Peso,Altura,IMC,DBT,Sexo,Edad,Fuma,ant_HTA,tas_basal,ICC,Adherencia_Acumulada,Adherencia_Total,TAS_Media_Acumulada
0,4234,0,145,0,87.0,165.0,32.0,1.0,1,67.0,1.0,1,151,1.023256,0.0,0.857143,145.0
1,4234,1,129,1,87.0,165.0,32.0,1.0,1,67.0,1.0,1,151,1.023256,0.5,0.857143,137.0
2,4234,2,140,1,87.0,165.0,32.0,1.0,1,67.0,1.0,1,151,1.023256,0.666667,0.857143,138.0
3,4234,3,118,1,87.0,165.0,32.0,1.0,1,67.0,1.0,1,151,1.023256,0.75,0.857143,133.0
4,4234,4,135,1,87.0,165.0,32.0,1.0,1,67.0,1.0,1,151,1.023256,0.8,0.857143,133.4


In [6]:
# Keep only the rows whose programme time (tpo_programa) is at most 10.
# The explicit .copy() makes the result an independent DataFrame: later cells
# fill and add columns on it, and doing that on a boolean-mask slice of the raw
# frame is what triggers pandas' SettingWithCopy / copy-on-write ambiguity.
tesis = tesis.loc[tesis["tpo_programa"] <= 10].copy()

In [7]:
# Count of missing values per column (isna is pandas' alias of isnull).
tesis.isna().sum()

idPaciente                0
tpo_programa              0
TAS                       0
Adherencia                0
Peso                     42
Altura                   42
IMC                      42
DBT                     119
Sexo                      0
Edad                    105
Fuma                    119
ant_HTA                   0
tas_basal                 0
ICC                     203
Adherencia_Acumulada      0
Adherencia_Total          0
TAS_Media_Acumulada       0
dtype: int64

### Imputar valores faltantes

In [8]:
# Columns treated as categorical by the imputation step.
categorical = [
    "Adherencia",
    "DBT",
    "Sexo",
    "Fuma",
    "ant_HTA",
]

# Columns treated as numerical by the imputation step.
numerical = [
    "tpo_programa",
    "TAS",
    "Peso",
    "Altura",
    "IMC",
    "Edad",
    "ICC",
    "tas_basal",
    "Adherencia_Acumulada",
    "Adherencia_Total",
]

In [9]:
from statistics import mean, median, mode

# Impute missing values with statistics taken from the rows where
# tpo_programa == 0: the mode for categorical columns and the mean for
# numerical ones.
#
# The previous form, tesis[col].fillna(..., inplace=True), fills a temporary
# Series obtained by chained indexing. That is deprecated in pandas 2.x and
# silently leaves `tesis` untouched under copy-on-write, so the fill values are
# collected first and applied with a single, non-chained DataFrame.fillna.
baseline = tesis[tesis["tpo_programa"] == 0]

fill_values = {}
for cat in categorical:
    # Drop NaN explicitly: statistics.mode counts every NaN it is given as a
    # separate candidate value instead of ignoring it.
    fill_values[cat] = mode(baseline[cat].dropna())

for num in numerical:
    # Series.mean skips NaN, which is what np.mean(Series) resolved to before.
    fill_values[num] = baseline[num].mean()

tesis = tesis.fillna(value=fill_values)

In [10]:
# Add one- and two-step lags of Adherencia and TAS within each patient.
# shift() follows the current row order inside every idPaciente group
# (presumably already ordered by tpo_programa — TODO confirm), and the leading
# rows that have no previous value are filled with 0.
for source_col in ("Adherencia", "TAS"):
    for lag in (1, 2):
        shifted = tesis.groupby("idPaciente")[source_col].shift(lag)
        tesis[f"{source_col}_lag{lag}"] = shifted.fillna(0)

In [11]:
from sklearn.preprocessing import PolynomialFeatures

# Column names of the input frame, captured before `tesis` is replaced below.
columns = tesis.columns

# Expand the frame with interaction terms only (products of up to three
# distinct columns, no powers of a single column).
# NOTE(review): this cell replaces `tesis` with the expanded frame, so running
# it twice expands the already-expanded columns again.
poly = PolynomialFeatures(interaction_only=True, degree=3)
expanded = poly.fit_transform(tesis)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
feature_namer = getattr(poly, "get_feature_names_out", None) or poly.get_feature_names
new_columns = feature_namer(columns)
# scikit-learn joins interacting columns with a space ("a b"); use "a*b" instead.
new_columns = [column.replace(' ', '*') for column in new_columns]

tesis = pd.DataFrame(expanded, columns=new_columns)

# Constant column used as the intercept term in the models fitted below.
tesis['Intercept'] = 1



### Probar efectos aleatorios

In [12]:
# Fixed-effects design shared by the three random-effects specifications below.
fixed_effects = [
    'Intercept', 'Sexo', 'Edad', 'DBT', 'Fuma', 'IMC', 'ICC', 'tas_basal', 'tpo_programa',
    'tpo_programa*Sexo', 'tpo_programa*Edad', 'tpo_programa*DBT', 'tpo_programa*Fuma',
    'tpo_programa*IMC', 'tpo_programa*ICC', 'tpo_programa*tas_basal', 'tpo_programa*Adherencia',
    'tpo_programa*Adherencia_lag1', 'tpo_programa*Adherencia_lag2',
    'tpo_programa*TAS_lag1', 'tpo_programa*TAS_lag2',
]

# Fit the same model by REML with, in order: random intercept + random
# tpo_programa slope, random intercept only, random tpo_programa slope only.
# Observations are grouped by idPaciente in all three fits.
mixed_both, mixed_intercept, mixed_time = (
    MixedLM(
        endog=tesis['TAS'],
        exog=tesis[fixed_effects],
        exog_re=tesis[random_columns],
        groups=tesis['idPaciente'],
    ).fit(reml=True)
    for random_columns in (['Intercept', 'tpo_programa'], ['Intercept'], ['tpo_programa'])
)

print(mixed_intercept.summary())

# Difference in -2 * log-likelihood between each single-random-effect model and
# the model with both random effects (intercept-only first, then slope-only).
for nested_fit in (mixed_intercept, mixed_time):
    print((-2 * nested_fit.llf) - (-2 * mixed_both.llf))
# Author's conclusion: both random effects are significant.



                  Mixed Linear Model Regression Results
Model:                   MixedLM      Dependent Variable:      TAS        
No. Observations:        3430         Method:                  REML       
No. Groups:              490          Scale:                   129.6072   
Min. group size:         7            Log-Likelihood:          -13490.8403
Max. group size:         7            Converged:               Yes        
Mean group size:         7.0                                              
--------------------------------------------------------------------------
                              Coef.  Std.Err.   z    P>|z|  [0.025  0.975]
--------------------------------------------------------------------------
Intercept                     94.782    7.246 13.080 0.000  80.579 108.985
Sexo                           2.990    1.082  2.763 0.006   0.869   5.111
Edad                           0.075    0.048  1.552 0.121  -0.020   0.169
DBT                            2.141    1.35

### Selección de modelos

In [13]:
def contrast(fixed_effects, contrast, inplace=True):
    """Likelihood-ratio test for a group of fixed effects in the TAS mixed model.

    Two MixedLM models are fitted by maximum likelihood (``reml=False``) on the
    global ``tesis`` frame, both with a random intercept and a random
    ``tpo_programa`` slope grouped by ``idPaciente``:

    * the complete model, using ``fixed_effects`` plus any term of ``contrast``
      that is not already in it;
    * the reduced model, using the complete list without the ``contrast`` terms.

    The summary of the complete model and the p-value of the test are printed.

    Parameters
    ----------
    fixed_effects : list of str
        Column names of ``tesis`` used as fixed effects. The list is NOT
        modified; previously the missing ``contrast`` terms were appended to
        the caller's list as a side effect.
    contrast : list of str
        Column names whose joint contribution is tested. (The parameter name
        shadows the function inside its own body; it is kept for callers that
        pass it by keyword.)
    inplace : bool, default True
        If False, always return the complete list. If True, return the reduced
        list when the p-value is above 0.05 and the complete list otherwise.

    Returns
    -------
    list of str
        A new list of fixed-effect names, chosen as described under ``inplace``.
    """
    from scipy.stats import chi2

    # Work on a copy so the caller's list is never mutated.
    complete_effects = list(fixed_effects)
    # Add the tested terms if they are not in the model yet.
    for term in contrast:
        if term not in complete_effects:
            complete_effects.append(term)

    reduced_effects = [term for term in complete_effects if term not in contrast]

    mixed_complete = MixedLM(
        endog=tesis['TAS'],
        exog=tesis[complete_effects],
        exog_re=tesis[['Intercept', 'tpo_programa']],
        groups=tesis['idPaciente'],
    ).fit(reml=False)
    mixed_reduced = MixedLM(
        endog=tesis['TAS'],
        exog=tesis[reduced_effects],
        exog_re=tesis[['Intercept', 'tpo_programa']],
        groups=tesis['idPaciente'],
    ).fit(reml=False)
    print(mixed_complete.summary())

    # Likelihood-ratio statistic: difference in deviance (-2 * log-likelihood).
    chi_value = (-2 * mixed_reduced.llf) - (-2 * mixed_complete.llf)
    # Degrees of freedom = number of columns actually removed, which stays
    # correct even if `contrast` lists the same term more than once.
    dof = len(complete_effects) - len(reduced_effects)
    p_value = chi2.sf(chi_value, dof)
    print('\n', contrast, 'p-value =', p_value)

    if not inplace:
        return complete_effects
    if p_value > 0.05:
        print('\nSe remueven los efectos de', contrast)
        return reduced_effects
    print('\nEl modelo continua igual')
    return complete_effects

In [14]:
def model(effects, returned=False):
    """Fit the TAS mixed model with the given fixed effects and print its summary.

    The model is fitted by maximum likelihood (``reml=False``) on the global
    ``tesis`` frame, with a random intercept and a random ``tpo_programa``
    slope grouped by ``idPaciente``.

    Parameters
    ----------
    effects : list of str
        Column names of ``tesis`` used as fixed effects.
    returned : bool, default False
        When truthy, the fitted results object is returned; otherwise the
        function returns None.
    """
    fitted = MixedLM(
        endog=tesis['TAS'],
        exog=tesis[effects],
        exog_re=tesis[['Intercept', 'tpo_programa']],
        groups=tesis['idPaciente'],
    ).fit(reml=False)
    print(fitted.summary())
    return fitted if returned else None

In [15]:
# Starting fixed-effects list for model selection: the intercept, the baseline
# covariates, programme time, and the interaction of programme time with each
# covariate plus Adherencia and Adherencia_Total.
baseline_covariates = ["Sexo", "Edad", "DBT", "Fuma", "IMC", "ICC", "tas_basal"]
time_interactions = [
    "tpo_programa*" + covariate
    for covariate in baseline_covariates + ["Adherencia", "Adherencia_Total"]
]
fixed_effects = ["Intercept"] + baseline_covariates + ["tpo_programa"] + time_interactions

In [16]:
# Smallest model of interest: intercept, programme time, and the interaction of
# programme time with adherence.
model(effects=["Intercept", "tpo_programa", "tpo_programa*Adherencia"])

                   Mixed Linear Model Regression Results
Model:                  MixedLM       Dependent Variable:       TAS        
No. Observations:       3430          Method:                   ML         
No. Groups:             490           Scale:                    119.3885   
Min. group size:        7             Log-Likelihood:           -13520.5598
Max. group size:        7             Converged:                Yes        
Mean group size:        7.0                                                
---------------------------------------------------------------------------
                              Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
---------------------------------------------------------------------------
Intercept                    131.404    0.545 241.008 0.000 130.335 132.473
tpo_programa                   0.690    0.175   3.957 0.000   0.348   1.032
tpo_programa*Adherencia       -0.903    0.162  -5.578 0.000  -1.220  -0.586
Intercept Var                 9