In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings("ignore")
plt.style.use("dark_background")
random_state=42

In [4]:
tesis = pd.read_csv("Datos/tesis_final.csv")
tesis = tesis[tesis["tpo_programa"] <= 12]
tesis["Intercept"] = 1

In [5]:
tesis_lag = tesis[["idPaciente", "Intercept", "TAS", "tpo_programa", "Adherencia", "Adherencia_Acumulada", "TAS_Media_Acumulada"]]
lagged_vars = ["Adherencia", "TAS", "Adherencia_Acumulada", "TAS_Media_Acumulada"]
for var in lagged_vars:
    for i in range(1, 4):
        tesis_lag[f"{var}_lag{i}"] = tesis_lag.groupby("idPaciente")[var].shift(i)

In [6]:
from statsmodels.discrete.discrete_model import Logit
fixed_effects = ["Intercept", "Adherencia_lag1", "Adherencia_Acumulada_lag2",  "TAS_lag1", "TAS_Media_Acumulada_lag2"]
lr = Logit(tesis_lag.dropna()["Adherencia"], tesis_lag.dropna()[fixed_effects], groups=tesis_lag["idPaciente"]).fit()
print(lr.summary())
# Las covariables para TAS no son significativas

Optimization terminated successfully.
         Current function value: 0.350278
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             Adherencia   No. Observations:                 2240
Model:                          Logit   Df Residuals:                     2235
Method:                           MLE   Df Model:                            4
Date:                Tue, 27 Jul 2021   Pseudo R-squ.:                  0.2120
Time:                        22:41:55   Log-Likelihood:                -784.62
converged:                       True   LL-Null:                       -995.73
Covariance Type:            nonrobust   LLR p-value:                 4.396e-90
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept                    -0.1008      0.853     -0.118      0.906      -1.

In [7]:
max_range = tesis["tpo_programa"].max()+1

# Modelos para cada tiempo con 1 variables rezagada

In [13]:
# Exogeneidad por tiempo con un solo lag
from statsmodels.discrete.discrete_model import Logit
tiempos = []
for i in range(1,max_range):
    tesis_aux = tesis_lag[tesis_lag["tpo_programa"] == i]
    print("\n", "*"*50, "Target time:", i, "*"*50, "\n")
    fixed_effects = ["Intercept", "Adherencia_lag1", "TAS_lag1"]
    lr = Logit(tesis_aux["Adherencia"], tesis_aux[fixed_effects], groups=tesis_aux["idPaciente"]).fit()
    print(lr.summary())
    if lr.pvalues["TAS_lag1"] < 0.05:
        tiempos.append(str(i))


 ************************************************** Target time: 1 ************************************************** 

Optimization terminated successfully.
         Current function value: 0.438515
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             Adherencia   No. Observations:                  560
Model:                          Logit   Df Residuals:                      557
Method:                           MLE   Df Model:                            2
Date:                Tue, 27 Jul 2021   Pseudo R-squ.:                  0.1237
Time:                        22:57:12   Log-Likelihood:                -245.57
converged:                       True   LL-Null:                       -280.23
Covariance Type:            nonrobust   LLR p-value:                 8.885e-16
                      coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------

# Modelos para cada tiempo con 1 variables rezagada y promedio acumulado

In [9]:
# Exogeneidad por tiempo con un solo lag
from statsmodels.discrete.discrete_model import Logit
tiempos = []
for i in range(2,max_range):
    tesis_aux = tesis_lag[tesis_lag["tpo_programa"] == i]
    print("\n", "*"*50, "Target time:", i, "*"*50, "\n")
    fixed_effects = ["Intercept", "Adherencia_lag1", "Adherencia_Acumulada_lag2", "TAS_lag1", "TAS_Media_Acumulada_lag2"]
    lr = Logit(tesis_aux["Adherencia"], tesis_aux[fixed_effects], groups=tesis_aux["idPaciente"]).fit()
    print(lr.summary())
    if (lr.pvalues["TAS_lag1"] < 0.05) or (lr.pvalues["TAS_Media_Acumulada_lag2"] < 0.05):
        tiempos.append(str(i))


 ************************************************** Target time: 2 ************************************************** 

Optimization terminated successfully.
         Current function value: 0.370078
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             Adherencia   No. Observations:                  560
Model:                          Logit   Df Residuals:                      555
Method:                           MLE   Df Model:                            4
Date:                Tue, 27 Jul 2021   Pseudo R-squ.:                  0.2113
Time:                        22:41:56   Log-Likelihood:                -207.24
converged:                       True   LL-Null:                       -262.76
Covariance Type:            nonrobust   LLR p-value:                 4.367e-23
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------

In [10]:
print(f"De los modelos para cada tiempo con un solo lag en las covariables, la variables TAS es significativa en el/los tiempos {', '.join(tiempos)}")

De los modelos para cada tiempo con un solo lag en las covariables, la variables TAS es significativa en el/los tiempos 


# Modelos para cada tiempo con 2 variables rezagadas y promedio acumulado

In [11]:
# Exogeneidad por tiempo con dos lag
from statsmodels.discrete.discrete_model import Logit
tiempos = []
for i in range(3,max_range):
    tesis_aux = tesis_lag[tesis_lag["tpo_programa"] == i]
    print("\n", "*"*50, "Target time:", i, "*"*50, "\n")
    fixed_effects = ["Intercept", "Adherencia_lag1", "Adherencia_lag2", "Adherencia_Acumulada_lag3", "TAS_lag1", "TAS_lag2", "TAS_Media_Acumulada_lag3"]
    lr = Logit(tesis_aux["Adherencia"], tesis_aux[fixed_effects], groups=tesis_aux["idPaciente"]).fit()
    print(lr.summary())
    if (lr.pvalues["TAS_lag1"] < 0.05) or (lr.pvalues["TAS_lag2"] < 0.05) or (lr.pvalues["TAS_Media_Acumulada_lag3"] < 0.05):
        tiempos.append(str(i))


 ************************************************** Target time: 3 ************************************************** 

Optimization terminated successfully.
         Current function value: 0.376001
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             Adherencia   No. Observations:                  560
Model:                          Logit   Df Residuals:                      553
Method:                           MLE   Df Model:                            6
Date:                Tue, 27 Jul 2021   Pseudo R-squ.:                  0.1892
Time:                        22:41:56   Log-Likelihood:                -210.56
converged:                       True   LL-Null:                       -259.69
Covariance Type:            nonrobust   LLR p-value:                 5.808e-19
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------

In [12]:
print(f"De los modelos para cada tiempo con un solo lag en las covariables, la variables TAS es significativa en el/los tiempos {', '.join(tiempos)}")

De los modelos para cada tiempo con un solo lag en las covariables, la variables TAS es significativa en el/los tiempos 6
