In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

from matplotlib.colors import ListedColormap
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

# plt.style.use('dark_background')
random_state=42
YLIM = (125, 140)

PARAMS_MAPPING = {
    "(Intercept)": "$intercepto$",
    "Sexo": "$sexo_i$",
    "Edad": "$edad_i$",
    "Mes": "$mes_j$",
    "Adherencia": "$adherencia_{ij}$",
    "Adherencia_Perfecta": "$adherencia\ perfecta_i$",
    "Adherencia_Total": "$\overline{adherencia}_i$",
    "Adherencia_lag1": "$adherencia_{ij-1}$",
    "Adherencia_Acumulada": "$\overline{adherencia}_{ij}$",
}

nlme = importr('nlme')
stats = importr("stats")
base = importr("base")

warnings.filterwarnings("ignore")

SAVE_MODELS = True

In [2]:
def add_minor_if_zero(num):
    return num if num != "0.0" else "<0.001"

In [3]:
def model_to_latex(model) -> str:
    model_summary = base.summary(model)
    cov_names = model_summary.rx2("coefficients").rx2("fixed").names
    n_covs = len(cov_names)
    fe_params = dict(zip(cov_names, model_summary.rx2("tTable")[:n_covs]))
    se_params = dict(zip(cov_names, model_summary.rx2("tTable")[n_covs:n_covs*2]))
    tvalues = dict(zip(cov_names, model_summary.rx2("tTable")[n_covs*3:n_covs*4]))
    pvalues = dict(zip(cov_names, model_summary.rx2("tTable")[n_covs*4:n_covs*5]))
    pvalues = {key: f"${add_minor_if_zero(str(round(value, 3)))}$" for key, value in pvalues.items()}
    table = ""
    for name in cov_names:
        table += f"\n\t    {PARAMS_MAPPING[name]} & {round(fe_params[name], 3)} & {round(se_params[name], 3)} & {round(tvalues[name], 3)} & ".replace(".", ",") + f"{pvalues[name]} \\\\"
    return fr"""
    \begin{{tabular}}{{*{{5}}{{|c}}|}}
        \hline
        \multicolumn{{3}}{{|c}}{{Log-Likelihood}} & \multicolumn{{2}}{{|c|}}{{{round(model_summary.rx2("logLik")[0], 2)}}} \\
        \multicolumn{{3}}{{|c}}{{AIC}} & \multicolumn{{2}}{{|c|}}{{{round(model_summary.rx2("AIC")[0], 2)}}} \\
        \multicolumn{{3}}{{|c}}{{BIC}} & \multicolumn{{2}}{{|c|}}{{{round(model_summary.rx2("BIC")[0], 2)}}} \\
        \hline
        Covariable				   & Coef.                         & Std. Err.                  & z                           & $P<|z|$  \\
        \hline{table}
        \hline
    \end{{tabular}}
"""

In [4]:
def create_sample_df(df):
    adherencias = [
        "Adherencia",
        "Adherencia_Perfecta",
        "Adherencia_Total",
        "Adherencia_lag1",
        "Adherencia_Acumulada",
    ]
    df["Intercept"] = 1
    df["Sexo"] = 0
    df["Edad"] = EDAD
    for adh in adherencias:
        if adh in df.columns:
            df[f"Mes*{adh}"] = df["Mes"] * df[adh]
    return df

In [5]:
def get_predictions(model, df: pd.DataFrame):
    names = model.rx2("coefficients").rx2("fixed").names
    names = [name.replace("(", "").replace(")", "").replace(":", "*") for name in names]
    covs = model.rx2("coefficients").rx2("fixed")
    coefs = dict(zip(names, covs))
    preds = df["Intercept"] * coefs["Intercept"]
    for name, coef in coefs.items():
        if name == "Intercept":
            continue
        if name not in df.columns:
            raw_names = name.split("*")
            raw_names.reverse()
            name = "*".join(raw_names)
        preds += df[name] * coef
    return preds

In [6]:
tesis = pd.read_csv("../Datos/tesis_final_preprocesado.csv")
tesis.head()

Unnamed: 0,idPaciente,Mes,TAS,Adherencia,Sexo,Edad,tas_basal,Adherencia_Acumulada,Adherencia_Total,Adherencia_Perfecta,...,Intercept,Adherencia_lag1,TAS_lag1,Adherencia_Acumulada_lag2,TAS_Media_Acumulada_lag2,Mes*Adherencia,Mes*Adherencia_Perfecta,Mes*Adherencia_Total,Mes*Adherencia_lag1,Mes*Adherencia_Acumulada
0,4026,1,119.0,1,0,76.0,116,1.0,1.0,1,...,1,0.0,116.0,0.0,116.0,1,1,1.0,0.0,1.0
1,4026,2,127.0,1,0,76.0,116,1.0,1.0,1,...,1,1.0,119.0,0.0,116.0,2,2,2.0,2.0,2.0
2,4026,3,140.0,1,0,76.0,116,1.0,1.0,1,...,1,1.0,127.0,1.0,119.0,3,3,3.0,3.0,3.0
3,4026,4,146.71271,1,0,76.0,116,1.0,1.0,1,...,1,1.0,140.0,1.0,123.0,4,4,4.0,4.0,4.0
4,4026,5,177.708084,1,0,76.0,116,1.0,1.0,1,...,1,1.0,146.71271,1.0,128.666667,5,5,5.0,5.0,5.0


In [7]:
EDAD = tesis["Edad"].mean()

In [8]:
with (ro.default_converter + pandas2ri.converter).context():
    tesis = ro.conversion.get_conversion().py2rpy(tesis)

In [9]:
random_formula = ro.Formula("~ 1 | idPaciente")

# Modelo 1

In [10]:
formula = ro.Formula("TAS ~ Sexo + Edad + Adherencia_Perfecta + Mes")
modelo_1 = nlme.lme(formula, random=random_formula, data=tesis, method="ML", corr=nlme.corAR1())

In [11]:
if SAVE_MODELS:
    with open("../Tesina/Mio/tables/modelo_1.tex", "w") as f:
        f.write(model_to_latex(modelo_1))

# Modelo 2

In [12]:
formula = ro.Formula("TAS ~ Sexo + Edad + Adherencia_Total + Mes")
modelo_2 = nlme.lme(formula, random=random_formula, data=tesis, method="ML", corr=nlme.corAR1())

In [13]:
if SAVE_MODELS:
    with open("../Tesina/Mio/tables/modelo_2.tex", "w") as f:
        f.write(model_to_latex(modelo_2))

# Modelo 3

In [14]:
formula = ro.Formula("TAS ~ Sexo + Edad + Adherencia + Mes")
modelo_3 = nlme.lme(formula, random=random_formula, data=tesis, method="ML", corr=nlme.corAR1())

In [15]:
if SAVE_MODELS:
    with open("../Tesina/Mio/tables/modelo_3.tex", "w") as f:
        f.write(model_to_latex(modelo_3))

# Modelo 4

In [16]:
formula = ro.Formula("TAS ~ Sexo + Edad + Adherencia_lag1 + Mes")
modelo_4 = nlme.lme(formula, random=random_formula, data=tesis, method="ML", corr=nlme.corAR1())

In [17]:
if SAVE_MODELS:
    with open("../Tesina/Mio/tables/modelo_4.tex", "w") as f:
        f.write(model_to_latex(modelo_4))

# Modelo 5

In [18]:
formula = ro.Formula("TAS ~ Sexo + Edad + Adherencia_Acumulada + Mes")
modelo_5 = nlme.lme(formula, random=random_formula, data=tesis, method="ML", corr=nlme.corAR1())

In [19]:
if SAVE_MODELS:
    with open("../Tesina/Mio/tables/modelo_5.tex", "w") as f:
        f.write(model_to_latex(modelo_5))

# Modelo 6

In [20]:
formula = ro.Formula("TAS ~ Sexo + Edad + Adherencia + Adherencia_Total + Mes")
modelo_6 = nlme.lme(formula, random=random_formula, data=tesis, method="ML", corr=nlme.corAR1())

In [21]:
if SAVE_MODELS:
    with open("../Tesina/Mio/tables/modelo_6.tex", "w") as f:
        f.write(model_to_latex(modelo_6))

# Comparacion

In [22]:
modelos = {
    "Modelo 1": modelo_1,
    "Modelo 2": modelo_2,
    "Modelo 3": modelo_3,
    "Modelo 4": modelo_4,
    "Modelo 5": modelo_5,
    "Modelo 6": modelo_6,
}

FORMAS = {
    "Modelo 1": "Adherencia perfecta (CNVT)",
    "Modelo 2": "Proporción de adherencia (CNVT)",
    "Modelo 3": "Adherencia al tratamiento sin transformar",
    "Modelo 4": "Adherencia al tratamiento en la visita anterior",
    "Modelo 5": "Proporción de adherencia al tratamiento hasta la visita actual",
    "Modelo 6": "",
}

f"""
\begin{{table}}[H]
	\centering
	\caption{{Resumen de los modelos ajustados}}
	\label{{comparacion}}
	\begin{{tabular}}{{*{{5}}{{|c}}|}}
		\hline
		Modelo & Forma de incorporar la adherencia al tratamiento & Parámetro estimado asociado a la adherencia(prob asoc) & AIC & BIC \\
		\hline
		Modelo 1		 & 30773.19 & 30829.66 \\
		Modelo 2 		 & 30820.06 & 30876.52 \\
		Modelo 3 		 & 30825.27 & 30881.73 \\
		Modelo 4 		 & 30838.47 & 30894.94 \\
		Modelo 5 		 & 30810.56 & 30867.02 \\
		Modelo 6 		 & 30775.39 & 30844.4  \\
		\hline
	\end{{tabular}}
\end{{table}}
"""

'\n\x08egin{table}[H]\n\t\\centering\n\t\\caption{Resumen de los modelos ajustados}\n\t\\label{comparacion}\n\t\x08egin{tabular}{*{5}{|c}|}\n\t\t\\hline\n\t\tModelo & Forma de incorporar la adherencia al tratamiento & Parámetro estimado asociado a la adherencia(prob asoc) & AIC & BIC \\\n\t\t\\hline\n\t\tModelo 1\t\t & 30773.19 & 30829.66 \\\n\t\tModelo 2 \t\t & 30820.06 & 30876.52 \\\n\t\tModelo 3 \t\t & 30825.27 & 30881.73 \\\n\t\tModelo 4 \t\t & 30838.47 & 30894.94 \\\n\t\tModelo 5 \t\t & 30810.56 & 30867.02 \\\n\t\tModelo 6 \t\t & 30775.39 & 30844.4  \\\n\t\t\\hline\n\t\\end{tabular}\n\\end{table}\n'

In [23]:
modelo_1

idPaciente,Mes,TAS,...,Mes*Adherencia_Total,Mes*Adherencia_lag1,Mes*Adherencia_Acumulada
modelStruct,ListVector with 2 elements.  reStruct  [RTYPES.VECSXP]  corStruct  [RTYPES.REALSXP],,,,,
reStruct,[RTYPES.VECSXP],,,,,
corStruct,[RTYPES.REALSXP],,,,,
dims,ListVector with 5 elements.  N  [RTYPES.INTSXP]  Q  [RTYPES.INTSXP]  qvec  [RTYPES.REALSXP]  ngrps  [RTYPES.INTSXP]  ncol  [RTYPES.REALSXP],,,,,
N,[RTYPES.INTSXP],,,,,
Q,[RTYPES.INTSXP],,,,,
qvec,[RTYPES.REALSXP],,,,,
ngrps,[RTYPES.INTSXP],,,,,
ncol,[RTYPES.REALSXP],,,,,
contrasts,ListVector with 0 elements.,,,,,

0,1
reStruct,[RTYPES.VECSXP]
corStruct,[RTYPES.REALSXP]

0,1
N,[RTYPES.INTSXP]
Q,[RTYPES.INTSXP]
qvec,[RTYPES.REALSXP]
ngrps,[RTYPES.INTSXP]
ncol,[RTYPES.REALSXP]

0,1
X,[RTYPES.REALSXP]
terms,[RTYPES.REALSXP]

idPaciente,Mes,TAS,...,Mes*Adherencia_Total,Mes*Adherencia_lag1,Mes*Adherencia_Acumulada
4026,1,119.000000,...,1.000000,0.000000,1.000000
4026,2,127.000000,,2.000000,2.000000,2.000000
4026,3,140.000000,,3.000000,3.000000,3.000000
4026,4,146.712710,,4.000000,4.000000,4.000000
...,...,...,,...,...,...
18423,4,119.000000,,4.000000,4.000000,4.000000
18423,5,147.473037,,5.000000,5.000000,5.000000
18423,6,152.000000,,6.000000,6.000000,6.000000
18423,7,143.000000,,7.000000,7.000000,7.000000
