In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

import os
from pathlib import Path

In [None]:
# Project directories, all expressed relative to the notebook's working directory.
DATA_DIR = Path("../../data/")
PLOTS_DIR = Path("../../plots/python/")
TAB_OUT = DATA_DIR / "processed"
LATEX_OUT = Path("../../docs/latex_utils/tables")

# Create every directory up front (no error if it already exists).
for _project_dir in (DATA_DIR, PLOTS_DIR, TAB_OUT, LATEX_OUT):
    _project_dir.mkdir(parents=True, exist_ok=True)

In [14]:
def save_plot(
    plot: plt.Figure,
    filename: str,
    format: str = "png",
    dpi: int = 300,
    close: bool = True,
):
    """Write a matplotlib figure into ``PLOTS_DIR`` and report the outcome.

    Args:
        plot: Figure to save.
        filename: Output file name without extension.
        format: Image format; also used as the file extension.
        dpi: Resolution passed to ``savefig``.
        close: When True, the figure is closed after a successful save.

    Any exception raised while saving or closing is caught and printed
    instead of being propagated.
    """
    PLOTS_DIR.mkdir(parents=True, exist_ok=True)
    plot_name = f"{filename}.{format}"
    target = PLOTS_DIR / plot_name
    try:
        plot.savefig(target, format=format, dpi=dpi, bbox_inches="tight")
        if close:
            plt.close(plot)
        print(f"\nPlot {plot_name} saved correctly in {PLOTS_DIR}/{plot_name}")
    except Exception as error:
        print(f"\nCould not save plot {plot_name}. Reason: {error}")

In [15]:
def save_latex_table(
    df,
    filename: str,
    rename_map: dict,
    caption: str,
    label: str,
    float_format: str = "%.4f",
):
    """Export a DataFrame as a LaTeX table file inside ``LATEX_OUT``.

    Args:
        df: DataFrame to export (the index is not written).
        filename: Name of the output file, including the ``.tex`` extension.
        rename_map: Mapping of column names to the headers shown in the table.
        caption: LaTeX caption of the table.
        label: LaTeX label of the table.
        float_format: printf-style format applied to float cells. Defaults to
            ``"%.4f"``, the format that was previously hard-coded.

    Any exception is caught and printed instead of being propagated, so a
    failed export does not interrupt the notebook.
    """
    try:
        # Same guarantee save_plot gives for PLOTS_DIR: the target folder exists.
        LATEX_OUT.mkdir(parents=True, exist_ok=True)
        table_tex = df.rename(columns=rename_map).to_latex(
            index=False,
            float_format=float_format,
            caption=caption,
            label=label,
        )
        # Captions/headers contain accented characters; write UTF-8 explicitly
        # so the file does not depend on the platform's locale encoding.
        with open(LATEX_OUT / filename, "w", encoding="utf-8") as f:
            f.write(table_tex)
        print(f"\nFile {filename} exported correctly in {LATEX_OUT}/{filename}")
    except Exception as e:
        print(f"\nCould not export {filename}. Reason: {e}")

In [16]:
# Load the spreadsheet; header=1 uses the second sheet row as column labels.
df = pd.read_excel(DATA_DIR / "DatosPracticaRLM.xlsx", header=1)
# Normalise column names: drop the accent and name the unlabeled first column.
df = df.rename(columns={"educación": "educacion", "Unnamed: 0": "estado"})
print(df.head())

# Display headers for the LaTeX export. Keys must match the column names
# AFTER the rename above ("educacion", without accent); keys that match no
# column are ignored by DataFrame.rename, which left that header unrenamed.
rename_map = {
    "estado": "Estado",
    "educacion": "Educación",
    "ingreso": "Ingreso",
    "menores": "Menores",
    "urbano": "Urbano",
}

save_latex_table(
    df.head(),
    filename="tabla_head_datos.tex",
    rename_map=rename_map,
    caption="Primeras filas del conjunto de datos utilizado en el análisis",
    label="tab:head_datos",
)

  estado  educacion  ingreso  menores  urbano
0     ME        189     2824    350.7     508
1     NH        169     3259    345.9     564
2     VT        230     3072    348.5     322
3     MA        168     3835    335.3     846
4     RI        180     3549    327.1     871

File tabla_head_datos.tex exported correctly in ../../docs/latex_utils/tables/tabla_head_datos.tex


In [17]:
def get_avg_per_column(df: pd.DataFrame, column_name: str) -> float:
    """Print and return the mean of one column of ``df``.

    Args:
        df: DataFrame holding the column.
        column_name: Name of the column to average.

    Returns:
        The value of ``df[column_name].mean()``.
    """
    average = df[column_name].mean()
    print("Promedio de la columna", column_name, "es:", average)
    # Return the value so the function matches its "get_" name and annotation.
    return average


# Report the mean of each explanatory variable.
for _predictor in ("ingreso", "menores", "urbano"):
    get_avg_per_column(df, _predictor)

Promedio de la columna ingreso es: 3225.294117647059
Promedio de la columna menores es: 358.8862745098039
Promedio de la columna urbano es: 664.5098039215686


In [19]:
# Fit the multiple linear regression of educacion on the three predictors
# by ordinary least squares, then show the full statsmodels summary.
model = smf.ols(
    formula="educacion ~ ingreso + menores + urbano",
    data=df,
).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:              educacion   R-squared:                       0.690
Model:                            OLS   Adj. R-squared:                  0.670
Method:                 Least Squares   F-statistic:                     34.81
Date:                Sun, 07 Sep 2025   Prob (F-statistic):           5.34e-12
Time:                        12:54:30   Log-Likelihood:                -237.79
No. Observations:                  51   AIC:                             483.6
Df Residuals:                      47   BIC:                             491.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   -286.8388     64.920     -4.418      0.0

In [21]:
# Confidence bounds are computed once; column 0 is the lower bound and
# column 1 the upper bound of each parameter's interval.
conf_int_bounds = model.conf_int()

# One row per model parameter, kept fully numeric for further use.
coef_table = pd.DataFrame(
    {
        "Coeficiente": model.params,
        "Error Std.": model.bse,
        "t": model.tvalues,
        "p-valor": model.pvalues,
        "IC 2.5%": conf_int_bounds[0],
        "IC 97.5%": conf_int_bounds[1],
    }
)
coef_table.index.name = "Parámetro"
print("\nTabla de coeficientes:\n", coef_table)

# Headers for the LaTeX export ("%" is escaped because it starts a LaTeX comment).
rename_map = {
    "Coeficiente": "Coef.",
    "Error Std.": "Err. Std.",
    "p-valor": "p",
    "IC 2.5%": "IC 2.5\\%",
    "IC 97.5%": "IC 97.5\\%",
}

# Move the parameter names from the index into a regular column for export.
coef_table_out = coef_table.reset_index()
save_latex_table(
    # The helper formats floats with a fixed number of decimals, which would
    # print very small p-values as 0.0000. Pre-format them with significant
    # digits in the exported copy only; coef_table_out itself stays numeric.
    df=coef_table_out.assign(
        **{"p-valor": coef_table_out["p-valor"].map("{:.3g}".format)}
    ),
    filename="reg_multiple_coeficientes.tex",
    rename_map=rename_map,
    caption="Coeficientes del modelo OLS: Educación vs Ingreso, Menores y Urbano",
    label="tab:ols_coef_educacion",
)


Tabla de coeficientes:
            Coeficiente  Error Std.         t       p-valor     IC 2.5%  \
Parámetro                                                                
Intercept  -286.838763   64.919932 -4.418347  5.823463e-05 -417.440819   
ingreso       0.080653    0.009299  8.673756  2.563747e-11    0.061947   
menores       0.817338    0.159790  5.115084  5.694503e-06    0.495882   
urbano       -0.105806    0.034282 -3.086334  3.392812e-03   -0.174773   

             IC 97.5%  
Parámetro              
Intercept -156.236706  
ingreso      0.099359  
menores      1.138793  
urbano      -0.036839  

File reg_multiple_coeficientes.tex exported correctly in ../../docs/latex_utils/tables/reg_multiple_coeficientes.tex


In [23]:
# Store the model's fitted values and residuals alongside the observed data,
# then preview them next to the response variable.
df["y_hat"], df["resid"] = model.fittedvalues, model.resid
print(df.loc[:, ["educacion", "y_hat", "resid"]].head())

   educacion       y_hat      resid
0        189  173.816810  15.183190
1        169  199.052606 -30.052606
2        230  211.700632  18.299368
3        168  207.007743 -39.007743
4        180  174.593588   5.406412


In [24]:
# Global goodness-of-fit statistics taken from the fitted model.
metrics = {
    "R²": model.rsquared,
    "R² ajustado": model.rsquared_adj,
    "Estadístico F": model.fvalue,
    "p-valor (F)": model.f_pvalue,
    "AIC": model.aic,
    "BIC": model.bic,
    "Observaciones": model.nobs,
}

# Long format: one row per metric. Kept numeric for display and reuse.
metrics_df = pd.DataFrame(list(metrics.items()), columns=["Métrica", "Valor"])
print(metrics_df)

save_latex_table(
    # The values span many orders of magnitude (a p-value near 1e-12 next to
    # an integer count). The helper's fixed-decimal float format would print
    # them as 0.0000 and 51.0000, so the exported copy is pre-formatted with
    # four significant digits instead.
    df=metrics_df.assign(Valor=metrics_df["Valor"].map("{:.4g}".format)),
    filename="tabla_metricas_regresion.tex",
    rename_map={},  # column names are already the desired headers
    caption="Métricas globales del modelo de regresión lineal múltiple",
    label="tab:metricas_regresion",
)

         Métrica         Valor
0             R²  6.896288e-01
1    R² ajustado  6.698179e-01
2  Estadístico F  3.481053e+01
3    p-valor (F)  5.336770e-12
4            AIC  4.835767e+02
5            BIC  4.913040e+02
6  Observaciones  5.100000e+01

File tabla_metricas_regresion.tex exported correctly in ../../docs/latex_utils/tables/tabla_metricas_regresion.tex
