In [12]:
import pandas as pd
from pathlib import Path
import statsmodels.formula.api as smf

In [13]:
# Input table read by this notebook.
MASTER = Path("../data/processed/master_teams_2000_2019.csv")

# Output folders: CSV tables and LaTeX tables respectively.
TAB_OUT = Path("../data/processed")
LATEX_OUT = Path("../latex/tables")

# Create both output folders up front; already-existing folders are left as is.
for out_dir in (TAB_OUT, LATEX_OUT):
    out_dir.mkdir(parents=True, exist_ok=True)

In [14]:
# Load the master CSV; the "season_date" column is parsed as dates on read.
df = pd.read_csv(
    filepath_or_buffer=MASTER,
    parse_dates=["season_date"],
)

In [15]:
# Candidate specifications as (formula, display label) pairs, in fitting order.
# Written as an ordered mapping formula -> label and flattened to a list of
# 2-tuples, which is the shape the fitting loop iterates over.
formulas = list(
    {
        "W ~ RunDiff": "Lineal: W ~ RunDiff",
        "W ~ ERA": "Lineal: W ~ ERA",
        "W ~ HR": "Lineal: W ~ HR",
        "W ~ logHR1": "Lineal: W ~ log(HR+1)",
        "W ~ RunDiff + I(RunDiff**2)": "Cuadrático: W ~ RunDiff + RunDiff^2",
    }.items()
)

In [16]:
# Fit every specification in `formulas` by OLS on `df` and flatten the results
# into one record per estimated coefficient. Model-level statistics (global F
# test, degrees of freedom, R^2, N) are repeated on each coefficient row of the
# same fit so the table can later be collapsed to one row per model.
rows = []
for fml, nice in formulas:
    model = smf.ols(fml, data=df).fit()
    ci = model.conf_int()  # columns 0 / 1 hold the lower / upper bounds

    # Fit-level fields that open each record.
    fit_head = {
        "Modelo": nice,
        "Fórmula": fml,
        "gl (modelo,resid)": f"({int(model.df_model)},{int(model.df_resid)})",
        "F global": float(model.fvalue),
        "p(F)": float(model.f_pvalue),
    }
    # Fit-level fields that close each record.
    fit_tail = {
        "R2": model.rsquared,
        "R2_adj": model.rsquared_adj,
        "N": int(model.nobs),
    }

    for term in model.params.index:
        pval = model.pvalues[term]
        # Key order here fixes the column order of the resulting frame.
        rows.append(
            {
                **fit_head,
                "Término": term,
                "β": model.params[term],
                "EE(β)": model.bse[term],
                "t": model.tvalues[term],
                "p(t)": pval,
                "CI95% inf": ci.loc[term, 0],
                "CI95% sup": ci.loc[term, 1],
                "Signif. 5%": "Sí" if pval < 0.05 else "No",
                **fit_tail,
            }
        )

sig_df = pd.DataFrame(rows)

In [17]:
# Persist the per-coefficient table; floats are written with six decimals.
sig_csv = TAB_OUT.joinpath("significancia_ols.csv")
sig_df.to_csv(
    sig_csv,
    float_format="%.6f",
    index=False,
)
print(f"Tabla de significancia guardada en: {sig_csv}")

Tabla de significancia guardada en: ../data/processed/significancia_ols.csv


In [18]:
# Collapse the per-coefficient table to one row per fitted model. The
# model-level statistics are repeated on every coefficient row of a fit, so
# taking the first value within each group recovers them.
model_keys = ["Modelo", "Fórmula", "gl (modelo,resid)"]
model_stats = ["F global", "p(F)", "R2", "R2_adj", "N"]

per_model = sig_df.groupby(model_keys, as_index=False).agg(
    dict.fromkeys(model_stats, "first")
)
# Highest R2 first.
compact = per_model.sort_values("R2", ascending=False)

compact_csv = TAB_OUT.joinpath("significancia_global_ols.csv")
compact.to_csv(
    compact_csv,
    float_format="%.6f",
    index=False,
)
print(f"Resumen global guardado en: {compact_csv}")

Resumen global guardado en: ../data/processed/significancia_global_ols.csv


In [19]:
# Characters with a special meaning in LaTeX text mode, mapped to a form that
# prints the character literally.
LATEX_SPECIAL_CHARS = {
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}

# Headers that are deliberately written as LaTeX markup and must NOT be escaped.
# Shared by both tables; columns absent from a table are simply not looked up.
LATEX_MATH_HEADERS = {
    "β": "$\\beta$",
    "EE(β)": "EE($\\beta$)",
    "t": "$t$",
    "p(t)": "$p(t)$",
    "CI95% inf": "CI95\\% inf",
    "CI95% sup": "CI95\\% sup",
    "F global": "$F$",
    "p(F)": "$p(F)$",
    "R2": "$R^2$",
    "R2_adj": "$R^2_{adj}$",
}


def latex_escape(text):
    """Return ``str(text)`` with LaTeX special characters made literal.

    The replacement is done character by character, so the braces and
    backslashes introduced by a replacement are never escaped a second time.
    """
    return "".join(LATEX_SPECIAL_CHARS.get(ch, ch) for ch in str(text))


def render_latex_table(frame, math_headers, caption, label):
    """Render ``frame`` as a LaTeX table string with consistent escaping.

    Parameters:
        frame: DataFrame to render; it is not modified.
        math_headers: mapping of column name -> ready-made LaTeX header. These
            headers are emitted verbatim; every other header is escaped.
        caption: table caption, passed through to ``to_latex`` unchanged.
        label: table label, passed through to ``to_latex`` unchanged.

    Returns:
        The LaTeX source produced by ``DataFrame.to_latex``.

    Escaping is done here and ``escape=False`` is passed explicitly because the
    default of ``escape`` differs between pandas versions: automatic escaping
    would mangle the math headers, while no escaping at all would let ``%``,
    ``~`` and ``^`` from headers and text cells through as active characters.
    """
    table = frame.copy()
    # Only non-numeric columns carry free text; numbers go through float_format.
    for col in table.select_dtypes(exclude="number").columns:
        table[col] = table[col].map(latex_escape, na_action="ignore")
    table.columns = [math_headers.get(col, latex_escape(col)) for col in frame.columns]
    return table.to_latex(
        index=False,
        float_format="%.4f",
        caption=caption,
        label=label,
        escape=False,
    )


# Optional export: a failure is reported but does not stop the notebook.
try:
    # Render both tables before touching the disk, so a rendering error cannot
    # leave an empty or half-written .tex file behind.
    sig_tex = render_latex_table(
        sig_df,
        LATEX_MATH_HEADERS,
        caption="Pruebas de significancia: F global y t por coeficiente",
        label="tab:significancia_ols",
    )
    compact_tex = render_latex_table(
        compact,
        LATEX_MATH_HEADERS,
        caption="Significancia global por modelo (prueba F)",
        label="tab:significancia_global_ols",
    )

    # The tables contain accented text, so the encoding is fixed to UTF-8
    # instead of relying on the platform default.
    (LATEX_OUT / "significancia_ols.tex").write_text(sig_tex, encoding="utf-8")
    (LATEX_OUT / "significancia_global_ols.tex").write_text(
        compact_tex, encoding="utf-8"
    )
    print(
        "Archivos LaTeX exportados (significancia_ols.tex y significancia_global_ols.tex)."
    )
except Exception as e:
    print("No se exportó LaTeX (opcional). Razón:", e)

Archivos LaTeX exportados (significancia_ols.tex y significancia_global_ols.tex).
