In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# ===============================
# CONFIG
# ===============================
# Folder that holds the "Punto 3" result workbooks; change it to match the
# name of the folder where those files live.
INPUT_DIR = Path("INPUT_GLOB")

# Human-readable metric label -> name of the dataset column to analyse.
METRICS = {
    "Índice Global de Madurez": "portal_maturity",
    "Trazabilidad Global": "traceability_score",
    "Interoperabilidad Semántica": "interoperability_semantics",
    "Accesibilidad": "accessibility_score",
}

# Column that identifies the portal each row belongs to.
PORTAL_COL = "portal"

# Output folder for the figures. ``parents=True`` lets this point at a nested
# path (e.g. "results/Punto5_Boxplots") without failing on a missing parent.
OUT_FIG_DIR = Path("Punto5_Boxplots")
OUT_FIG_DIR.mkdir(parents=True, exist_ok=True)

# ===============================
# LOAD ALL DATASETS
# ===============================
# Collect every dataset-level workbook in INPUT_DIR. The matches are sorted so
# the row order of the combined frame does not depend on the order in which
# the filesystem happens to enumerate the files.
dataset_files = sorted(INPUT_DIR.glob("Punto3_*_datasetlevel_*.xlsx"))
if not dataset_files:
    # Fail early with an actionable message; pd.concat on an empty list would
    # otherwise raise an opaque "No objects to concatenate" ValueError.
    raise FileNotFoundError(
        "No se encontraron archivos 'Punto3_*_datasetlevel_*.xlsx' en "
        f"{INPUT_DIR.resolve()}"
    )

dfs = [pd.read_excel(f) for f in dataset_files]

# Stack all workbooks into a single frame with a fresh 0..n-1 index.
df_all = pd.concat(dfs, ignore_index=True)

print("Datasets totales:", df_all.shape)
print("Portales:", df_all[PORTAL_COL].unique())

# ===============================
# BOXPLOTS + STD
# ===============================
# One descriptive-statistics frame per metric that was actually plotted.
summary_stats = []

for label, col in METRICS.items():
    if col not in df_all.columns:
        print(f"[SKIP] {label}: columna no encontrada")
        continue

    # Keep only rows where both the portal and the metric value are present.
    data = df_all[[PORTAL_COL, col]].dropna()
    if data.empty:
        # Nothing to aggregate or draw for this metric.
        print(f"[SKIP] {label}: sin datos válidos")
        continue

    # ---- Mean, median, standard deviation and sample size per portal
    stats = (
        data
        .groupby(PORTAL_COL)[col]
        .agg(["mean", "median", "std", "count"])
        .reset_index()
    )
    stats["metric"] = label
    summary_stats.append(stats)

    # ---- Boxplot
    # With ``by=``, DataFrame.boxplot opens a figure of its own unless an Axes
    # is supplied. Passing ``ax`` makes it draw on the 11x5 figure created
    # here, so the requested size is honoured and no empty figure is left open.
    fig, ax = plt.subplots(figsize=(11, 5))
    data.boxplot(column=col, by=PORTAL_COL, grid=False, ax=ax)

    ax.set_title(f"{label} — Distribución por portal")
    fig.suptitle("")  # clear the figure-level title pandas adds for grouped boxplots
    ax.set_ylabel("Score (0–100)")
    ax.tick_params(axis="x", labelrotation=30)
    fig.tight_layout()

    # ---- Save the image
    out_path = OUT_FIG_DIR / f"boxplot_{col}.png"
    fig.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.close(fig)  # release this specific figure before the next metric

    print(f" Boxplot guardado en: {out_path}")


# ===============================
# EXPORT DESCRIPTIVE STATS
# ===============================
if summary_stats:
    summary_df = pd.concat(summary_stats, ignore_index=True)
    # Output workbook path; adjust it for the user running the script.
    summary_df.to_excel("Punto5_Boxplot_descriptivos.xlsx", index=False)

    print(" Tabla de medias y desviación típica exportada")
else:
    # Every metric was skipped: pd.concat would raise on an empty list, so keep
    # an empty table with the expected columns and do not write a workbook.
    summary_df = pd.DataFrame(
        columns=[PORTAL_COL, "mean", "median", "std", "count", "metric"]
    )
    print("[SKIP] Sin métricas disponibles: no se exportó la tabla descriptiva")