# Ensemble Final repouso + exercicio




In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import mean_absolute_error
from scipy.stats import pearsonr

# Base directory under which the "exercicio", "repouso" and "ensemble"
# sub-folders used below are resolved.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
ROOT = Path("/Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac")

# ------------------------------------------------------------
# 🔍 Auto-discovery of prediction files
# ------------------------------------------------------------
def find_preds(base_dir):
    """Return one predictions CSV found under ``base_dir``.

    Recursively searches ``base_dir`` for regular files whose name matches
    ``*pred*.csv``. The candidates are sorted by path before one is picked,
    so the same file is selected on every run; directory traversal alone
    yields entries in a filesystem-dependent order.

    Args:
        base_dir: Directory to search (``pathlib.Path`` or ``str``).

    Returns:
        The matching path that sorts first.

    Raises:
        FileNotFoundError: If no matching file exists under ``base_dir``.
    """
    candidates = sorted(
        path for path in Path(base_dir).rglob("*pred*.csv") if path.is_file()
    )
    if not candidates:
        raise FileNotFoundError(f"Nenhum arquivo de predictions em {base_dir}")
    return candidates[0]

# Resolve one predictions CSV for each condition folder under ROOT
# (exercise first, then rest) and echo the selected paths.
EXERCISE_PREDS, REPOUSO_PREDS = (
    find_preds(ROOT / condition_dir)
    for condition_dir in ("exercicio", "repouso")
)

print("Exercise preds:", EXERCISE_PREDS)
print("Repouso preds :", REPOUSO_PREDS)

# ------------------------------------------------------------
# 📥 LOAD
# ------------------------------------------------------------
# Read both prediction tables and list their raw column names so the
# schema of each file can be inspected before normalisation.
df_ex, df_rep = (pd.read_csv(csv_path) for csv_path in (EXERCISE_PREDS, REPOUSO_PREDS))

print("\nColumns EX:", list(df_ex.columns))
print("Columns RP:", list(df_rep.columns))

# ------------------------------------------------------------
# 🧠 SCHEMA NORMALIZATION
# ------------------------------------------------------------
def normalize_preds(df):
    """Return a copy of ``df`` reduced to the canonical prediction schema.

    The ground-truth column is looked up under the names ``hr_true``,
    ``y_true`` or ``target`` and the prediction column under ``hr_pred``,
    ``y_pred``, ``prediction`` or ``pred`` (first match wins, in that order).
    They are renamed to ``hr_true`` and ``hr_pred``. Missing ``Id`` and
    ``window`` columns are synthesised from the row position and the frame's
    index respectively. The input frame is not modified.

    Args:
        df: DataFrame holding at least one recognised ground-truth column
            and one recognised prediction column.

    Returns:
        A new DataFrame with exactly the columns
        ``["Id", "window", "hr_true", "hr_pred"]``, in that order.

    Raises:
        ValueError: If no recognised ground-truth or prediction column is
            present.
    """
    target_candidates = ("hr_true", "y_true", "target")
    pred_candidates = ("hr_pred", "y_pred", "prediction", "pred")

    # A default of None avoids leaking a bare, message-less StopIteration
    # when the file uses an unexpected schema.
    target_col = next((c for c in target_candidates if c in df.columns), None)
    pred_col = next((c for c in pred_candidates if c in df.columns), None)

    if target_col is None or pred_col is None:
        raise ValueError(
            "Could not identify prediction schema: expected one of "
            f"{list(target_candidates)} and one of {list(pred_candidates)}, "
            f"found columns {list(df.columns)}"
        )

    # Work on an explicit copy so the caller's frame is never mutated.
    out = df.copy().rename(columns={target_col: "hr_true", pred_col: "hr_pred"})

    # Guarantee the alignment keys exist even when the file lacks them.
    if "Id" not in out.columns:
        out["Id"] = np.arange(len(out))
    if "window" not in out.columns:
        out["window"] = out.index

    return out[["Id", "window", "hr_true", "hr_pred"]]

# Bring both tables to the shared Id / window / hr_true / hr_pred layout
# and show the first rows of each as a sanity check.
df_ex_n, df_rep_n = map(normalize_preds, (df_ex, df_rep))

print("\n‚úÖ Schema normalizado")
for normalized in (df_ex_n, df_rep_n):
    print(normalized.head())

# ------------------------------------------------------------
# 🔗 ALIGNMENT
# ------------------------------------------------------------
# Keep only the rows present in both prediction tables. Rows are matched on
# Id, window and the ground-truth hr_true; the two hr_pred columns are kept
# apart by the "_ex" / "_rep" suffixes.
# NOTE(review): hr_true is compared by exact equality here; confirm both
# files store identical ground-truth values for the same sample.
df_ens = df_ex_n.merge(
    df_rep_n,
    on=["Id", "window", "hr_true"],
    how="inner",
    suffixes=("_ex", "_rep")
)

# Raise explicitly rather than `assert`: assertions are removed when Python
# runs with -O, which would let an empty ensemble continue silently.
if df_ens.empty:
    raise ValueError("‚ùå Nenhuma amostra comum ap√≥s alinhamento")

print(f"\nAmostras usadas no ensemble: {len(df_ens)}")

# ------------------------------------------------------------
# 🧠 ENSEMBLE
# ------------------------------------------------------------
# Ensemble prediction: unweighted mean of the exercise and rest predictions.
df_ens["hr_pred_ens"] = df_ens["hr_pred_ex"].add(df_ens["hr_pred_rep"]).div(2)

# ------------------------------------------------------------
# 📊 METRICS
# ------------------------------------------------------------
# Score the averaged prediction against the ground truth: mean absolute
# error, root-mean-square error and Pearson correlation.
y_true = df_ens["hr_true"]
y_ens = df_ens["hr_pred_ens"]
residual = y_true - y_ens

mae = mean_absolute_error(y_true, y_ens)
rmse = np.sqrt(np.mean(residual ** 2))
corr, _ = pearsonr(y_true, y_ens)

print("\nüìä ENSEMBLE RESULTS")
print(f"MAE  : {mae:.3f}")
print(f"RMSE : {rmse:.3f}")
print(f"Corr : {corr:.3f}")

# ------------------------------------------------------------
# 💾 SAVE
# ------------------------------------------------------------
# Persist the merged table (both model predictions plus their average),
# creating the output folder first if it does not exist yet.
OUT_DIR = ROOT.joinpath("ensemble", "final")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_FILE = OUT_DIR.joinpath("ensemble_round02_ex_rep.csv")
df_ens.to_csv(OUT_FILE, index=False)

print("\nüíæ Ensemble salvo em:")
print(OUT_FILE)

print("\nüèÅ ENSEMBLE FINAL CONCLU√çDO")

🔎 Procurando predictions de EXERCÍCIO...
Encontrados:
  /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/exercicio/predictions/round_02/round_02_test_predictions.csv
  /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/exercicio/predictions/round_03/round_03_test_predictions.csv
  /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/exercicio/predictions/round_01/round_01_test_predictions.csv

🔎 Procurando predictions de REPOUSO...
Encontrados:
  /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/repouso/results/xgboost_baseline_preds.csv

✅ Usando:
Exercise: /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/exercicio/predictions/round_02/round_02_test_predictions.csv
Repouso : /Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/repouso/results/xgboost_baseline_preds.csv


AssertionError: ❌ Colunas ausentes no exercício

In [2]:
# Show each resolved predictions path followed by whether it exists on disk.
for preds_path in (EXERCISE_PREDS, REPOUSO_PREDS):
    print(preds_path)
    print(preds_path.exists())

/Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/ensemble/exercicio/predictions/round_02/round_02_test_predictions.csv
False
/Users/edmundobrown/Documents/MLGeral/AI-HealthCare/HREstimationMac/ensemble/repouso/results/xgboost_baseline_preds.csv
False
