In [None]:
 =========================================================
# ⚙️ INSTALAÇÃO SIMPLES E DIRETA (RECOMENDADO)
# =========================================================

# 1️⃣ Limpar instalações anteriores
!pip uninstall aequitas -y --quiet

# 2️⃣ Instalar versão estável mais recente
!pip install aequitas --upgrade --quiet

# 3️⃣ Instalar dependências específicas se necessário
!pip install "scikit-learn>=1.0" "pandas>=1.3" "numpy>=1.21" --quiet

# 4️⃣ Testar a instalação
try:
    import aequitas
    from aequitas.group import Group
    from aequitas.plotting import Plot
    from aequitas.bias import Bias
    from aequitas.fairness import Fairness

    print("✅ Aequitas instalado com sucesso!")
    print(f"📦 Versão do Aequitas: {aequitas.__version__ if hasattr(aequitas, '__version__') else 'Não informada'}")

    # Testar funcionalidades básicas
    import pandas as pd
    test_df = pd.DataFrame({
        'score': [0.1, 0.2, 0.8, 0.9],
        'label_value': [0, 0, 1, 1],
        'gender': ['M', 'F', 'M', 'F']
    })

    g = Group()
    xtab, _ = g.get_crosstabs(test_df)
    print("✅ Funcionalidades básicas testadas com sucesso!")

except Exception as e:
    print(f"❌ Erro: {e}")

In [None]:
# =========================================================
# 🔍 CHECK VERSION AND INSTALLATION DETAILS
# =========================================================

import pkg_resources

try:
    # Version of the installed Aequitas distribution
    aequitas_version = pkg_resources.get_distribution("aequitas").version
    print(f"📦 Versão do Aequitas instalada: {aequitas_version}")
except pkg_resources.DistributionNotFound:
    # Only "not installed" is expected here; anything else should surface.
    print("📦 Aequitas não encontrado via pkg_resources")

# List every related package and its installed version
print("\n🔍 Pacotes instalados relacionados:")
for package in ['aequitas', 'scikit-learn', 'pandas', 'numpy', 'matplotlib']:
    try:
        version = pkg_resources.get_distribution(package).version
        print(f"   {package}: {version}")
    except pkg_resources.DistributionNotFound:
        print(f"   {package}: Não instalado")

# Check that every Aequitas entry point used by this notebook can be imported
print("\n🧪 Testando importações...")
try:
    from aequitas.group import Group
    from aequitas.plotting import Plot
    from aequitas.bias import Bias
    from aequitas.fairness import Fairness
    from aequitas.preprocessing import preprocess_input_df
    print("✅ Todas as importações funcionando!")

    # Confirm the crosstab method used later is available
    g = Group()
    print(f"✅ Group class: {hasattr(g, 'get_crosstabs')}")

except ImportError as e:
    print(f"❌ Erro de importação: {e}")

### Auditando o arquivo de saída do EDA


### Auditando o modelo

In [None]:
 =========================================================
# ⚖️ AEQUITAS FAIRNESS AUDIT - Heart Disease Binary Model
# =========================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from aequitas.group import Group
from aequitas.fairness import Fairness # Keep import for completeness, but won't use get_group_value_fairness
from aequitas.bias import Bias
from aequitas.plotting import Plot
from datetime import datetime

# ----------------------------------------
# 1️⃣ Caminhos e dados
# ----------------------------------------
drive_path = "/content/drive/Othercomputers/My Mac/HealthProjects/heart-disease-fairness"
data_path = f"{drive_path}/data/heart_resampled.csv"
results_path = f"{drive_path}/results"
os.makedirs(results_path, exist_ok=True)

df = pd.read_csv(data_path)
df["label"] = (df["num"] > 0).astype(int)

# Criar atributos sensíveis (para fairness, não treino)
df["sex_label"] = df["sex"].map({0: "female", 1: "male"})
# Ensure age_group is categorical
df["age_group"] = pd.cut(df["age"], bins=[0, 40, 55, 70, 100],
                         labels=["<40", "40-55", "55-70", "70+"], right=False).astype(str) # Use right=False for inclusivity


# ----------------------------------------
# 2️⃣ Dados para treino
# ----------------------------------------
X = df.drop(columns=["num", "label", "sex_label", "age_group"])
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# ----------------------------------------
# 3️⃣ Treinar modelo
# ----------------------------------------
# Check if model 'model' already exists in the kernel's global scope
# If it exists and is an XGBClassifier, skip training to save time
# You might want to add more checks here if needed
try:
    if 'model' in globals() and isinstance(model, XGBClassifier):
        print("✅ Usando modelo existente do kernel.")
    else:
        print("🏋️‍♂️ Treinando novo modelo...")
        model = XGBClassifier(
            objective="binary:logistic",
            # device="cuda", # Let XGBoost decide based on availability
            eval_metric="logloss",
            learning_rate=0.03,
            n_estimators=150,
            max_depth=4,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )
        model.fit(X_train, y_train)
        print("✅ Modelo treinado com sucesso (sem atributos sensíveis).")
except Exception as e:
    print(f"❌ Erro ao treinar ou usar modelo existente: {e}")
    # If training fails, you might want to handle it or stop execution
    raise


y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = (y_pred_proba >= 0.5).astype(int)

acc = round(accuracy_score(y_test, y_pred), 4)
print(f"\n🎯 Accuracy: {acc}")


# ----------------------------------------
# 4️⃣ Dataset para auditoria Aequitas
# ----------------------------------------
# Ensure indices match after split and adding sensitive attributes
audit_df = pd.DataFrame({
    "score": y_pred_proba,
    "label_value": y_test.values,
    "sex": df.loc[y_test.index, "sex_label"].values,
    "age_group": df.loc[y_test.index, "age_group"].values
})

# Ensure 'score' and 'label_value' are numeric and sensitive attributes are strings
audit_df['score'] = pd.to_numeric(audit_df['score'], errors='coerce')
audit_df['label_value'] = pd.to_numeric(audit_df['label_value'], errors='coerce')
audit_df['sex'] = audit_df['sex'].astype(str)
audit_df['age_group'] = audit_df['age_group'].astype(str)

# Drop rows with NaNs if coercion failed
audit_df.dropna(inplace=True)


# ----------------------------------------
# 5️⃣ Executar Aequitas
# ----------------------------------------
g = Group()
# Removed score_threshold_grouping as it's not supported in this Aequitas version
xtab, _ = g.get_crosstabs(audit_df)


b = Bias()
# Added original_df parameter as required in Aequitas 0.42.0+
bias_df = b.get_disparity_predefined_groups(
    xtab,
    original_df=audit_df, # Pass the audit_df here
    ref_groups_dict={'sex': 'male', 'age_group': '40-55'},
    alpha=0.05
)

# ----------------------------------------
# 6️⃣ Exibir métricas (Focus on bias_df)
# ----------------------------------------
print("\n📈 Bias DataFrame (disparidades):")

# Check which columns exist in bias_df
available_bias_cols = bias_df.columns.tolist()
print("Colunas disponíveis no bias_df:")
print(available_bias_cols)

# Define core disparity columns to display
core_disparity_cols = [
    'ppr_disparity', 'pprev_disparity', 'precision_disparity', 'fdr_disparity',
    'for_disparity', 'fpr_disparity', 'fnr_disparity', 'tpr_disparity',
    'tnr_disparity', 'npv_disparity'
]

# Filter for existing core disparity columns
cols_bias_display = ["attribute_name", "attribute_value"] + [
    col for col in core_disparity_cols if col in available_bias_cols
]

# Add related reference group values if they exist
for metric in ['ppr', 'pprev', 'precision', 'fdr', 'for', 'fpr', 'fnr', 'tpr', 'tnr', 'npv']:
     if f'{metric}_ref_group_value' in available_bias_cols:
         cols_bias_display.append(f'{metric}_ref_group_value')

# Display bias_df using the filtered columns
display(bias_df[cols_bias_display].head(10))

## =========================================================
# 7️⃣ Gráficos de disparidade – versão estendida com FOR
# =========================================================
import matplotlib.pyplot as plt
import numpy as np

# Métricas de disparidade para visualização padrão
candidate_metrics = ["for_disparity", "fpr_disparity", "ppr_disparity"]

def plot_disparities(bias_df, attr, metrics, save_prefix=None, timestamp=None):
    """Draw one bar chart per disparity metric for a single sensitive attribute.

    Relies on the notebook-level names ``plt`` and, when saving, ``os`` and
    ``results_path``.

    Args:
        bias_df: DataFrame with "attribute_name" and "attribute_value" columns
            plus one column per disparity metric.
        attr: Value of "attribute_name" whose rows are plotted.
        metrics: Candidate metric column names; names missing from ``bias_df``
            are skipped.
        save_prefix: When truthy, every figure is also written as a PNG into
            ``results_path`` using this file-name prefix.
        timestamp: Suffix for saved file names. When omitted, the notebook-level
            ``timestamp`` is used if it exists; otherwise the current time is
            formatted as ``%Y%m%d_%H%M%S``.

    Returns:
        None. Figures are shown (and optionally saved) as a side effect.
    """
    df_attr = bias_df[bias_df["attribute_name"] == attr].copy()
    if df_attr.empty:
        print(f"⚠️ Nada para plotar em '{attr}' (sem linhas no bias_df).")
        return

    metrics = [m for m in metrics if m in df_attr.columns]
    if not metrics:
        print(f"⚠️ Nenhuma métrica de disparidade disponível para '{attr}'.")
        return

    # Resolve the file-name stamp up front so that saving can never fail with a
    # NameError when no global `timestamp` has been defined yet.
    if save_prefix and timestamp is None:
        timestamp = globals().get("timestamp")
        if timestamp is None:
            from datetime import datetime
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    for m in metrics:
        plot_df = df_attr[["attribute_value", m]].dropna()
        if plot_df.empty:
            continue

        plot_df = plot_df.sort_values(m)
        fig, ax = plt.subplots(figsize=(8, 4.5))
        ax.bar(plot_df["attribute_value"].astype(str), plot_df[m].astype(float))
        # Parity line: a disparity of 1.0 means equal to the reference group.
        ax.axhline(1.0, linestyle="--", color="red")
        ax.set_title(f"{m.replace('_', ' ').upper()} por {attr}")
        ax.set_ylabel("Disparidade")
        ax.set_xlabel(attr)
        fig.tight_layout()
        if save_prefix:
            fname = f"{save_prefix}_{attr}_{m}_{timestamp}.png"
            fig.savefig(os.path.join(results_path, fname), dpi=120)
        plt.show()
        # Release the figure; this function creates one per metric.
        plt.close(fig)

# One timestamp shared by every artefact written in this run. It must be defined
# before the plotting loops below, which embed it in the PNG file names.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Plot the FOR/FPR/PPR disparities for each sensitive attribute
for attr in ["sex", "age_group"]:
    print(f"\n📊 Plotando disparidades para '{attr}'…")
    plot_disparities(bias_df, attr, candidate_metrics, save_prefix="fairness_disparity")

## =========================================================
# 🩺 Extra chart: FOR disparity + FN and TN volume
# =========================================================
for_volume_cols = ["attribute_value", "for_disparity", "fn", "tn"]
for attr in ["sex", "age_group"]:
    df_attr = bias_df[bias_df["attribute_name"] == attr].copy()
    # Skip the chart unless every column it needs is present.
    if any(col not in df_attr.columns for col in for_volume_cols):
        continue

    # FOR disparity together with the FN and TN counts
    df_plot = df_attr[for_volume_cols].copy()
    df_plot = df_plot.dropna().sort_values("for_disparity")

    if df_plot.empty:
        continue

    fig, ax1 = plt.subplots(figsize=(8, 5))

    # Bars = FOR disparity
    ax1.bar(df_plot["attribute_value"], df_plot["for_disparity"], color="steelblue", alpha=0.7)
    ax1.axhline(1.0, color="red", linestyle="--", linewidth=1)
    ax1.set_ylabel("FOR Disparity", color="steelblue")
    ax1.set_xlabel(attr)
    ax1.tick_params(axis='y', labelcolor="steelblue")
    plt.xticks(rotation=0)

    # Line = volume (FN + TN) on a secondary y axis
    ax2 = ax1.twinx()
    volumes = df_plot["fn"] + df_plot["tn"]
    ax2.plot(df_plot["attribute_value"], volumes, color="darkorange", marker="o", linewidth=2)
    ax2.set_ylabel("Volume (FN+TN)", color="darkorange")
    ax2.tick_params(axis='y', labelcolor="darkorange")

    plt.title(f"FOR Disparity e Volume (FN+TN) por {attr.capitalize()}")
    plt.tight_layout()

    # Save the chart
    fname = f"for_disparity_volume_{attr}_{timestamp}.png"
    plt.savefig(os.path.join(results_path, fname), dpi=120)
    plt.show()
    plt.close(fig)

    print(f"💾 Gráfico salvo: {fname}")



# ----------------------------------------
# 8️⃣ Generate the HTML report (from bias_df)
# ----------------------------------------
report_path = f"{results_path}/fairness_report_{timestamp}.html"

# The report contains emoji, so the encoding is explicit both on disk and in
# the document head rather than left to the platform default.
with open(report_path, "w", encoding="utf-8") as fhtml:
    fhtml.write('<html><head><meta charset="utf-8"><title>Fairness Report - Heart Disease</title></head><body>')
    fhtml.write(f"<h1>🏥 Heart Disease Fairness Report</h1>")
    fhtml.write(f"<p><b>Model Accuracy:</b> {acc}</p>")
    fhtml.write(f"<p><b>Timestamp:</b> {timestamp}</p>")

    fhtml.write("<h2>Bias Disparities</h2>")

    # Only the columns selected for display go into the report table
    bias_df_display = bias_df[cols_bias_display].head(15).to_html(index=False)
    fhtml.write(bias_df_display)


    fhtml.write("<h2>Interpretation</h2>")
    fhtml.write("""
    <p><b>How to read Disparity values:</b></p>
    <ul>
      <li>Values close to <b>1.0</b> indicate fairness parity between the group and the reference group.</li>
      <li><b>FNR Disparity</b>: If > 1.0, the model has a higher False Negative Rate (misses more positives) for this group compared to the reference group (possible underdiagnosis).</li>
      <li><b>FPR Disparity</b>: If > 1.0, the model has a higher False Positive Rate (incorrectly predicts positives) for this group compared to the reference group (possible overdiagnosis).</li>
      <li><b>TPR Disparity</b>: If < 1.0, the model has a lower True Positive Rate (detects fewer true positives) for this group compared to the reference group.</li>
      <li><b>PPR Disparity</b>: Predicted Positive Rate disparity. If > 1.0, the model predicts positives more often for this group compared to the reference group.</li>
      <li><b>Pprev Disparity</b>: Predicted Prevalence disparity. Similar to PPR, but relative to the group size.</li>
      <li><b>Reference Group Value</b>: The value of the metric for the reference group used in the disparity calculation (e.g., male for sex, 40-55 for age_group).</li>
    </ul>
    <p>Check Aequitas documentation for detailed interpretation of other metrics.</p>
    """)

    fhtml.write("<h2>Available Metrics in Bias DataFrame</h2>")
    fhtml.write(f"<p><b>Bias columns:</b> {', '.join(available_bias_cols)}</p>")


    fhtml.write("</body></html>")

print(f"\n💾 Relatório HTML salvo em: {report_path}")
print(f"📊 Colunas disponíveis no bias_df: {bias_df.columns.tolist()}")

In [None]:
# =========================================================
# ⚖️ AEQUITAS FAIRNESS AUDIT - Heart Disease Binary Model
# =========================================================
# Trains (or reuses) an XGBoost binary classifier, then audits its thresholded
# predictions with Aequitas across sex and age group.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from aequitas.group import Group
from aequitas.fairness import Fairness # Keep import for completeness, but won't use get_group_value_fairness
from aequitas.bias import Bias
from aequitas.plotting import Plot
from datetime import datetime

# ----------------------------------------
# 1️⃣ Paths and data
# ----------------------------------------
drive_path = "/content/drive/Othercomputers/My Mac/HealthProjects/heart-disease-fairness"
data_path = f"{drive_path}/data/heart_resampled.csv"
results_path = f"{drive_path}/results"
os.makedirs(results_path, exist_ok=True)

df = pd.read_csv(data_path)
# Binary target: any value of "num" above 0 is the positive class.
df["label"] = (df["num"] > 0).astype(int)

# Sensitive attributes used by the audit
df["sex_label"] = df["sex"].map({0: "female", 1: "male"})
# right=False makes every bin left-closed: [0, 40), [40, 55), [55, 70), [70, 100).
df["age_group"] = pd.cut(df["age"], bins=[0, 40, 55, 70, 100],
                         labels=["<40", "40-55", "55-70", "70+"], right=False).astype(str)


# ----------------------------------------
# 2️⃣ Training data
# ----------------------------------------
# NOTE(review): only the derived columns are dropped here; the raw "sex" and
# "age" columns stay in X, so the model does see them. Confirm this is intended,
# given that the success message below says "sem atributos sensíveis".
X = df.drop(columns=["num", "label", "sex_label", "age_group"])
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# ----------------------------------------
# 3️⃣ Train the model
# ----------------------------------------
# A model left in the kernel is reused only when it is an XGBClassifier fitted
# on exactly the current feature columns; a stale model from another experiment
# would otherwise be audited silently.
existing_model = globals().get("model")
try:
    reuse_existing = (
        isinstance(existing_model, XGBClassifier)
        and list(existing_model.feature_names_in_) == list(X_train.columns)
    )
except Exception:
    # Unfitted model or a version without feature_names_in_: cannot verify, so retrain.
    reuse_existing = False

try:
    if reuse_existing:
        print("✅ Usando modelo existente do kernel.")
    else:
        print("🏋️‍♂️ Treinando novo modelo...")
        model = XGBClassifier(
            objective="binary:logistic",
            # device="cuda", # Let XGBoost decide based on availability
            eval_metric="logloss",
            learning_rate=0.03,
            n_estimators=150,
            max_depth=4,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )
        model.fit(X_train, y_train)
        print("✅ Modelo treinado com sucesso (sem atributos sensíveis).")
except Exception as e:
    print(f"❌ Erro ao treinar ou usar modelo existente: {e}")
    # Nothing below can run without a model, so stop the cell here.
    raise


y_pred_proba = model.predict_proba(X_test)[:, 1]
y_pred = (y_pred_proba >= 0.5).astype(int)

acc = round(accuracy_score(y_test, y_pred), 4)
print(f"\n🎯 Accuracy: {acc}")


# ----------------------------------------
# 4️⃣ Dataset for the Aequitas audit
# ----------------------------------------
# get_crosstabs is called without score thresholds, in which case Aequitas
# expects "score" to be a binary decision. Use the same 0.5-thresholded
# predictions that produced the accuracy above, not the raw probabilities.
# Sensitive attributes are looked up through the test-set index.
audit_df = pd.DataFrame({
    "score": y_pred,
    "label_value": y_test.values,
    "sex": df.loc[y_test.index, "sex_label"].values,
    "age_group": df.loc[y_test.index, "age_group"].values
})

# Numeric score/label, string-typed attribute columns
audit_df['score'] = pd.to_numeric(audit_df['score'], errors='coerce')
audit_df['label_value'] = pd.to_numeric(audit_df['label_value'], errors='coerce')
audit_df['sex'] = audit_df['sex'].astype(str)
audit_df['age_group'] = audit_df['age_group'].astype(str)

# Drop rows where the numeric coercion produced NaN
audit_df = audit_df.dropna()


# ----------------------------------------
# 5️⃣ Run Aequitas
# ----------------------------------------
g = Group()
xtab, _ = g.get_crosstabs(audit_df)


b = Bias()
# original_df is passed explicitly alongside the crosstab.
bias_df = b.get_disparity_predefined_groups(
    xtab,
    original_df=audit_df,
    ref_groups_dict={'sex': 'male', 'age_group': '40-55'},
    alpha=0.05
)

# ----------------------------------------
# 6️⃣ Show the metrics (focus on bias_df)
# ----------------------------------------
print("\n📈 Bias DataFrame (disparidades):")

# Columns actually produced by this Aequitas version
available_bias_cols = bias_df.columns.tolist()
print("Colunas disponíveis no bias_df:")
print(available_bias_cols)

# Core disparity columns to display
core_disparity_cols = [
    'ppr_disparity', 'pprev_disparity', 'precision_disparity', 'fdr_disparity',
    'for_disparity', 'fpr_disparity', 'fnr_disparity', 'tpr_disparity',
    'tnr_disparity', 'npv_disparity'
]

# Keep only the core disparity columns that exist
cols_bias_display = ["attribute_name", "attribute_value"] + [
    col for col in core_disparity_cols if col in available_bias_cols
]

# Append the matching reference-group columns when present
for metric in ['ppr', 'pprev', 'precision', 'fdr', 'for', 'fpr', 'fnr', 'tpr', 'tnr', 'npv']:
    if f'{metric}_ref_group_value' in available_bias_cols:
        cols_bias_display.append(f'{metric}_ref_group_value')

# Display bias_df restricted to the selected columns
display(bias_df[cols_bias_display].head(10))

## =========================================================
# 7️⃣ Disparity charts – extended version with FOR
# =========================================================
import matplotlib.pyplot as plt
import numpy as np

# Disparity metrics plotted by default
candidate_metrics = ["for_disparity", "fpr_disparity", "ppr_disparity"]
def plot_disparities(bias_df, attr, metrics, save_prefix=None, timestamp=None):
    """Draw one bar chart per disparity metric for a single sensitive attribute.

    Relies on the notebook-level names ``plt`` and, when saving, ``os`` and
    ``results_path``.

    Args:
        bias_df: DataFrame with "attribute_name" and "attribute_value" columns
            plus one column per disparity metric.
        attr: Value of "attribute_name" whose rows are plotted.
        metrics: Candidate metric column names; names missing from ``bias_df``
            are skipped.
        save_prefix: When truthy, every figure is also written as a PNG into
            ``results_path`` using this file-name prefix.
        timestamp: Suffix for saved file names. When omitted, the notebook-level
            ``timestamp`` is used if it exists; otherwise the current time is
            formatted as ``%Y%m%d_%H%M%S``.

    Returns:
        None. Figures are shown (and optionally saved) as a side effect.
    """
    df_attr = bias_df[bias_df["attribute_name"] == attr].copy()
    if df_attr.empty:
        print(f"⚠️ Nada para plotar em '{attr}' (sem linhas no bias_df).")
        return

    metrics = [m for m in metrics if m in df_attr.columns]
    if not metrics:
        print(f"⚠️ Nenhuma métrica de disparidade disponível para '{attr}'.")
        return

    # Resolve the file-name stamp up front so that saving can never fail with a
    # NameError when no global `timestamp` has been defined yet.
    if save_prefix and timestamp is None:
        timestamp = globals().get("timestamp")
        if timestamp is None:
            from datetime import datetime
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    for m in metrics:
        plot_df = df_attr[["attribute_value", m]].dropna()
        if plot_df.empty:
            continue

        plot_df = plot_df.sort_values(m)
        fig, ax = plt.subplots(figsize=(8, 4.5))
        ax.bar(plot_df["attribute_value"].astype(str), plot_df[m].astype(float))
        # Parity line: a disparity of 1.0 means equal to the reference group.
        ax.axhline(1.0, linestyle="--", color="red")
        ax.set_title(f"{m.replace('_', ' ').upper()} por {attr}")
        ax.set_ylabel("Disparidade")
        ax.set_xlabel(attr)
        fig.tight_layout()
        if save_prefix:
            fname = f"{save_prefix}_{attr}_{m}_{timestamp}.png"
            fig.savefig(os.path.join(results_path, fname), dpi=120)
        plt.show()
        # Release the figure; this function creates one per metric.
        plt.close(fig)

# One timestamp shared by every artefact written in this run. It must be defined
# before the plotting loops below, which embed it in the PNG file names.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Plot the FOR/FPR/PPR disparities for each sensitive attribute
for attr in ["sex", "age_group"]:
    print(f"\n📊 Plotando disparidades para '{attr}'…")
    plot_disparities(bias_df, attr, candidate_metrics, save_prefix="fairness_disparity")

## =========================================================
# 🩺 Extra chart: FOR disparity + FN and TN volume
# =========================================================
for_volume_cols = ["attribute_value", "for_disparity", "fn", "tn"]
for attr in ["sex", "age_group"]:
    df_attr = bias_df[bias_df["attribute_name"] == attr].copy()
    # Skip the chart unless every column it needs is present.
    if any(col not in df_attr.columns for col in for_volume_cols):
        continue

    # FOR disparity together with the FN and TN counts
    df_plot = df_attr[for_volume_cols].copy()
    df_plot = df_plot.dropna().sort_values("for_disparity")

    if df_plot.empty:
        continue

    fig, ax1 = plt.subplots(figsize=(8, 5))

    # Bars = FOR disparity
    ax1.bar(df_plot["attribute_value"], df_plot["for_disparity"], color="steelblue", alpha=0.7)
    ax1.axhline(1.0, color="red", linestyle="--", linewidth=1)
    ax1.set_ylabel("FOR Disparity", color="steelblue")
    ax1.set_xlabel(attr)
    ax1.tick_params(axis='y', labelcolor="steelblue")
    plt.xticks(rotation=0)

    # Line = volume (FN + TN) on a secondary y axis
    ax2 = ax1.twinx()
    volumes = df_plot["fn"] + df_plot["tn"]
    ax2.plot(df_plot["attribute_value"], volumes, color="darkorange", marker="o", linewidth=2)
    ax2.set_ylabel("Volume (FN+TN)", color="darkorange")
    ax2.tick_params(axis='y', labelcolor="darkorange")

    plt.title(f"FOR Disparity e Volume (FN+TN) por {attr.capitalize()}")
    plt.tight_layout()

    # Save the chart
    fname = f"for_disparity_volume_{attr}_{timestamp}.png"
    plt.savefig(os.path.join(results_path, fname), dpi=120)
    plt.show()
    plt.close(fig)

    print(f"💾 Gráfico salvo: {fname}")



# ----------------------------------------
# 8️⃣ Generate the HTML report (from bias_df)
# ----------------------------------------
report_path = f"{results_path}/fairness_report_{timestamp}.html"

# The report contains emoji, so the encoding is explicit both on disk and in
# the document head rather than left to the platform default.
with open(report_path, "w", encoding="utf-8") as fhtml:
    fhtml.write('<html><head><meta charset="utf-8"><title>Fairness Report - Heart Disease</title></head><body>')
    fhtml.write(f"<h1>🏥 Heart Disease Fairness Report</h1>")
    fhtml.write(f"<p><b>Model Accuracy:</b> {acc}</p>")
    fhtml.write(f"<p><b>Timestamp:</b> {timestamp}</p>")

    fhtml.write("<h2>Bias Disparities</h2>")

    # Only the columns selected for display go into the report table
    bias_df_display = bias_df[cols_bias_display].head(15).to_html(index=False)
    fhtml.write(bias_df_display)


    fhtml.write("<h2>Interpretation</h2>")
    fhtml.write("""
    <p><b>How to read Disparity values:</b></p>
    <ul>
      <li>Values close to <b>1.0</b> indicate fairness parity between the group and the reference group.</li>
      <li><b>FNR Disparity</b>: If > 1.0, the model has a higher False Negative Rate (misses more positives) for this group compared to the reference group (possible underdiagnosis).</li>
      <li><b>FPR Disparity</b>: If > 1.0, the model has a higher False Positive Rate (incorrectly predicts positives) for this group compared to the reference group (possible overdiagnosis).</li>
      <li><b>TPR Disparity</b>: If < 1.0, the model has a lower True Positive Rate (detects fewer true positives) for this group compared to the reference group.</li>
      <li><b>PPR Disparity</b>: Predicted Positive Rate disparity. If > 1.0, the model predicts positives more often for this group compared to the reference group.</li>
      <li><b>Pprev Disparity</b>: Predicted Prevalence disparity. Similar to PPR, but relative to the group size.</li>
      <li><b>Reference Group Value</b>: The value of the metric for the reference group used in the disparity calculation (e.g., male for sex, 40-55 for age_group).</li>
    </ul>
    <p>Check Aequitas documentation for detailed interpretation of other metrics.</p>
    """)

    fhtml.write("<h2>Available Metrics in Bias DataFrame</h2>")
    fhtml.write(f"<p><b>Bias columns:</b> {', '.join(available_bias_cols)}</p>")


    fhtml.write("</body></html>")

print(f"\n💾 Relatório HTML salvo em: {report_path}")
print(f"📊 Colunas disponíveis no bias_df: {bias_df.columns.tolist()}")