In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from scipy.stats import spearmanr # Para a estabilidade SHAP
from sklearn.model_selection import train_test_split
import shap # Certifique-se de que a biblioteca SHAP está instalada
# from aif360.metrics import ClassificationMetric # Ignorando por enquanto, usando cálculo manual de EOD

# Name of the sensitive-attribute column, defined once so it is used consistently
SENSITIVE_FEATURE_NAME = 'Q'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd
import numpy as np

def load_and_combine_data(directory_path):
    """Load the train/test features and labels stored in one experiment directory.

    Reads ``X_train.csv``, ``X_test.csv``, ``y_train.csv`` and ``y_test.csv``
    from ``directory_path``. The feature files are read with their own header
    row; the label files have their first row skipped and their column named
    ``'Y'``, then the labels are converted to numeric values.

    Parameters
    ----------
    directory_path : str
        Directory containing the four CSV files.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` where the ``X`` items are
        DataFrames and the ``y`` items are 1-D NumPy arrays.
        ``(None, None, None, None)`` is returned (after printing a message)
        when a file is missing, when the number of feature rows and labels
        differ, or when any label cannot be parsed as a number.
    """
    failure = (None, None, None, None)

    def _read_labels(split):
        # The first row of the file is the header, so it is skipped and the
        # single column is given a fixed name instead.
        raw = pd.read_csv(f"{directory_path}/y_{split}.csv",
                          header=None,
                          names=['Y'],
                          skiprows=1)
        # 'coerce' maps unparseable entries to NaN; they are checked below
        # instead of being handed to the caller as if they were valid labels.
        return pd.to_numeric(raw['Y'], errors='coerce')

    try:
        X_train = pd.read_csv(f"{directory_path}/X_train.csv")
        X_test = pd.read_csv(f"{directory_path}/X_test.csv")
        y_train = _read_labels('train')
        y_test = _read_labels('test')
    except FileNotFoundError as e:
        print(f"Erro ao carregar arquivos no caminho {directory_path}: {e}")
        return failure

    # Safety check: every feature row must have exactly one label.
    if len(X_train) != len(y_train) or len(X_test) != len(y_test):
        print(f"ERRO: Desalinhamento de tamanho. Treino: {len(X_train)} vs {len(y_train)}. Teste: {len(X_test)} vs {len(y_test)}.")
        return failure

    # Reject labels that could not be converted to numbers.
    n_bad_train = int(y_train.isna().sum())
    n_bad_test = int(y_test.isna().sum())
    if n_bad_train or n_bad_test:
        print(f"ERRO: Rótulos não numéricos ou ausentes em {directory_path}. Treino: {n_bad_train}. Teste: {n_bad_test}.")
        return failure

    # 1-D arrays are the shape expected by model.fit().
    return X_train, y_train.to_numpy(), X_test, y_test.to_numpy()

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from scipy.stats import spearmanr
import shap 

# Column of X holding the sensitive attribute; run_experiment reads it from X_test.
SENSITIVE_FEATURE_NAME = 'Q' 

def _true_positive_rate(y_true, y_pred, group_mask):
    """Return the true positive rate of ``y_pred`` within the rows selected by ``group_mask``.

    All three arguments are equally long 1-D arrays. NaN is returned when the
    group contains no positive ground-truth label, because the rate is then
    undefined; reporting 0 would look like a real (worst possible) recall.
    """
    group_positives = (y_true == 1) & group_mask
    n_positives = group_positives.sum()
    if n_positives == 0:
        return float('nan')
    return ((y_pred == 1) & group_positives).sum() / n_positives


def run_experiment(X_train, y_train, X_test, y_test, baseline_feature_ranking=None):
    """Train a logistic regression and compute performance, fairness and SHAP metrics.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices. ``X_test`` must contain the column named by
        ``SENSITIVE_FEATURE_NAME``.
    y_train, y_test : array-like
        1-D label vectors matching the rows of ``X_train`` / ``X_test``.
    baseline_feature_ranking : dict, optional
        Mapping ``feature -> rank`` as returned under
        ``'Feature_Importance_Ranking'`` by a previous call. When given, the
        current ranking is compared against it.

    Returns
    -------
    dict
        Keys: ``'Overall_Accuracy'``, ``'Overall_F1_Score'``, ``'AUROC'``,
        ``'Equal_Opportunity_Difference'`` (TPR of group ``Q == 0`` minus TPR
        of group ``Q == 1``; NaN if either group has no positive label),
        ``'Feature_Importance_Ranking'`` (rank 1 = largest mean |SHAP|) and
        ``'SHAP_Rank_Stability'`` (Spearman correlation with the baseline
        ranking, or 1.0 when no baseline is supplied).
    """
    results = {}

    # --- 1. Model training ---
    model = LogisticRegression(solver='liblinear', random_state=42, max_iter=1000)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    # --- 2. Robustness metrics ---
    results['Overall_Accuracy'] = accuracy_score(y_test, y_pred)
    results['Overall_F1_Score'] = f1_score(y_test, y_pred)
    results['AUROC'] = roc_auc_score(y_test, y_proba)

    # --- 3. Fairness metric (Equal Opportunity Difference) ---
    # Work on plain arrays so the masks are combined by position and never
    # re-aligned on a pandas index.
    y_true = np.asarray(y_test).ravel()
    sensitive = X_test[SENSITIVE_FEATURE_NAME].to_numpy()

    tpr_privileged = _true_positive_rate(y_true, y_pred, sensitive == 0)    # Q = 0
    tpr_unprivileged = _true_positive_rate(y_true, y_pred, sensitive == 1)  # Q = 1

    # Sign convention here is privileged minus unprivileged.
    # NOTE(review): AIF360 appears to report the opposite sign — confirm before comparing.
    results['Equal_Opportunity_Difference'] = tpr_privileged - tpr_unprivileged

    # --- 4. Explainability metric (SHAP rank stability) ---
    explainer = shap.Explainer(model, X_train)
    # At most 500 test rows are explained, drawn with a fixed seed.
    shap_sample = X_test.sample(n=min(500, len(X_test)), random_state=42)
    shap_values = explainer(shap_sample)

    mean_abs_shap = pd.Series(np.abs(shap_values.values).mean(axis=0), index=X_test.columns)
    # ascending=False makes rank 1 the most important feature; ties are broken
    # by column order, independent of any sort.
    current_feature_ranking = mean_abs_shap.rank(ascending=False, method='first')

    results['Feature_Importance_Ranking'] = current_feature_ranking.to_dict()

    if baseline_feature_ranking is not None:
        baseline_ranks = pd.Series(baseline_feature_ranking)
        shared = baseline_ranks.index.intersection(current_feature_ranking.index)

        if len(shared) < 2:
            # A rank correlation needs at least two features to be defined.
            rho = float('nan')
        else:
            rho, _ = spearmanr(baseline_ranks.loc[shared].values,
                               current_feature_ranking.loc[shared].values)
        results['SHAP_Rank_Stability'] = rho
    else:
        # No baseline to compare with: the run is its own reference.
        results['SHAP_Rank_Stability'] = 1.0

    return results

In [4]:
# --- MAIN EXECUTION LOOP ---
# Runs run_experiment on every dataset directory, compares each run with the
# baseline (first entry of the list) and writes one summary row per dataset.

# Experiment directories (replace with your real paths). The first entry is
# treated as the baseline.
experiment_directories = [
    'datasets/baseline',
    # Bias series (l_q)
    'datasets/bias_lq_0.1',
    'datasets/bias_lq_0.3',
    'datasets/bias_lq_0.5',
    'datasets/bias_lq_0.7',
    'datasets/bias_lq_0.9',
    # Noise series (sy)
    'datasets/noise_sy_0.1',
    'datasets/noise_sy_0.2',
    'datasets/noise_sy_0.3',
    'datasets/noise_sy_0.4',
    # Imbalance series (p_u)
    'datasets/imbalance_pu_0.2',
    'datasets/imbalance_pu_0.4',
    'datasets/imbalance_pu_0.6',
    'datasets/imbalance_pu_0.8'
]

# Rows are collected in a list and turned into a DataFrame once, instead of
# re-concatenating a growing frame on every iteration.
result_records = []
baseline_ranking = None

print("--- Iniciando Loop de Auditoria ---")

for i, dir_path in enumerate(experiment_directories):
    experiment_name = dir_path.split('/')[-1]
    print(f"\nProcessando: {experiment_name}")

    # The loader returns four values, all None when loading failed.
    X_train, y_train, X_test, y_test = load_and_combine_data(dir_path)

    # Skip datasets that could not be loaded.
    if X_train is None:
        continue

    # The first directory in the list is the baseline.
    is_baseline = (i == 0)

    current_results = run_experiment(X_train, y_train, X_test, y_test,
                                     baseline_feature_ranking=baseline_ranking)

    if is_baseline:
        # Keep the baseline ranking so later runs can be compared against it.
        baseline_ranking = current_results['Feature_Importance_Ranking']
        print("-> Linha de Base estabelecida.")
    elif baseline_ranking is None:
        # The baseline failed to load, so run_experiment fell back to its
        # "no baseline" value of 1.0; that would read as perfect stability.
        current_results['SHAP_Rank_Stability'] = float('nan')
        print("   -> AVISO: linha de base indisponível; Estabilidade SHAP definida como NaN.")

    # Metadata parsed from the folder name, e.g. 'bias_lq_0.1' -> 'bias', '0.1'.
    current_results['Condition'] = 'Baseline' if is_baseline else experiment_name.split('_')[0]
    current_results['Parameter_Value'] = 'N/A' if is_baseline else experiment_name.split('_')[-1]

    result_records.append(current_results)

    print(f"   -> EOD: {current_results['Equal_Opportunity_Difference']:.3f} | F1: {current_results['Overall_F1_Score']:.3f} | Estabilidade SHAP: {current_results['SHAP_Rank_Stability']:.3f}")

master_results_df = pd.DataFrame(result_records)

# Drop the per-feature SHAP ranking (a dict per row) to keep the CSV flat.
master_results_df = master_results_df.drop(columns=['Feature_Importance_Ranking'], errors='ignore')

# Save the final table.
MASTER_RESULTS_FILE = 'project_audit_results.csv'
master_results_df.to_csv(MASTER_RESULTS_FILE, index=False)
print("\n--- Loop Concluído ---")
print(f"Resultados salvos em: {MASTER_RESULTS_FILE}")

--- Iniciando Loop de Auditoria ---

Processando: baseline
-> Linha de Base estabelecida.
   -> EOD: 0.000 | F1: 0.000 | Estabilidade SHAP: 1.000

Processando: bias_lq_0.1
   -> EOD: 0.012 | F1: 0.867 | Estabilidade SHAP: -0.400

Processando: bias_lq_0.3
   -> EOD: 0.027 | F1: 0.873 | Estabilidade SHAP: -0.400

Processando: bias_lq_0.5
   -> EOD: 0.062 | F1: 0.877 | Estabilidade SHAP: -0.400

Processando: bias_lq_0.7
   -> EOD: 0.075 | F1: 0.884 | Estabilidade SHAP: -0.400

Processando: bias_lq_0.9
   -> EOD: 0.053 | F1: 0.885 | Estabilidade SHAP: -0.400

Processando: noise_sy_0.1
   -> EOD: -0.010 | F1: 0.870 | Estabilidade SHAP: -0.400

Processando: noise_sy_0.2
   -> EOD: -0.030 | F1: 0.869 | Estabilidade SHAP: -0.400

Processando: noise_sy_0.3
   -> EOD: -0.026 | F1: 0.870 | Estabilidade SHAP: -0.400

Processando: noise_sy_0.4
   -> EOD: -0.035 | F1: 0.869 | Estabilidade SHAP: -0.400

Processando: imbalance_pu_0.2
   -> EOD: 0.026 | F1: 0.842 | Estabilidade SHAP: 0.000

Processando