In [None]:
# DASHBOARD DE INTELIGENCIA ARTIFICIAL RESPONSABLE (RAI)
# ================================================================

# 1. CONFIGURACIÓN Y IMPORTS PARA RAI
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import joblib
import warnings
from pathlib import Path
import shap
from typing import Dict, List, Any, Tuple

warnings.filterwarnings('ignore')

# Azure ML imports
from azureml.core import Workspace, Dataset, Experiment, Run, Model
from azureml.core.model import Model as AMLModel
import mlflow
import mlflow.sklearn

# RAI y análisis de equidad
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.inspection import permutation_importance
from sklearn.model_selection import cross_val_score

# Análisis estadístico
from scipy import stats
from scipy.stats import chi2_contingency, ks_2samp

# Global plot styling: the matplotlib style sheet is applied first, then the
# seaborn palette (order kept as in the original setup).
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

# Banner printed once at the top of the notebook.
banner_rule = "=" * 70
for banner_line in (
    "🛡️  DASHBOARD DE INTELIGENCIA ARTIFICIAL RESPONSABLE",
    banner_rule,
    "📋 Análisis de: Equidad, Explicabilidad, Robustez y Transparencia",
    banner_rule,
):
    print(banner_line)

# Connect to the Azure ML workspace; any failure is reported and re-raised
# because nothing below can run without `ws`.
try:
    ws = Workspace.from_config()
    print("✅ Conectado al workspace: {}".format(ws.name))
except Exception as e:
    print("❌ Error conectando al workspace: {}".format(e))
    raise

# Point MLflow at the tracking URI exposed by the workspace.
tracking_uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(tracking_uri)

print("🔄 Cargando modelo y datos para análisis RAI...")


In [None]:
# 2. LOAD MODEL AND DATA FOR THE RAI ANALYSIS
print("\n📥 CARGA DE MODELO Y DATOS")
print("="*40)

# Name of the registered model to analyse.
MODEL_NAME = "candidate-selection-model"

try:
    # No explicit version is requested; Azure ML is expected to resolve the
    # latest registered version -- confirm against the SDK docs if this matters.
    registered_model = AMLModel(ws, name=MODEL_NAME)
    print(f"✅ Modelo encontrado: {MODEL_NAME}")
    print(f"📋 Versión: {registered_model.version}")

    # exist_ok=True lets this cell be re-run: without it the download raises
    # once ./model_artifacts already contains the files from a previous run.
    model_path = registered_model.download(target_dir="./model_artifacts", exist_ok=True)
    print(f"📁 Artefactos descargados en: {model_path}")
    artifacts_dir = Path(model_path)

    # NOTE(review): joblib.load unpickles arbitrary objects; only safe because
    # the artefacts come from our own model registry.
    model = joblib.load(artifacts_dir / "model.pkl")
    scaler = joblib.load(artifacts_dir / "scaler.pkl")

    # Metadata written alongside the model by the training notebook.
    with open(artifacts_dir / "feature_names.json", 'r', encoding='utf-8') as f:
        feature_metadata = json.load(f)

    with open(artifacts_dir / "notebook_info.json", 'r', encoding='utf-8') as f:
        training_info = json.load(f)

    print(f"✅ Modelo cargado: {type(model).__name__}")
    print(f"📊 Features: {feature_metadata['feature_count']}")

except Exception as e:
    print(f"❌ Error cargando modelo registrado: {e}")
    print("💡 Asegúrate de haber ejecutado el notebook de entrenamiento primero")
    raise

# Load the evaluation datasets stored next to the model artefacts.
try:
    test_data = pd.read_parquet(Path(model_path) / "test_data.parquet")
    val_data = pd.read_parquet(Path(model_path) / "val_data.parquet")

    print(f"\n📊 Datos cargados:")
    print(f"  Test: {test_data.shape}")
    print(f"  Validación: {val_data.shape}")

    feature_names = feature_metadata['feature_names']

    # Fail early with a readable message instead of a bare KeyError deep in
    # a column selection.
    required_columns = [*feature_names, 'y_true', 'y_pred', 'y_proba']
    for split_name, split_df in (('test', test_data), ('validación', val_data)):
        missing_columns = [c for c in required_columns if c not in split_df.columns]
        if missing_columns:
            raise KeyError(
                f"Columnas faltantes en datos de {split_name}: {missing_columns}"
            )

    # Split features and targets for the test split.
    X_test = test_data[feature_names]
    y_test_true = test_data['y_true']
    y_test_pred = test_data['y_pred']
    y_test_proba = test_data['y_proba']

    # Stack test + validation rows (fresh 0..n-1 index) for a larger sample.
    X_combined = pd.concat([X_test, val_data[feature_names]], ignore_index=True)
    y_combined_true = pd.concat([y_test_true, val_data['y_true']], ignore_index=True)
    y_combined_pred = pd.concat([y_test_pred, val_data['y_pred']], ignore_index=True)
    y_combined_proba = pd.concat([y_test_proba, val_data['y_proba']], ignore_index=True)

    print(f"  Datos combinados: {X_combined.shape}")
    print(f"✅ Datos organizados para análisis RAI")

except Exception as e:
    print(f"❌ Error cargando datos: {e}")
    raise

# Optional: processed source data carrying demographic columns. This step is
# best-effort; on any failure the notebook continues without demographics.
full_processed_data = None
demographic_cols = []
demographic_data_available = False
try:
    processed_data_path = "data/processed/processed_candidates.parquet"
    if Path(processed_data_path).exists():
        full_processed_data = pd.read_parquet(processed_data_path)
        print(f"📊 Datos originales cargados: {full_processed_data.shape}")

        # Keep only the demographic columns that are actually present.
        demographic_cols = [
            col
            for col in ('gender', 'age_range', 'location', 'education_level')
            if col in full_processed_data.columns
        ]

        print(f"📋 Columnas demográficas disponibles: {demographic_cols}")
        demographic_data_available = len(demographic_cols) > 0
    else:
        print("⚠️  Datos originales no encontrados - análisis de equidad limitado")

except Exception as e:
    print(f"⚠️  Error cargando datos demográficos: {e}")
    demographic_data_available = False
    full_processed_data = None
    demographic_cols = []

print(f"\n📋 CONFIGURACIÓN PARA ANÁLISIS RAI:")
# Fall back to the estimator class name so a missing metadata key cannot
# abort the notebook after everything has loaded successfully.
print(f"  Modelo: {training_info.get('best_model_name', type(model).__name__)}")
print(f"  Muestras para análisis: {len(X_combined):,}")
print(f"  Features: {len(feature_names)}")
print(f"  Análisis demográfico: {'✅ Disponible' if demographic_data_available else '❌ No disponible'}")


In [None]:
# 3. EXPLAINABILITY ANALYSIS WITH SHAP
print("\n🔍 ANÁLISIS DE EXPLICABILIDAD DEL MODELO")
print("="*50)


def _build_shap_explainer(model, background):
    """Return a SHAP explainer for *model*.

    TreeExplainer is attempted first because it also covers tree ensembles
    (which have no ``tree_`` attribute); when SHAP rejects the model the
    model-agnostic KernelExplainer is used with *background* as reference data.
    """
    try:
        return shap.TreeExplainer(model)
    except Exception:
        # The exception type raised for unsupported models differs between
        # SHAP versions, hence the broad catch around this single call.
        predict_fn = model.predict_proba if hasattr(model, 'predict_proba') else model.predict
        return shap.KernelExplainer(predict_fn, background)


def _positive_class_shap_values(shap_values):
    """Reduce raw SHAP output to one 2-D (samples, features) array.

    Handles both layouts seen across SHAP versions: a list with one array per
    class, and a single 3-D array whose last axis is the class.
    """
    if isinstance(shap_values, list):
        chosen = shap_values[1] if len(shap_values) > 1 else shap_values[0]
        return np.asarray(chosen)
    values = np.asarray(shap_values)
    if values.ndim == 3:
        class_idx = 1 if values.shape[2] > 1 else 0
        return values[:, :, class_idx]
    return values


def analyze_model_explainability(model, X_sample, feature_names, max_samples=500,
                                 random_state=42):
    """Compute global feature importance, preferring SHAP.

    Args:
        model: Fitted estimator exposing ``predict`` and optionally
            ``predict_proba``.
        X_sample: DataFrame of rows to explain.
        feature_names: Names used to label the importance table; expected to
            match the columns of ``X_sample`` in order.
        max_samples: Upper bound on rows passed to SHAP.
        random_state: Seed for row sampling and for the permutation fallback,
            so repeated runs give the same ranking.

    Returns:
        dict with ``global_importance`` (DataFrame sorted by importance) and
        ``shap_available``; on SHAP success also ``shap_values``, ``shap_data``
        and ``explainer``. If both SHAP and the fallback fail, the dict holds
        only ``error``.
    """
    explainability_results = {}

    # SHAP is expensive, so cap the number of explained rows.
    if len(X_sample) > max_samples:
        X_shap = X_sample.sample(n=max_samples, random_state=random_state)
        print(f"📊 Usando muestra de {max_samples} observaciones para SHAP")
    else:
        X_shap = X_sample
        print(f"📊 Usando todas las {len(X_sample)} observaciones para SHAP")

    try:
        print("🔄 Calculando valores SHAP...")

        background = X_shap.sample(min(100, len(X_shap)), random_state=random_state)
        explainer = _build_shap_explainer(model, background)
        shap_values_positive = _positive_class_shap_values(explainer.shap_values(X_shap))

        explainability_results['shap_values'] = shap_values_positive
        explainability_results['shap_data'] = X_shap
        explainability_results['explainer'] = explainer

        # Global importance = mean absolute SHAP value per feature.
        feature_importance_shap = np.abs(shap_values_positive).mean(axis=0)
        importance_df = pd.DataFrame({
            'feature': feature_names,
            'importance': feature_importance_shap
        }).sort_values('importance', ascending=False)

        explainability_results['global_importance'] = importance_df
        explainability_results['shap_available'] = True

        print("✅ Análisis SHAP completado")

        return explainability_results

    except Exception as e:
        print(f"⚠️ Error en análisis SHAP: {e}")
        print("📝 Realizando análisis de explicabilidad alternativo...")

        # Fallback: permutation importance scored against the model's own
        # predictions, i.e. how much shuffling a feature changes the output.
        # model.predict returns real class labels, unlike argmax indices.
        try:
            perm_importance = permutation_importance(
                model, X_shap, model.predict(X_shap),
                n_repeats=5, random_state=random_state
            )

            importance_df = pd.DataFrame({
                'feature': feature_names,
                'importance': perm_importance.importances_mean
            }).sort_values('importance', ascending=False)

            explainability_results['global_importance'] = importance_df
            explainability_results['shap_available'] = False

            print("✅ Análisis de explicabilidad alternativo completado")

        except Exception as e2:
            print(f"❌ Error en análisis alternativo: {e2}")
            explainability_results['error'] = str(e2)

        return explainability_results

# Run the explainability analysis
explainability_analysis = analyze_model_explainability(
    model, X_combined, feature_names, max_samples=300
)

# Visualise the explainability results (only when an importance table exists)
if 'global_importance' in explainability_analysis:
    global_importance = explainability_analysis['global_importance']
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # 1. Global feature importance (table is already sorted descending)
    top_features = global_importance.head(15)

    axes[0,0].barh(range(len(top_features)), top_features['importance'])
    axes[0,0].set_yticks(range(len(top_features)))
    axes[0,0].set_yticklabels(top_features['feature'])
    axes[0,0].set_xlabel('Importancia Global')
    axes[0,0].set_title('Top 15 Características más Importantes\n(Análisis de Explicabilidad)')
    axes[0,0].invert_yaxis()
    axes[0,0].grid(True, alpha=0.3)

    # 2. Distribution of importances
    axes[0,1].hist(global_importance['importance'],
                   bins=20, alpha=0.7, edgecolor='black')
    axes[0,1].set_xlabel('Importancia')
    axes[0,1].set_ylabel('Frecuencia')
    axes[0,1].set_title('Distribución de Importancias')
    axes[0,1].grid(True, alpha=0.3)

    # 3. Cumulative importance. The index is reset so that labels equal the
    # 0-based rank; otherwise idxmax() below would return the feature's
    # original row label instead of "how many features are needed".
    sorted_importance = (
        global_importance
        .sort_values('importance', ascending=False)
        .reset_index(drop=True)
    )
    total_importance = sorted_importance['importance'].sum()
    if total_importance > 0:
        cumulative_importance = sorted_importance['importance'].cumsum() / total_importance
    else:
        # No positive total (e.g. all-zero importances): shares are undefined.
        cumulative_importance = pd.Series(np.nan, index=sorted_importance.index)

    axes[1,0].plot(range(1, len(cumulative_importance) + 1), cumulative_importance, 'b-', linewidth=2)
    axes[1,0].axhline(y=0.8, color='red', linestyle='--', alpha=0.7, label='80% importancia')
    axes[1,0].axhline(y=0.9, color='orange', linestyle='--', alpha=0.7, label='90% importancia')
    axes[1,0].set_xlabel('Número de Características')
    axes[1,0].set_ylabel('Importancia Acumulada')
    axes[1,0].set_title('Importancia Acumulada de Características')
    axes[1,0].legend()
    axes[1,0].grid(True, alpha=0.3)

    # 4. Top vs bottom features. With fewer than 10 features the two groups
    # shrink so they never overlap (a single feature is still shown).
    n_per_side = min(5, max(1, len(global_importance) // 2))
    top_5 = global_importance.head(n_per_side)
    bottom_5 = global_importance.tail(n_per_side)

    comparison_features = list(top_5['feature']) + list(bottom_5['feature'])
    comparison_values = list(top_5['importance']) + list(bottom_5['importance'])
    colors = ['darkgreen'] * len(top_5) + ['darkred'] * len(bottom_5)

    axes[1,1].barh(range(len(comparison_features)), comparison_values, color=colors, alpha=0.7)
    axes[1,1].set_yticks(range(len(comparison_features)))
    axes[1,1].set_yticklabels(comparison_features)
    axes[1,1].set_xlabel('Importancia')
    axes[1,1].set_title('Top 5 vs Bottom 5 Características')
    axes[1,1].invert_yaxis()
    axes[1,1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('explainability_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

    print(f"✅ Análisis de explicabilidad guardado como 'explainability_analysis.png'")

    # Summary statistics; every lookup is clamped to the number of features
    # so small models do not raise IndexError.
    print(f"\n📊 ESTADÍSTICAS DE EXPLICABILIDAD:")
    n_ranked = len(cumulative_importance)
    if n_ranked:
        for top_k in (5, 10):
            covered = cumulative_importance.iloc[min(top_k, n_ranked) - 1]
            print(f"  Top {top_k} características explican: {covered:.1%} de la importancia")
        for threshold in (0.8, 0.9):
            reached = cumulative_importance >= threshold
            needed = int(reached.idxmax()) + 1 if reached.any() else n_ranked
            print(f"  Características para {threshold:.0%} importancia: {needed}")

    # Keep the top features for later analyses
    top_important_features = global_importance.head(10)['feature'].tolist()
    importance_by_feature = dict(
        zip(global_importance['feature'], global_importance['importance'])
    )

else:
    print("❌ No se pudo completar el análisis de explicabilidad")
    top_important_features = feature_names[:10]  # Fallback
    importance_by_feature = {}

print(f"\n📋 TOP 10 CARACTERÍSTICAS MÁS EXPLICATIVAS:")
for i, feature in enumerate(top_important_features, 1):
    # On the fallback path there is no importance table, so print a
    # placeholder instead of raising KeyError.
    if feature in importance_by_feature:
        importance_val = importance_by_feature[feature]
        print(f"  {i:2d}. {feature:30s} (importancia: {importance_val:.4f})")
    else:
        print(f"  {i:2d}. {feature:30s} (importancia: N/D)")


In [None]:
# 4. FAIRNESS AND BIAS ANALYSIS
print("\n⚖️ ANÁLISIS DE EQUIDAD Y SESGO DEL MODELO")
print("="*50)

# Substrings that mark a feature name as "technical".
_TECHNICAL_KEYWORDS = ('skill', 'experience', 'years', 'certification',
                       'degree', 'technical', 'programming', 'software')


def _quantile_labels(values, labels):
    """Bin *values* into ``len(labels)`` quantiles and return the bin labels.

    ``pd.qcut`` is called with ``labels=False`` because passing a fixed label
    list together with ``duplicates='drop'`` raises as soon as duplicate edges
    are dropped (binary / low-cardinality features). When fewer bins survive,
    only the first labels are used. Values that cannot be binned get ``None``.
    """
    try:
        codes = pd.qcut(values, q=len(labels), labels=False, duplicates='drop')
    except (ValueError, TypeError):
        # Non-numeric or otherwise unbinnable column: no row gets a label.
        return pd.Series([None] * len(values), index=values.index, dtype=object)
    return pd.Series(
        [labels[int(code)] if pd.notna(code) else None for code in codes],
        index=values.index,
        dtype=object,
    )


def _safe_corr(left, right):
    """Pearson correlation of two vectors; 0 when it is undefined (NaN)."""
    with np.errstate(invalid='ignore', divide='ignore'):
        coefficient = np.corrcoef(left, right)[0, 1]
    return 0 if np.isnan(coefficient) else coefficient


def _group_rates(mask, y_true, y_pred, y_proba):
    """Size, observed/predicted positive rate and mean probability of a group."""
    return {
        'count': mask.sum(),
        'positive_rate': y_true[mask].mean(),
        'predicted_positive_rate': y_pred[mask].mean(),
        'mean_probability': y_proba[mask].mean(),
    }


def analyze_fairness_and_bias(X_data, y_true, y_pred, y_proba, feature_names,
                              important_features=None):
    """Analyse fairness and bias of the model's predictions.

    Args:
        X_data: DataFrame of model features.
        y_true, y_pred, y_proba: True labels, predicted labels and predicted
            probabilities; matched to ``X_data`` rows by position.
        feature_names: Feature names to analyse.
        important_features: Features (most important first) whose quartiles
            are analysed; only the first five are used. Defaults to the
            module-level ``top_important_features`` when it exists, otherwise
            to ``feature_names``.

    Returns:
        dict with ``correlation_analysis``, ``quartile_analysis``,
        ``demographic_parity``, ``equity_metrics``, ``technical_features``
        and ``soft_features``.

    Raises:
        ValueError: If a target vector's length differs from ``len(X_data)``.
    """
    # Re-index the targets on X_data's index so boolean masks built from
    # X_data always select the matching rows.
    targets = {}
    for target_name, target in (('y_true', y_true), ('y_pred', y_pred), ('y_proba', y_proba)):
        target_values = np.asarray(target)
        if len(target_values) != len(X_data):
            raise ValueError(
                f"{target_name} has {len(target_values)} rows, expected {len(X_data)}"
            )
        targets[target_name] = pd.Series(target_values, index=X_data.index)
    y_true, y_pred, y_proba = targets['y_true'], targets['y_pred'], targets['y_proba']

    if important_features is None:
        important_features = globals().get('top_important_features', feature_names)

    fairness_results = {}

    # 1. Split features into technical vs non-technical by name keywords
    print("📊 Análisis de disparidad por tipo de características...")

    technical_features = [
        feature for feature in feature_names
        if any(keyword in feature.lower() for keyword in _TECHNICAL_KEYWORDS)
    ]
    soft_features = [
        feature for feature in feature_names
        if feature not in technical_features
    ]

    print(f"  Características técnicas: {len(technical_features)}")
    print(f"  Características no técnicas: {len(soft_features)}")

    # 2. Correlation of every feature with prediction, probability and truth
    correlation_analysis = {
        feature: {
            'corr_prediction': _safe_corr(X_data[feature], y_pred),
            'corr_probability': _safe_corr(X_data[feature], y_proba),
            'corr_truth': _safe_corr(X_data[feature], y_true),
        }
        for feature in feature_names
        if feature in X_data.columns
    }
    fairness_results['correlation_analysis'] = correlation_analysis

    # 3. Outcome rates per quartile of the most important features
    print("📈 Análisis de distribución por cuartiles...")

    quartile_names = ['Q1', 'Q2', 'Q3', 'Q4']
    quartile_analysis = {}
    for feature in important_features[:5]:
        if feature not in X_data.columns:
            continue
        quartiles = _quantile_labels(X_data[feature], quartile_names)

        quartile_stats = {}
        for quartile in quartile_names:
            mask = (quartiles == quartile)
            if mask.sum() > 0:
                stats_for_bin = _group_rates(mask, y_true, y_pred, y_proba)
                stats_for_bin['std_probability'] = y_proba[mask].std()
                quartile_stats[quartile] = stats_for_bin

        quartile_analysis[feature] = quartile_stats

    fairness_results['quartile_analysis'] = quartile_analysis

    # 4. Simulated demographic parity: synthetic groups are formed by crossing
    # terciles of an experience score with terciles of an education score.
    print("🎯 Análisis de paridad demográfica...")

    demographic_parity = {}

    experience_features = [f for f in feature_names if 'experience' in f.lower() or 'years' in f.lower()]
    education_features = [f for f in feature_names if 'education' in f.lower() or 'degree' in f.lower()]

    if experience_features and education_features:
        # Composite score = row-wise mean of the matching columns.
        exp_score = X_data[experience_features].mean(axis=1) if len(experience_features) > 1 else X_data[experience_features[0]]
        edu_score = X_data[education_features].mean(axis=1) if len(education_features) > 1 else X_data[education_features[0]]

        exp_labels = ['Low_Exp', 'Mid_Exp', 'High_Exp']
        edu_labels = ['Low_Edu', 'Mid_Edu', 'High_Edu']
        exp_terciles = _quantile_labels(exp_score, exp_labels)
        edu_terciles = _quantile_labels(edu_score, edu_labels)

        for exp_group in exp_labels:
            for edu_group in edu_labels:
                group_mask = (exp_terciles == exp_group) & (edu_terciles == edu_group)
                if group_mask.sum() <= 10:  # skip groups too small to be meaningful
                    continue
                truth = y_true[group_mask]
                predicted = y_pred[group_mask]
                group_stats = _group_rates(group_mask, y_true, y_pred, y_proba)
                # max(1, ...) avoids dividing by zero when a class is absent.
                group_stats['false_positive_rate'] = (
                    ((predicted == 1) & (truth == 0)).sum() / max(1, (truth == 0).sum())
                )
                group_stats['false_negative_rate'] = (
                    ((predicted == 0) & (truth == 1)).sum() / max(1, (truth == 1).sum())
                )
                demographic_parity[f"{exp_group}_{edu_group}"] = group_stats

    fairness_results['demographic_parity'] = demographic_parity

    # 5. Equity metrics: spread of the per-group rates
    equity_metrics = {}

    if demographic_parity:
        positive_rates = [group['predicted_positive_rate'] for group in demographic_parity.values()]
        fpr_rates = [group['false_positive_rate'] for group in demographic_parity.values()]
        fnr_rates = [group['false_negative_rate'] for group in demographic_parity.values()]

        equity_metrics = {
            'demographic_parity_difference': max(positive_rates) - min(positive_rates),
            'equalized_odds_difference_fpr': max(fpr_rates) - min(fpr_rates),
            'equalized_odds_difference_fnr': max(fnr_rates) - min(fnr_rates),
            'statistical_parity_ratio': min(positive_rates) / max(positive_rates) if max(positive_rates) > 0 else 0
        }

    fairness_results['equity_metrics'] = equity_metrics
    fairness_results['technical_features'] = technical_features
    fairness_results['soft_features'] = soft_features

    return fairness_results

# Run the fairness analysis
fairness_analysis = analyze_fairness_and_bias(
    X_combined, y_combined_true, y_combined_pred, y_combined_proba, feature_names
)

# Visualise the fairness analysis on a 2x3 grid; panels are filled in order
# and a panel is skipped when its data is missing.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

plot_idx = 0

# 1. Feature/prediction correlations: the 15 strongest by absolute value
# (taking the first 15 dictionary keys would just follow feature order).
if 'correlation_analysis' in fairness_analysis:
    corr_data = fairness_analysis['correlation_analysis']
    features = sorted(
        corr_data,
        key=lambda name: abs(corr_data[name]['corr_prediction']),
        reverse=True,
    )[:15]
    pred_corrs = [corr_data[f]['corr_prediction'] for f in features]

    axes[plot_idx].barh(range(len(features)), pred_corrs)
    axes[plot_idx].set_yticks(range(len(features)))
    axes[plot_idx].set_yticklabels(features)
    axes[plot_idx].set_xlabel('Correlación con Predicción')
    axes[plot_idx].set_title('Correlación Features-Predicción')
    axes[plot_idx].invert_yaxis()
    axes[plot_idx].grid(True, alpha=0.3)
    plot_idx += 1

# 2. Rates per quartile for the first analysed feature
if 'quartile_analysis' in fairness_analysis and fairness_analysis['quartile_analysis']:
    top_feature = list(fairness_analysis['quartile_analysis'].keys())[0]
    quartile_data = fairness_analysis['quartile_analysis'][top_feature]

    quartiles = list(quartile_data.keys())
    true_rates = [quartile_data[q]['positive_rate'] for q in quartiles]
    pred_rates = [quartile_data[q]['predicted_positive_rate'] for q in quartiles]

    x = np.arange(len(quartiles))
    width = 0.35

    axes[plot_idx].bar(x - width/2, true_rates, width, label='Tasa Real', alpha=0.8)
    axes[plot_idx].bar(x + width/2, pred_rates, width, label='Tasa Predicha', alpha=0.8)
    axes[plot_idx].set_xlabel('Cuartil')
    axes[plot_idx].set_ylabel('Tasa Positiva')
    axes[plot_idx].set_title(f'Tasas por Cuartil - {top_feature}')
    axes[plot_idx].set_xticks(x)
    axes[plot_idx].set_xticklabels(quartiles)
    axes[plot_idx].legend()
    axes[plot_idx].grid(True, alpha=0.3)
    plot_idx += 1

# 3. Demographic parity
if 'demographic_parity' in fairness_analysis and fairness_analysis['demographic_parity']:
    parity_data = fairness_analysis['demographic_parity']
    groups = list(parity_data.keys())
    positive_rates = [parity_data[g]['predicted_positive_rate'] for g in groups]

    colors = plt.cm.viridis(np.linspace(0, 1, len(groups)))
    axes[plot_idx].bar(range(len(groups)), positive_rates, color=colors, alpha=0.8)
    axes[plot_idx].set_xlabel('Grupo Demográfico')
    axes[plot_idx].set_ylabel('Tasa Positiva Predicha')
    axes[plot_idx].set_title('Paridad Demográfica por Grupos')
    axes[plot_idx].set_xticks(range(len(groups)))
    axes[plot_idx].set_xticklabels(groups, rotation=45, ha='right')
    axes[plot_idx].grid(True, alpha=0.3)
    plot_idx += 1

# 4. Equity metrics
if 'equity_metrics' in fairness_analysis and fairness_analysis['equity_metrics']:
    equity_data = fairness_analysis['equity_metrics']
    metrics = list(equity_data.keys())
    values = list(equity_data.values())

    # "Difference" metrics are good near 0, but the parity *ratio* is good
    # near 1, so it needs its own rule; 0.8 follows the common four-fifths
    # rule of thumb.
    colors = [
        ('green' if value >= 0.8 else 'red') if metric.endswith('_ratio')
        else ('red' if abs(value) > 0.1 else 'green')
        for metric, value in zip(metrics, values)
    ]
    axes[plot_idx].bar(range(len(metrics)), values, color=colors, alpha=0.7)
    axes[plot_idx].set_xlabel('Métrica de Equidad')
    axes[plot_idx].set_ylabel('Valor')
    axes[plot_idx].set_title('Métricas de Equidad\n(Verde: Bueno, Rojo: Problemático)')
    axes[plot_idx].set_xticks(range(len(metrics)))
    axes[plot_idx].set_xticklabels([m.replace('_', '\n') for m in metrics], rotation=45, ha='right')
    axes[plot_idx].axhline(y=0.1, color='orange', linestyle='--', alpha=0.7, label='Umbral preocupante')
    axes[plot_idx].axhline(y=-0.1, color='orange', linestyle='--', alpha=0.7)
    axes[plot_idx].legend()  # the threshold label is only visible with a legend
    axes[plot_idx].grid(True, alpha=0.3)
    plot_idx += 1

# 5. Share of technical vs non-technical features
tech_features = fairness_analysis.get('technical_features', [])
soft_features = fairness_analysis.get('soft_features', [])

categories = ['Técnicas', 'No Técnicas']
counts = [len(tech_features), len(soft_features)]

if sum(counts) > 0:
    axes[plot_idx].pie(counts, labels=categories, autopct='%1.1f%%', startangle=90)
else:
    # A pie chart cannot be drawn from all-zero counts.
    axes[plot_idx].text(0.5, 0.5, 'Sin características', ha='center', va='center')
    axes[plot_idx].axis('off')
axes[plot_idx].set_title('Distribución de Tipos de Características')
plot_idx += 1

# 6. Calibration: observed positive rate per predicted-probability bin
prob_bins = np.linspace(0, 1, 11)
prob_centers = (prob_bins[:-1] + prob_bins[1:]) / 2

proba_values = np.asarray(y_combined_proba, dtype=float)
truth_values = np.asarray(y_combined_true, dtype=float)
# Digitising on the interior edges puts p == 1.0 in the last bin (a strict
# "< upper edge" test would silently drop it).
bin_of_sample = np.digitize(proba_values, prob_bins[1:-1])
valid_proba = ~np.isnan(proba_values)

precision_by_prob = []
for bin_idx in range(len(prob_centers)):
    mask = valid_proba & (bin_of_sample == bin_idx)
    if mask.any():
        precision = truth_values[mask].mean()
        precision_by_prob.append(precision)
    else:
        # Empty bin: leave a gap rather than plotting a misleading 0.
        precision_by_prob.append(np.nan)

axes[plot_idx].plot(prob_centers, precision_by_prob, 'bo-', linewidth=2, markersize=6)
axes[plot_idx].plot([0, 1], [0, 1], 'r--', alpha=0.7, label='Perfecta calibración')
axes[plot_idx].set_xlabel('Probabilidad Predicha')
axes[plot_idx].set_ylabel('Precisión Real')
axes[plot_idx].set_title('Calibración del Modelo')
axes[plot_idx].legend()
axes[plot_idx].grid(True, alpha=0.3)

# Hide panels left unused because an optional section above was skipped.
for unused_ax in axes[plot_idx + 1:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.savefig('fairness_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ Análisis de equidad guardado como 'fairness_analysis.png'")


In [None]:
# 5. ROBUSTNESS AND STABILITY ANALYSIS
print("\n🛡️ ANÁLISIS DE ROBUSTEZ Y ESTABILIDAD DEL MODELO")
print("="*55)


def _predict_scores(model, X):
    """Positive-class probability when available, otherwise raw predictions."""
    if hasattr(model, 'predict_proba'):
        return np.asarray(model.predict_proba(X)[:, 1])
    return np.asarray(model.predict(X))


def _is_noise_perturbable(series):
    """True for numeric, non-boolean columns of any width (float32, int32...)."""
    return (pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series))


def _macro_scores(y_ref, y_hat):
    """Macro (f1, precision, recall); all 0 when either side has one class."""
    if len(np.unique(y_ref)) <= 1 or len(np.unique(y_hat)) <= 1:
        return 0, 0, 0
    # Imported lazily so the degenerate single-class path needs no sklearn.
    from sklearn.metrics import f1_score, precision_score, recall_score
    return (
        f1_score(y_ref, y_hat, average='macro'),
        precision_score(y_ref, y_hat, average='macro'),
        recall_score(y_ref, y_hat, average='macro'),
    )


def analyze_model_robustness(model, X_data, y_true, feature_names,
                             n_bootstrap=100, noise_levels=(0.01, 0.05, 0.1, 0.2),
                             random_state=None):
    """Analyse robustness and stability of a fitted model.

    Args:
        model: Fitted estimator with ``predict`` and optionally
            ``predict_proba``.
        X_data: DataFrame of features.
        y_true: True labels, matched to ``X_data`` rows by position.
        feature_names: Feature names; the first 10 are probed individually.
        n_bootstrap: Number of bootstrap resamples.
        noise_levels: Gaussian noise scales, as fractions of each column's
            standard deviation.
        random_state: Seed for resampling and noise; ``None`` gives a
            different draw on every call.

    Returns:
        dict with ``bootstrap_stability``, ``bootstrap_metrics``,
        ``perturbation_analysis``, ``feature_influence`` and
        ``edge_case_analysis``.

    Raises:
        ValueError: If ``len(y_true) != len(X_data)``.
    """
    y_true_values = np.asarray(y_true)
    n_rows = len(X_data)
    if len(y_true_values) != n_rows:
        raise ValueError(f"y_true has {len(y_true_values)} rows, expected {n_rows}")

    rng = np.random.default_rng(random_state)
    has_proba = hasattr(model, 'predict_proba')
    robustness_results = {}

    # Predict once on the unperturbed data. A bootstrap resample only repeats
    # rows, so its predictions are these values re-indexed (assuming the model
    # scores each row independently), which avoids re-running the model
    # n_bootstrap times.
    baseline_pred = _predict_scores(model, X_data)
    baseline_labels = (baseline_pred > 0.5).astype(int) if has_proba else baseline_pred

    # 1. Stability under bootstrap resampling
    print("📊 Análisis de estabilidad con bootstrap...")

    bootstrap_metrics = []
    for _ in range(n_bootstrap):
        indices = rng.integers(0, n_rows, size=n_rows)
        y_boot = y_true_values[indices]
        y_pred = baseline_labels[indices]

        f1, precision, recall = _macro_scores(y_boot, y_pred)
        bootstrap_metrics.append({
            'accuracy': (y_pred == y_boot).mean(),
            'f1_score': f1,
            'precision': precision,
            'recall': recall
        })

    bootstrap_df = pd.DataFrame(
        bootstrap_metrics, columns=['accuracy', 'f1_score', 'precision', 'recall']
    )
    # Coefficient of variation = std / mean; the floor avoids division by 0.
    stability_stats = {
        'accuracy_mean': bootstrap_df['accuracy'].mean(),
        'accuracy_std': bootstrap_df['accuracy'].std(),
        'accuracy_cv': bootstrap_df['accuracy'].std() / max(bootstrap_df['accuracy'].mean(), 0.001),
        'f1_mean': bootstrap_df['f1_score'].mean(),
        'f1_std': bootstrap_df['f1_score'].std(),
        'f1_cv': bootstrap_df['f1_score'].std() / max(bootstrap_df['f1_score'].mean(), 0.001)
    }

    robustness_results['bootstrap_stability'] = stability_stats
    robustness_results['bootstrap_metrics'] = bootstrap_df

    print(f"  Estabilidad de accuracy: CV = {stability_stats['accuracy_cv']:.3f}")
    print(f"  Estabilidad de F1: CV = {stability_stats['f1_cv']:.3f}")

    # 2. Sensitivity to Gaussian noise on every numeric column
    print("🔀 Análisis de sensibilidad a perturbaciones...")

    numeric_columns = [col for col in X_data.columns if _is_noise_perturbable(X_data[col])]
    perturbation_results = {}

    for noise_level in noise_levels:
        X_noisy = X_data.copy()
        for col in numeric_columns:
            noise = rng.normal(0, noise_level * X_data[col].std(), n_rows)
            X_noisy[col] = X_data[col] + noise

        noisy_pred = _predict_scores(model, X_noisy)

        with np.errstate(invalid='ignore', divide='ignore'):
            # NaN when either prediction vector is constant.
            pred_correlation = np.corrcoef(baseline_pred, noisy_pred)[0, 1]

        perturbation_results[noise_level] = {
            'mean_prediction_difference': np.abs(noisy_pred - baseline_pred).mean(),
            'prediction_correlation': pred_correlation
        }

    robustness_results['perturbation_analysis'] = perturbation_results

    # 3. Per-feature influence on a fixed sample of rows
    print("🎯 Análisis de características más influyentes...")

    feature_influence = {}
    sample_size = min(200, n_rows)
    X_sample = X_data.sample(sample_size, random_state=42)
    baseline_sample_pred = _predict_scores(model, X_sample)

    # Only the first 10 entries of feature_names are probed; they are the
    # "most important" ones only if the caller passes them in that order.
    for feature in feature_names[:10]:
        if feature not in X_sample.columns:
            continue
        X_perturbed = X_sample.copy()

        if _is_noise_perturbable(X_sample[feature]):
            # Numeric feature: add noise at 10% of its standard deviation.
            X_perturbed[feature] = X_sample[feature] + rng.normal(
                0, X_sample[feature].std() * 0.1, sample_size
            )
        else:
            # Categorical / boolean feature: shuffle values across rows.
            X_perturbed[feature] = rng.permutation(X_sample[feature].to_numpy())

        perturbed_pred = _predict_scores(model, X_perturbed)
        feature_influence[feature] = np.abs(perturbed_pred - baseline_sample_pred).mean()

    robustness_results['feature_influence'] = feature_influence

    # 4. Edge cases (only meaningful with probabilities)
    print("🔍 Análisis de casos límite...")

    edge_case_analysis = {}

    if has_proba:
        probabilities = baseline_pred

        # Predictions within 0.1 of the 0.5 decision threshold.
        near_threshold = np.abs(probabilities - 0.5) < 0.1
        edge_case_analysis['near_threshold_count'] = near_threshold.sum()
        edge_case_analysis['near_threshold_percentage'] = near_threshold.mean()

        # Confident (p < 0.1 or p > 0.9) yet wrong predictions.
        high_confidence = np.abs(probabilities - 0.5) > 0.4
        high_conf_incorrect = high_confidence & (baseline_labels != y_true_values)
        edge_case_analysis['high_confidence_errors'] = high_conf_incorrect.sum()
        edge_case_analysis['high_confidence_error_rate'] = high_conf_incorrect.mean()

    robustness_results['edge_case_analysis'] = edge_case_analysis

    return robustness_results

# Run the robustness analysis with the function's default settings.
robustness_analysis = analyze_model_robustness(
    model, X_combined, y_combined_true, feature_names
)

# 2x3 grid of panels, flattened so panels can be addressed by a running index.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

plot_idx = 0

# 1. Bootstrap metric distributions (accuracy and F1 overlaid)
if 'bootstrap_metrics' in robustness_analysis:
    bootstrap_df = robustness_analysis['bootstrap_metrics']
    ax = axes[plot_idx]

    for metric_column, legend_name in (('accuracy', 'Accuracy'), ('f1_score', 'F1-Score')):
        ax.hist(bootstrap_df[metric_column], bins=20, alpha=0.7, label=legend_name, density=True)
    ax.set_xlabel('Valor de Métrica')
    ax.set_ylabel('Densidad')
    ax.set_title('Distribución de Métricas\n(Bootstrap Stability)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plot_idx += 1

# 2. Coefficient of variation per metric, colour-coded by threshold
if 'bootstrap_stability' in robustness_analysis:
    stability_data = robustness_analysis['bootstrap_stability']
    metrics = ['accuracy_cv', 'f1_cv']
    cv_values = [stability_data[m] for m in metrics]

    # green below 0.05, orange below 0.1, red otherwise
    colors = []
    for cv_value in cv_values:
        if cv_value < 0.05:
            colors.append('green')
        elif cv_value < 0.1:
            colors.append('orange')
        else:
            colors.append('red')

    ax = axes[plot_idx]
    bar_positions = range(len(metrics))
    ax.bar(bar_positions, cv_values, color=colors, alpha=0.7)
    ax.set_xlabel('Métrica')
    ax.set_ylabel('Coeficiente de Variación')
    ax.set_title('Estabilidad del Modelo\n(Verde: Estable, Rojo: Inestable)')
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(['Accuracy CV', 'F1 CV'])
    for level, line_color, line_label in (
        (0.05, 'orange', 'Umbral aceptable'),
        (0.1, 'red', 'Umbral problemático'),
    ):
        ax.axhline(y=level, color=line_color, linestyle='--', alpha=0.7, label=line_label)
    ax.legend()
    ax.grid(True, alpha=0.3)
    plot_idx += 1

# 3. Sensibilidad a perturbaciones
if 'perturbation_analysis' in robustness_analysis:
    pert_data = robustness_analysis['perturbation_analysis']
    noise_levels = list(pert_data.keys())
    pred_diffs = [pert_data[level]['mean_prediction_difference'] for level in noise_levels]
    correlations = [pert_data[level]['prediction_correlation'] for level in noise_levels]
    
    axes[plot_idx].plot(noise_levels, pred_diffs, 'ro-', linewidth=2, label='Diferencia Media')
    axes[plot_idx].set_xlabel('Nivel de Ruido')
    axes[plot_idx].set_ylabel('Diferencia en Predicciones')
    axes[plot_idx].set_title('Sensibilidad a Perturbaciones')
    axes[plot_idx].grid(True, alpha=0.3)
    plot_idx += 1

# 4. Correlación con perturbaciones
axes[plot_idx].plot(noise_levels, correlations, 'bo-', linewidth=2, label='Correlación')
axes[plot_idx].set_xlabel('Nivel de Ruido')
axes[plot_idx].set_ylabel('Correlación con Baseline')
axes[plot_idx].set_title('Correlación bajo Perturbaciones')
axes[plot_idx].set_ylim([0, 1])
axes[plot_idx].grid(True, alpha=0.3)
plot_idx += 1

# 5. Influencia de características
if 'feature_influence' in robustness_analysis:
    influence_data = robustness_analysis['feature_influence']
    features = list(influence_data.keys())
    influences = list(influence_data.values())
    
    axes[plot_idx].barh(range(len(features)), influences)
    axes[plot_idx].set_yticks(range(len(features)))
    axes[plot_idx].set_yticklabels(features)
    axes[plot_idx].set_xlabel('Influencia (Diferencia Media)')
    axes[plot_idx].set_title('Influencia por Característica')
    axes[plot_idx].invert_yaxis()
    axes[plot_idx].grid(True, alpha=0.3)
    plot_idx += 1

# 6. Análisis de casos límite
if 'edge_case_analysis' in robustness_analysis:
    edge_data = robustness_analysis['edge_case_analysis']
    
    categories = []
    values = []
    
    if 'near_threshold_percentage' in edge_data:
        categories.append('Cerca del\nUmbral')
        values.append(edge_data['near_threshold_percentage'] * 100)
    
    if 'high_confidence_error_rate' in edge_data:
        categories.append('Errores Alta\nConfianza')
        values.append(edge_data['high_confidence_error_rate'] * 100)
    
    if categories:
        colors = ['orange', 'red'][:len(categories)]
        axes[plot_idx].bar(range(len(categories)), values, color=colors, alpha=0.7)
        axes[plot_idx].set_xlabel('Tipo de Caso Límite')
        axes[plot_idx].set_ylabel('Porcentaje (%)')
        axes[plot_idx].set_title('Análisis de Casos Límite')
        axes[plot_idx].set_xticks(range(len(categories)))
        axes[plot_idx].set_xticklabels(categories)
        axes[plot_idx].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('robustness_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ Análisis de robustez guardado como 'robustness_analysis.png'")


In [None]:
# 6. RESPONSIBLE AI EXECUTIVE REPORT
# Section banner: title line followed by a 55-character rule.
print("\n📋 REPORTE EJECUTIVO - RAI DASHBOARD", "=" * 55, sep="\n")

def _clamp_score(score, lower=0, upper=100):
    """Restrict ``score`` to the closed interval [lower, upper]."""
    return max(lower, min(upper, score))


def generate_rai_executive_report(explainability_analysis, fairness_analysis, robustness_analysis, model_info):
    """Build the Responsible AI (RAI) executive report.

    Args:
        explainability_analysis: Mapping that may contain 'global_importance',
            a DataFrame with 'feature' and 'importance' columns. Rows are used
            in the order given (assumed to be sorted by descending importance
            -- TODO confirm against the code that produces it).
        fairness_analysis: Mapping that may contain 'equity_metrics'
            (name -> numeric value, scored by absolute value) and
            'demographic_parity' (group -> mapping with a
            'predicted_positive_rate' entry).
        robustness_analysis: Mapping that may contain 'bootstrap_stability'
            with 'accuracy_cv' and 'f1_cv' entries.
        model_info: Stored unchanged under the 'model_info' key.

    Returns:
        dict with the keys 'timestamp', 'model_info', 'executive_summary',
        'detailed_findings', 'recommendations', 'risk_assessment' and
        'compliance_score'. The three component scores lie in [0, 100].
    """

    report = {
        'timestamp': pd.Timestamp.now().isoformat(),
        'model_info': model_info,
        'executive_summary': {},
        'detailed_findings': {},
        'recommendations': [],
        'risk_assessment': {},
        'compliance_score': {}
    }

    # 1. EXECUTIVE SUMMARY
    print("📊 Generando resumen ejecutivo...")

    # Explainability: scored from the share of total importance held by the
    # first five rows of the importance table.
    has_importance = 'global_importance' in explainability_analysis
    explainability_score = 0
    concentration_ratio = 0
    features_for_80 = 0
    if has_importance:
        importance_df = explainability_analysis['global_importance']
        total_importance = importance_df['importance'].sum()
        top_5_importance = importance_df.head(5)['importance'].sum()
        concentration_ratio = top_5_importance / total_importance if total_importance > 0 else 0

        if concentration_ratio > 0.8:
            explainability_score = 85  # Highly concentrated, easy to explain
        elif concentration_ratio > 0.6:
            explainability_score = 70  # Moderately concentrated
        else:
            explainability_score = 50  # Widely spread, hard to explain

        if total_importance > 0:
            # Features needed to REACH 80% of the importance: every row whose
            # cumulative share is still below 0.8, plus the row that crosses
            # it. Counting rows with share <= 0.8 would report 0 when the top
            # feature alone exceeds 80%.
            cumulative_share = importance_df['importance'].cumsum() / total_importance
            features_for_80 = min(int((cumulative_share < 0.8).sum()) + 1, len(importance_df))

    # Fairness: start high and subtract a penalty per metric by magnitude
    fairness_score = 90
    if 'equity_metrics' in fairness_analysis and fairness_analysis['equity_metrics']:
        for value in fairness_analysis['equity_metrics'].values():
            if value is None:
                # A metric that could not be computed carries no penalty
                continue
            magnitude = abs(value)
            if magnitude > 0.2:
                fairness_score -= 30
            elif magnitude > 0.1:
                fairness_score -= 15
            elif magnitude > 0.05:
                fairness_score -= 5
    # Several large disparities would otherwise push the score below zero
    fairness_score = _clamp_score(fairness_score)

    # Robustness: penalize high variability of the bootstrap metrics
    has_stability = 'bootstrap_stability' in robustness_analysis
    robustness_score = 80
    if has_stability:
        stability_stats = robustness_analysis['bootstrap_stability']

        if stability_stats['accuracy_cv'] > 0.1:
            robustness_score -= 30
        elif stability_stats['accuracy_cv'] > 0.05:
            robustness_score -= 15

        if stability_stats['f1_cv'] > 0.1:
            robustness_score -= 20
        elif stability_stats['f1_cv'] > 0.05:
            robustness_score -= 10

    # Overall RAI score: plain average of the three components
    overall_rai_score = (explainability_score + fairness_score + robustness_score) / 3

    report['executive_summary'] = {
        'overall_rai_score': overall_rai_score,
        'explainability_score': explainability_score,
        'fairness_score': fairness_score,
        'robustness_score': robustness_score,
        'rai_level': 'EXCELENTE' if overall_rai_score >= 80 else 'BUENO' if overall_rai_score >= 65 else 'ACEPTABLE' if overall_rai_score >= 50 else 'REQUIERE MEJORAS'
    }

    # 2. DETAILED FINDINGS
    detailed_findings = {}

    if has_importance:
        detailed_findings['explainability'] = {
            'top_3_features': importance_df.head(3)['feature'].tolist(),
            'features_for_80_percent': features_for_80,
            'importance_concentration': concentration_ratio
        }

    if 'demographic_parity' in fairness_analysis and fairness_analysis['demographic_parity']:
        parity_data = fairness_analysis['demographic_parity']
        positive_rates = [group['predicted_positive_rate'] for group in parity_data.values()]
        mean_rate = np.mean(positive_rates)
        detailed_findings['fairness'] = {
            'max_group_disparity': max(positive_rates) - min(positive_rates),
            'number_of_groups_analyzed': len(parity_data),
            # Groups whose positive rate is more than 0.1 away from the mean
            'groups_with_high_disparity': sum(1 for rate in positive_rates if abs(rate - mean_rate) > 0.1)
        }

    if has_stability:
        worst_cv = max(stability_stats['accuracy_cv'], stability_stats['f1_cv'])
        detailed_findings['robustness'] = {
            'accuracy_stability_cv': stability_stats['accuracy_cv'],
            'f1_stability_cv': stability_stats['f1_cv'],
            'model_stability_level': 'ALTA' if worst_cv < 0.05 else 'MEDIA' if worst_cv < 0.1 else 'BAJA'
        }

    report['detailed_findings'] = detailed_findings

    # 3. RECOMMENDATIONS
    recommendations = []

    if explainability_score < 70:
        recommendations.append({
            'category': 'Explicabilidad',
            'priority': 'ALTA',
            'recommendation': 'Simplificar el modelo o implementar técnicas de interpretabilidad local para mejorar la explicabilidad',
            'action_items': ['Implementar LIME o SHAP para explicaciones locales', 'Considerar modelos más simples', 'Documentar las características más importantes']
        })

    if fairness_score < 70:
        recommendations.append({
            'category': 'Equidad',
            'priority': 'ALTA',
            'recommendation': 'Abordar sesgos detectados en el modelo mediante técnicas de mitigación',
            'action_items': ['Rebalancear datos de entrenamiento', 'Implementar restricciones de equidad', 'Monitorear métricas de equidad en producción']
        })

    if robustness_score < 70:
        recommendations.append({
            'category': 'Robustez',
            'priority': 'MEDIA',
            'recommendation': 'Mejorar la estabilidad del modelo mediante técnicas de regularización',
            'action_items': ['Aumentar datos de entrenamiento', 'Aplicar técnicas de regularización', 'Implementar validación cruzada robusta']
        })

    if overall_rai_score >= 80:
        recommendations.append({
            'category': 'General',
            'priority': 'BAJA',
            'recommendation': 'Mantener monitoreo continuo y documentación de RAI',
            'action_items': ['Implementar monitoreo en tiempo real', 'Crear documentación de RAI', 'Establecer revisiones periódicas']
        })

    report['recommendations'] = recommendations

    # 4. RISK ASSESSMENT
    # Each component contributes a severe factor below 50 (forces ALTO) or a
    # moderate one below 70 (raises BAJO to MEDIO, never lowers ALTO).
    risk_rules = [
        (explainability_score, 'Modelo difícil de explicar', 'Explicabilidad limitada'),
        (fairness_score, 'Sesgos significativos detectados', 'Posibles sesgos menores'),
        (robustness_score, 'Modelo inestable', 'Estabilidad moderada'),
    ]
    risk_level = 'BAJO'
    risk_factors = []
    for component_score, severe_factor, moderate_factor in risk_rules:
        if component_score < 50:
            risk_factors.append(severe_factor)
            risk_level = 'ALTO'
        elif component_score < 70:
            risk_factors.append(moderate_factor)
            if risk_level == 'BAJO':
                risk_level = 'MEDIO'

    report['risk_assessment'] = {
        'overall_risk_level': risk_level,
        'risk_factors': risk_factors,
        'mitigation_priority': 'INMEDIATA' if risk_level == 'ALTO' else 'CORTO_PLAZO' if risk_level == 'MEDIO' else 'LARGO_PLAZO'
    }

    # 5. COMPLIANCE SCORE
    compliance_score = {
        'gdpr_compliance': 85 if explainability_score > 70 else 60,  # Right to explanation
        'ai_ethics_score': overall_rai_score,
        'regulatory_readiness': 90 if overall_rai_score > 75 and len(risk_factors) == 0 else 70 if overall_rai_score > 60 else 50
    }

    report['compliance_score'] = compliance_score

    return report

# Generate the RAI report
rai_report = generate_rai_executive_report(
    explainability_analysis,
    fairness_analysis,
    robustness_analysis,
    {
        'model_name': training_info['best_model_name'],
        'model_version': registered_model.version,
        'feature_count': len(feature_names),
        'training_samples': len(X_combined)
    }
)

# Render the executive RAI dashboard on a 3x3 grid (one fixed panel per topic)
fig, axes = plt.subplots(3, 3, figsize=(20, 15))
axes = axes.flatten()

# 1. RAI scores
scores = [
    rai_report['executive_summary']['explainability_score'],
    rai_report['executive_summary']['fairness_score'],
    rai_report['executive_summary']['robustness_score'],
    rai_report['executive_summary']['overall_rai_score']
]
labels = ['Explicabilidad', 'Equidad', 'Robustez', 'RAI General']
colors = ['green' if s >= 80 else 'orange' if s >= 65 else 'red' for s in scores]

axes[0].bar(range(len(labels)), scores, color=colors, alpha=0.8)
axes[0].set_ylabel('Puntuación')
axes[0].set_title('Puntuaciones RAI')
axes[0].set_xticks(range(len(labels)))
axes[0].set_xticklabels(labels, rotation=45)
axes[0].set_ylim([0, 100])
axes[0].grid(True, alpha=0.3)

# Print each value just above its bar
for i, score in enumerate(scores):
    axes[0].text(i, score + 2, f'{score:.0f}', ha='center', va='bottom', fontweight='bold')

# 2. Risk level, drawn as a single-slice pie colored by severity
risk_level = rai_report['risk_assessment']['overall_risk_level']
risk_colors = {'BAJO': 'green', 'MEDIO': 'orange', 'ALTO': 'red'}
risk_color = risk_colors.get(risk_level, 'gray')

axes[1].pie([1], labels=[f'Riesgo\n{risk_level}'], colors=[risk_color], autopct='', startangle=90)
axes[1].set_title('Nivel de Riesgo General')

# 3. Regulatory compliance
compliance_data = rai_report['compliance_score']
comp_labels = ['GDPR', 'Ética IA', 'Preparación\nRegulatoria']
comp_values = [compliance_data['gdpr_compliance'], compliance_data['ai_ethics_score'], compliance_data['regulatory_readiness']]

axes[2].bar(range(len(comp_labels)), comp_values, color=['blue', 'purple', 'teal'], alpha=0.7)
axes[2].set_ylabel('Puntuación')
axes[2].set_title('Cumplimiento Regulatorio')
axes[2].set_xticks(range(len(comp_labels)))
axes[2].set_xticklabels(comp_labels)
axes[2].set_ylim([0, 100])
axes[2].grid(True, alpha=0.3)

# 4. Most important features, shown by rank (longest bar = most important)
if 'explainability' in rai_report['detailed_findings']:
    top_features = rai_report['detailed_findings']['explainability']['top_3_features']
    # Derive the bar widths from the actual number of features: a model with
    # fewer than three features would not match a fixed [3, 2, 1] list.
    rank_widths = list(range(len(top_features), 0, -1))
    axes[3].barh(range(len(top_features)), rank_widths, color='lightblue', alpha=0.8)
    axes[3].set_yticks(range(len(top_features)))
    axes[3].set_yticklabels(top_features)
    axes[3].set_xlabel('Ranking de Importancia')
    axes[3].set_title('Top 3 Características\nMás Importantes')
    axes[3].invert_yaxis()
else:
    axes[3].axis('off')  # nothing to show; avoid an empty frame

# 5. Recommendations by priority
if rai_report['recommendations']:
    priorities = [rec['priority'] for rec in rai_report['recommendations']]
    priority_counts = pd.Series(priorities).value_counts()

    colors_priority = {'ALTA': 'red', 'MEDIA': 'orange', 'BAJA': 'green'}
    colors = [colors_priority.get(p, 'gray') for p in priority_counts.index]

    axes[4].pie(priority_counts.values, labels=priority_counts.index, colors=colors, autopct='%1.0f', startangle=90)
    axes[4].set_title('Distribución de\nRecomendaciones por Prioridad')
else:
    axes[4].axis('off')  # nothing to show; avoid an empty frame

# 6. Suggested implementation timeline: priority maps to a time horizon
implementation_timeline = {
    'Inmediato (0-1 mes)': len([r for r in rai_report['recommendations'] if r['priority'] == 'ALTA']),
    'Corto plazo (1-3 meses)': len([r for r in rai_report['recommendations'] if r['priority'] == 'MEDIA']),
    'Largo plazo (3+ meses)': len([r for r in rai_report['recommendations'] if r['priority'] == 'BAJA'])
}

timeline_labels = list(implementation_timeline.keys())
timeline_values = list(implementation_timeline.values())

axes[5].bar(range(len(timeline_labels)), timeline_values, color=['red', 'orange', 'green'], alpha=0.7)
axes[5].set_ylabel('Número de Acciones')
axes[5].set_title('Timeline de Implementación')
axes[5].set_xticks(range(len(timeline_labels)))
axes[5].set_xticklabels(timeline_labels, rotation=45, ha='right')

# 7. Risk vs impact matrix
priority_to_risk = {'ALTA': 3, 'MEDIA': 2, 'BAJA': 1}
category_to_impact = {'Explicabilidad': 3, 'Equidad': 3, 'Robustez': 2, 'General': 1}

# Distinct loop names keep `risk_level` above holding the report's risk string
risk_impact_data = [
    (
        priority_to_risk.get(rec['priority'], 1),
        category_to_impact.get(rec['category'], 1),
        rec['category'],
    )
    for rec in rai_report['recommendations']
]

if risk_impact_data:
    x_vals = [item[0] for item in risk_impact_data]
    y_vals = [item[1] for item in risk_impact_data]
    colors_scatter = ['red' if x >= 3 else 'orange' if x >= 2 else 'green' for x in x_vals]

    axes[6].scatter(x_vals, y_vals, c=colors_scatter, alpha=0.7, s=100)
    # Label every point; stacking the labels vertically keeps categories that
    # share the same (risk, impact) cell readable.
    for label_idx, (x_val, y_val, category) in enumerate(risk_impact_data):
        axes[6].annotate(category, (x_val, y_val), textcoords='offset points',
                         xytext=(8, 8 - 12 * label_idx), fontsize=8)
    axes[6].set_xlabel('Nivel de Riesgo')
    axes[6].set_ylabel('Nivel de Impacto')
    axes[6].set_title('Matriz Riesgo vs Impacto')
    axes[6].set_xlim([0.5, 3.5])
    axes[6].set_ylim([0.5, 3.5])
    axes[6].grid(True, alpha=0.3)
else:
    axes[6].axis('off')  # nothing to show; avoid an empty frame

# 8. Suggested improvement trend
months = ['Mes 1', 'Mes 3', 'Mes 6', 'Mes 12']
current_score = rai_report['executive_summary']['overall_rai_score']
# Illustrative gains of +10 / +20 / +25 points. Every projection is capped at
# 95 and never falls below the current score, so the curve stays monotonic and
# inside the 0-100 axis even for models that already score high.
projected_scores = [current_score] + [
    max(current_score, min(95, current_score + gain)) for gain in (10, 20, 25)
]

axes[7].plot(months, projected_scores, 'bo-', linewidth=3, markersize=8)
axes[7].fill_between(months, projected_scores, alpha=0.3)
axes[7].set_ylabel('Puntuación RAI')
axes[7].set_title('Proyección de Mejora RAI')
axes[7].set_ylim([0, 100])
axes[7].grid(True, alpha=0.3)

# 9. Current status summary (text-only panel)
status_text = f"""
ESTADO ACTUAL RAI

Puntuación General: {rai_report['executive_summary']['overall_rai_score']:.0f}/100
Nivel: {rai_report['executive_summary']['rai_level']}

Riesgo: {rai_report['risk_assessment']['overall_risk_level']}
Recomendaciones: {len(rai_report['recommendations'])}

Cumplimiento GDPR: {rai_report['compliance_score']['gdpr_compliance']:.0f}%
Preparación Regulatoria: {rai_report['compliance_score']['regulatory_readiness']:.0f}%
"""

axes[8].text(0.05, 0.95, status_text, transform=axes[8].transAxes, fontsize=10,
             verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.8))
axes[8].set_xlim([0, 1])
axes[8].set_ylim([0, 1])
axes[8].axis('off')
axes[8].set_title('Resumen Ejecutivo')

plt.tight_layout()
plt.savefig('rai_executive_dashboard.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ Dashboard ejecutivo RAI guardado como 'rai_executive_dashboard.png'")


In [None]:
# 7. REGISTRATION IN AZURE ML AND FINAL DOCUMENTATION
print("\n🔄 REGISTRO RAI EN AZURE ML Y DOCUMENTACIÓN")
print("="*55)

# Experiment that groups the RAI runs
RAI_EXPERIMENT = "candidate-rai-analysis"

# Reuse the experiment when it already exists, create it otherwise. A single
# lookup by name replaces listing every experiment and looking it up again.
existing_experiment = mlflow.get_experiment_by_name(RAI_EXPERIMENT)
rai_experiment_id = (
    existing_experiment.experiment_id
    if existing_experiment is not None
    else mlflow.create_experiment(RAI_EXPERIMENT)
)

with mlflow.start_run(experiment_id=rai_experiment_id) as current_run:

    # Log the RAI metrics to MLflow
    print("📊 Registrando métricas RAI...")

    # Main scores
    mlflow.log_metric("rai_overall_score", rai_report['executive_summary']['overall_rai_score'])
    mlflow.log_metric("explainability_score", rai_report['executive_summary']['explainability_score'])
    mlflow.log_metric("fairness_score", rai_report['executive_summary']['fairness_score'])
    mlflow.log_metric("robustness_score", rai_report['executive_summary']['robustness_score'])

    # Compliance metrics
    mlflow.log_metric("gdpr_compliance_score", rai_report['compliance_score']['gdpr_compliance'])
    mlflow.log_metric("ai_ethics_score", rai_report['compliance_score']['ai_ethics_score'])
    mlflow.log_metric("regulatory_readiness_score", rai_report['compliance_score']['regulatory_readiness'])

    # Detailed robustness metrics
    if 'bootstrap_stability' in robustness_analysis:
        stability_stats = robustness_analysis['bootstrap_stability']
        mlflow.log_metric("accuracy_cv", stability_stats['accuracy_cv'])
        mlflow.log_metric("f1_cv", stability_stats['f1_cv'])
        mlflow.log_metric("accuracy_stability_mean", stability_stats['accuracy_mean'])
        mlflow.log_metric("f1_stability_mean", stability_stats['f1_mean'])

    # Fairness metrics
    if 'equity_metrics' in fairness_analysis and fairness_analysis['equity_metrics']:
        equity_metrics = fairness_analysis['equity_metrics']
        for metric_name, value in equity_metrics.items():
            # Metrics must be numeric; skip any that could not be computed
            if value is not None:
                mlflow.log_metric(f"fairness_{metric_name}", value)

    # Log parameters
    mlflow.log_param("evaluated_model", training_info['best_model_name'])
    mlflow.log_param("model_version", registered_model.version)
    mlflow.log_param("rai_analysis_date", pd.Timestamp.now().strftime('%Y-%m-%d'))
    mlflow.log_param("risk_level", rai_report['risk_assessment']['overall_risk_level'])
    mlflow.log_param("rai_level", rai_report['executive_summary']['rai_level'])
    mlflow.log_param("recommendations_count", len(rai_report['recommendations']))

    # Log the most important features
    if 'explainability' in rai_report['detailed_findings']:
        top_features = rai_report['detailed_findings']['explainability']['top_3_features']
        for i, feature in enumerate(top_features, 1):
            mlflow.log_param(f"top_explainable_feature_{i}", feature)

    # Log artifacts (the visualizations saved by the analysis cells)
    print("🖼️ Registrando visualizaciones RAI...")
    mlflow.log_artifact("explainability_analysis.png")
    mlflow.log_artifact("fairness_analysis.png")
    mlflow.log_artifact("robustness_analysis.png")
    mlflow.log_artifact("rai_executive_dashboard.png")

    # Save the full RAI report as JSON. DataFrames nested one level deep are
    # dropped; anything else that is not JSON-native is stringified.
    rai_report_clean = {}
    for key, value in rai_report.items():
        if isinstance(value, dict):
            rai_report_clean[key] = {k: v for k, v in value.items() if not isinstance(v, pd.DataFrame)}
        else:
            rai_report_clean[key] = value

    # Explicit UTF-8 with unescaped characters keeps the Spanish text readable
    # and matches how the Markdown report is written.
    with open('rai_complete_report.json', 'w', encoding='utf-8') as f:
        json.dump(rai_report_clean, f, indent=2, default=str, ensure_ascii=False)

    mlflow.log_artifact('rai_complete_report.json')

    # Keep the run id so later steps can attach more artifacts to this run
    rai_run_id = current_run.info.run_id

    print(f"✅ Análisis RAI registrado en MLflow")
    print(f"📋 Run ID: {rai_run_id}")

# Build the detailed Markdown report. Fragments are collected in order and
# joined once at the end.
print("\n📄 Generando documentación RAI detallada...")

doc_summary = rai_report['executive_summary']
doc_risk = rai_report['risk_assessment']
doc_compliance = rai_report['compliance_score']
doc_findings = rai_report['detailed_findings']

# Header, executive summary and the opening of the detailed analysis
doc_parts = [f"""
# REPORTE DE INTELIGENCIA ARTIFICIAL RESPONSABLE (RAI)
## Modelo de Selección de Candidatos

**Fecha de análisis:** {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
**Modelo evaluado:** {training_info['best_model_name']} v{registered_model.version}
**Run ID de análisis:** {rai_run_id}

---

## RESUMEN EJECUTIVO

### Puntuaciones Generales
- **Puntuación RAI General:** {doc_summary['overall_rai_score']:.0f}/100 ({doc_summary['rai_level']})
- **Explicabilidad:** {doc_summary['explainability_score']:.0f}/100
- **Equidad:** {doc_summary['fairness_score']:.0f}/100
- **Robustez:** {doc_summary['robustness_score']:.0f}/100

### Evaluación de Riesgo
- **Nivel de riesgo:** {doc_risk['overall_risk_level']}
- **Prioridad de mitigación:** {doc_risk['mitigation_priority']}

### Cumplimiento Regulatorio
- **Cumplimiento GDPR:** {doc_compliance['gdpr_compliance']:.0f}%
- **Preparación regulatoria:** {doc_compliance['regulatory_readiness']:.0f}%

---

## ANÁLISIS DETALLADO

### 1. EXPLICABILIDAD DEL MODELO
"""]

# Section 1: explainability findings (only when they were computed)
if 'explainability' in doc_findings:
    explainability_findings = doc_findings['explainability']
    concentration = explainability_findings['importance_concentration']
    doc_parts.append("""
**Características más importantes:**
""")
    doc_parts.extend(
        f"{rank}. {feature_name}\n"
        for rank, feature_name in enumerate(explainability_findings['top_3_features'], 1)
    )
    doc_parts.append(f"""
**Concentración de importancia:** {concentration:.2f}
**Características para 80% de explicación:** {explainability_findings['features_for_80_percent']}

**Interpretación:** """)
    doc_parts.append(
        "El modelo es altamente explicable con pocas características dominantes."
        if concentration > 0.8
        else "El modelo tiene explicabilidad moderada con características importantes bien definidas."
        if concentration > 0.6
        else "El modelo distribuye importancia entre muchas características, requiere técnicas adicionales de explicabilidad."
    )

doc_parts.append("""

### 2. ANÁLISIS DE EQUIDAD
""")

# Section 2: fairness findings (only when they were computed)
if 'fairness' in doc_findings:
    fairness_findings = doc_findings['fairness']
    max_disparity = fairness_findings['max_group_disparity']
    doc_parts.append(f"""
**Grupos analizados:** {fairness_findings['number_of_groups_analyzed']}
**Disparidad máxima entre grupos:** {max_disparity:.3f}
**Grupos con alta disparidad:** {fairness_findings['groups_with_high_disparity']}

**Interpretación:** """)
    doc_parts.append(
        "El modelo muestra alta equidad entre grupos analizados."
        if max_disparity < 0.05
        else "El modelo muestra equidad aceptable con disparidades menores."
        if max_disparity < 0.1
        else "Se detectaron disparidades significativas que requieren atención."
    )

doc_parts.append("""

### 3. ANÁLISIS DE ROBUSTEZ
""")

# Section 3: robustness findings (only when they were computed)
if 'robustness' in doc_findings:
    robustness_findings = doc_findings['robustness']
    doc_parts.append(f"""
**Coeficiente de variación - Accuracy:** {robustness_findings['accuracy_stability_cv']:.4f}
**Coeficiente de variación - F1:** {robustness_findings['f1_stability_cv']:.4f}
**Nivel de estabilidad:** {robustness_findings['model_stability_level']}

**Interpretación:** """)
    # Any level other than ALTA / MEDIA falls back to the low-stability text
    stability_messages = {
        'ALTA': "El modelo muestra alta estabilidad y robustez.",
        'MEDIA': "El modelo tiene estabilidad moderada, aceptable para producción.",
    }
    doc_parts.append(stability_messages.get(
        robustness_findings['model_stability_level'],
        "El modelo muestra baja estabilidad y requiere mejoras antes de producción.",
    ))

doc_parts.append("""

---

## RECOMENDACIONES

""")

# One numbered subsection per recommendation, followed by its action items
for position, rec in enumerate(rai_report['recommendations'], 1):
    doc_parts.append(f"""
### {position}. {rec['category']} (Prioridad: {rec['priority']})
**Recomendación:** {rec['recommendation']}

**Acciones específicas:**
""")
    doc_parts.extend(f"- {action}\n" for action in rec['action_items'])
    doc_parts.append("\n")

# Risk factors section is emitted only when at least one was identified
if doc_risk['risk_factors']:
    doc_parts.append("""
---

## FACTORES DE RIESGO IDENTIFICADOS

""")
    doc_parts.extend(
        f"{position}. {risk_factor}\n"
        for position, risk_factor in enumerate(doc_risk['risk_factors'], 1)
    )

doc_parts.append(f"""

---

## CONCLUSIONES Y PRÓXIMOS PASOS

### Estado Actual
El modelo presenta un nivel RAI **{doc_summary['rai_level']}** con una puntuación de {doc_summary['overall_rai_score']:.0f}/100. El riesgo general se clasifica como **{doc_risk['overall_risk_level']}**.

### Recomendaciones Prioritarias
""")

# High-priority ('ALTA') recommendations are repeated in the conclusions
high_priority_recs = [r for r in rai_report['recommendations'] if r['priority'] == 'ALTA']
if not high_priority_recs:
    doc_parts.append("No se identificaron áreas que requieran atención inmediata.\n")
else:
    doc_parts.append(f"Se requiere atención inmediata en {len(high_priority_recs)} área(s) crítica(s):\n")
    doc_parts.extend(
        f"- {rec['category']}: {rec['recommendation']}\n" for rec in high_priority_recs
    )

doc_parts.append(f"""

### Siguientes Pasos
1. **Inmediato (0-1 mes):** Implementar recomendaciones de alta prioridad
2. **Corto plazo (1-3 meses):** Abordar recomendaciones de prioridad media
3. **Largo plazo (3+ meses):** Implementar mejoras adicionales y monitoreo continuo

### Monitoreo Continuo
- Revisar métricas RAI cada 3 meses
- Monitorear equidad en producción
- Actualizar documentación de explicabilidad
- Evaluar nuevos riesgos emergentes

---

**Documento generado automáticamente por el sistema RAI**
**Última actualización:** {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
""")

rai_documentation = "".join(doc_parts)

# Persist the report as UTF-8 Markdown
with open('rai_detailed_report.md', 'w', encoding='utf-8') as f:
    f.write(rai_documentation)

# Attach the Markdown report to the RAI run: resuming the run by its id lets
# the artifact land next to the metrics logged earlier.
with mlflow.start_run(run_id=rai_run_id):
    mlflow.log_artifact('rai_detailed_report.md')

print("✅ Documentación RAI completa generada")

# Final summary
print("\n🎯 RESUMEN FINAL DEL ANÁLISIS RAI")
print("="*55)
print(f"📊 Puntuación RAI General: {rai_report['executive_summary']['overall_rai_score']:.0f}/100")
print(f"🏆 Nivel RAI: {rai_report['executive_summary']['rai_level']}")
print(f"⚠️  Nivel de riesgo: {rai_report['risk_assessment']['overall_risk_level']}")
print(f"📋 Recomendaciones generadas: {len(rai_report['recommendations'])}")
print(f"🔄 Run ID RAI: {rai_run_id}")

print("\n📁 ARCHIVOS GENERADOS:")
for generated_file in (
    "explainability_analysis.png",
    "fairness_analysis.png",
    "robustness_analysis.png",
    "rai_executive_dashboard.png",
    "rai_complete_report.json",
    "rai_detailed_report.md",
):
    print(f"  - {generated_file}")

print("\n✅ ANÁLISIS RAI COMPLETADO")
print("🔗 Todos los artefactos disponibles en Azure ML Studio")
print(f"📋 Experimento: {RAI_EXPERIMENT}")

# Final metrics kept for follow-up reporting
final_rai_metrics = {
    'overall_score': rai_report['executive_summary']['overall_rai_score'],
    'risk_level': rai_report['risk_assessment']['overall_risk_level'],
    'recommendations_count': len(rai_report['recommendations']),
    'high_priority_recommendations': len([r for r in rai_report['recommendations'] if r['priority'] == 'ALTA']),
    # 70 is the readiness threshold this pipeline treats as compliant
    'compliance_ready': rai_report['compliance_score']['regulatory_readiness'] >= 70
}

print("\n📈 MÉTRICAS FINALES PARA SEGUIMIENTO:")
for metric, value in final_rai_metrics.items():
    print(f"  {metric}: {value}")

print("\n🎉 Pipeline RAI completo exitosamente!")
# Only announce production readiness when the report supports it: a model
# with high risk or insufficient regulatory readiness must not be presented
# as ready to deploy.
if final_rai_metrics['compliance_ready'] and final_rai_metrics['risk_level'] != 'ALTO':
    print("🔄 Ready para revisión ejecutiva y implementación en producción")
else:
    print("⚠️  Revisión ejecutiva requerida: mitigar los riesgos identificados antes de implementar en producción")