In [None]:
# Verificar paquetes necesarios
!pip show azure-ai-ml


In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

# Try DefaultAzureCredential first and fall back to an interactive browser
# login when a management-plane token cannot be obtained with it.
try:
    credential = DefaultAzureCredential()
    # Requesting a token here makes a non-working credential fail in this cell
    # instead of at the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Tell the user why the fallback is happening instead of silently
    # discarding the original error.
    print(
        f"⚠️ DefaultAzureCredential no disponible ({type(ex).__name__}); "
        "usando InteractiveBrowserCredential"
    )
    credential = InteractiveBrowserCredential()


In [None]:
# Connect to the workspace using the credential obtained above.
# NOTE(review): from_config presumably reads the workspace details from a local
# config file — confirm that one is available where this notebook runs.
ml_client = MLClient.from_config(credential=credential)
print(f"Conectado al workspace: {ml_client.workspace_name}")


In [None]:
# CHANGE THE NAME OF YOUR MODEL HERE
MODELO_SELECCIONADO = "model_RandomForest"  # e.g. model_SVM, model_LogisticRegression, model_GradientBoosting, etc.

# Check that version "1" of the selected model can be fetched from the workspace.
try:
    modelo = ml_client.models.get(name=MODELO_SELECCIONADO, version="1")
except Exception as e:
    print(f"❌ Error: No se encontró el modelo {MODELO_SELECCIONADO}")
    # Show the underlying error as well: the lookup can also fail for reasons
    # other than a missing model, and that should not be hidden.
    print(f"   Detalle: {type(e).__name__}: {e}")
    print("Modelos disponibles:")
    for model in ml_client.models.list():
        print(f"  - {model.name}")
else:
    # Only reached when the lookup succeeded; kept outside the try so that an
    # unrelated failure here is not reported as "model not found".
    print(f"✅ Modelo encontrado: {modelo.name} v{modelo.version}")
    print(f"📝 Descripción: {modelo.description}")
    print(f"🏷️ Tipo: {modelo.type}")


In [None]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import os

# CHANGE THE PATHS TO YOUR DATA HERE
# Use the same data that was used to train the model
RUTA_DATOS_TRAIN = "train_data.csv"  # Replace with your file
RUTA_DATOS_TEST = "test_data.csv"    # Replace with your file
COLUMNA_TARGET = "target"            # Replace with your target column

# If your data is in another format, adjust the settings above.
# List the candidate data files found in the current working directory.
print("📁 Archivos disponibles en el directorio actual:")
for file in filter(
    lambda nombre: nombre.endswith(('.csv', '.parquet', '.json')),
    os.listdir("."),
):
    print(f"  - {file}")


In [None]:
# Load the train/test CSV files (they are converted to Parquet for RAI in a later cell)
try:
    # Load data
    df_train = pd.read_csv(RUTA_DATOS_TRAIN)
    df_test = pd.read_csv(RUTA_DATOS_TEST)

    print("✅ Datos cargados:")
    print(f"  📊 Train: {df_train.shape}")
    print(f"  📊 Test: {df_test.shape}")
    print(f"  🎯 Columna target: {COLUMNA_TARGET}")

    # The pipeline is given a single target column name for both datasets,
    # so check that the column is present in each split, not only in train.
    for nombre_split, df_split in (("train", df_train), ("test", df_test)):
        if COLUMNA_TARGET not in df_split.columns:
            print(f"❌ Error: Columna '{COLUMNA_TARGET}' no encontrada en {nombre_split}")
            print(f"Columnas disponibles: {list(df_split.columns)}")
        else:
            print(f"✅ Columna target encontrada en {nombre_split}")

    # Show a preview of the training data
    print("\n📋 Preview de los datos:")
    display(df_train.head())

except Exception as e:
    # Best-effort cell: report the problem and a hint instead of a traceback.
    print(f"❌ Error cargando datos: {e}")
    print("\n💡 Asegúrate de que las rutas de los archivos sean correctas")


In [None]:
# Make sure both output folders exist before anything is written
for carpeta in ("rai-train-data", "rai-test-data"):
    os.makedirs(carpeta, exist_ok=True)

# Build both Arrow tables first, then write each one as a Parquet file
table_train, table_test = (
    pa.Table.from_pandas(df_origen) for df_origen in (df_train, df_test)
)

for tabla, destino in (
    (table_train, "rai-train-data/data.parquet"),
    (table_test, "rai-test-data/data.parquet"),
):
    pq.write_table(tabla, destino, version="1.0")

print("✅ Datos convertidos a formato Parquet")


In [None]:
# Create the MLTable definition files (required for RAI)

# Both folders use the same definition: it selects every ./*.parquet file
# and applies the read_parquet transformation.
mltable_train = '''
type: mltable
paths:
  - pattern: ./*.parquet
transformations:
  - read_parquet
'''

# Kept as its own name for backward compatibility; the content is identical.
mltable_test = mltable_train

for carpeta_mltable, contenido_mltable in (
    ("rai-train-data", mltable_train),
    ("rai-test-data", mltable_test),
):
    # Create the folder if it is missing so this cell does not depend on the
    # directories already existing.
    os.makedirs(carpeta_mltable, exist_ok=True)
    # Explicit encoding so the file does not depend on the platform default.
    with open(f"{carpeta_mltable}/MLTable", "w", encoding="utf-8") as f:
        f.write(contenido_mltable)

print("✅ Archivos MLTable creados")


In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Names and version for the two data assets
train_data_name = f"{MODELO_SELECCIONADO}_train_rai"
test_data_name = f"{MODELO_SELECCIONADO}_test_rai"
data_version = "1"


def _registrar_mltable(carpeta, nombre, descripcion):
    """Build an MLTable data asset for ``carpeta`` and submit it to the workspace.

    Returns the locally built ``Data`` entity (not the value returned by
    ``create_or_update``).
    """
    asset = Data(
        path=carpeta,
        type=AssetTypes.MLTABLE,
        description=descripcion,
        name=nombre,
        version=data_version,
    )
    ml_client.data.create_or_update(asset)
    return asset


# Register the training data
train_data = _registrar_mltable(
    "rai-train-data/",
    train_data_name,
    f"RAI training data para {MODELO_SELECCIONADO}",
)

# Register the test data
test_data = _registrar_mltable(
    "rai-test-data/",
    test_data_name,
    f"RAI test data para {MODELO_SELECCIONADO}",
)

print(f"✅ Datasets registrados:")
print(f"  📊 Train: {train_data_name}")
print(f"  📊 Test: {test_data_name}")


In [None]:
# Client bound to the "azureml" registry, reusing the workspace client's
# subscription and resource group; the RAI components are fetched through it.
registry_name = "azureml"
ml_client_registry = MLClient(
    credential=credential,
    subscription_id=ml_client.subscription_id,
    resource_group_name=ml_client.resource_group_name,
    registry_name=registry_name,
)

# Resolve the constructor by label, then pin the other components to its version
label = "latest"
rai_constructor_component = ml_client_registry.components.get(
    name="rai_tabular_insight_constructor", label=label
)
version = rai_constructor_component.version
print(f"📦 Versión de componentes RAI: {version}")

# Fetch the remaining three components at that same version, in order
(
    rai_erroranalysis_component,
    rai_explanation_component,
    rai_gather_component,
) = (
    ml_client_registry.components.get(name=nombre_componente, version=version)
    for nombre_componente in (
        "rai_tabular_erroranalysis",
        "rai_tabular_explanation",
        "rai_tabular_insight_gather",
    )
)

print("✅ Componentes RAI obtenidos")


In [None]:
from azure.ai.ml import Input, dsl, Output
from azure.ai.ml.constants import AssetTypes
import uuid

# Model configuration
model_name = MODELO_SELECCIONADO
# "<name>:<version>" identifier; version 1 is hard-coded here.
expected_model_id = f"{model_name}:1"
# Same identifier with the "azureml:" prefix, used as the model input path.
azureml_model_id = f"azureml:{expected_model_id}"

# CHANGE the name of your compute cluster here
compute_name = "aml-cluster"  # Replace with your cluster

# Pipeline definition for the RAI dashboard of the selected model.
#
# Steps: constructor -> (error analysis, explanation) -> gather.
# Parameters:
#   target_column_name: forwarded to the constructor as target_column_name.
#   train_data / test_data: forwarded to the constructor as train_dataset / test_dataset.
# Returns a dict exposing the gather step's "dashboard" output.
#
# NOTE(review): the azure-ai-ml DSL inspects this function; local variable names,
# a docstring or parameter annotations may influence the generated pipeline
# (e.g. node names) — confirm before renaming locals or adding either.
@dsl.pipeline(
    compute=compute_name,
    description=f"RAI dashboard para {model_name}",
    experiment_name=f"RAI_insights_{model_name}",
)
def rai_pipeline_para_tu_modelo(target_column_name, train_data, test_data):
    # Build the RAI dashboard from the model and the two datasets
    create_rai_job = rai_constructor_component(
        title=f"RAI Dashboard - {model_name}",
        task_type="classification",  # Change to "regression" if needed
        model_info=expected_model_id,
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        train_dataset=train_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
    )
    # Same limit is applied to every step (presumably seconds — TODO confirm)
    create_rai_job.set_limits(timeout=300)

    # Add error analysis, fed by the constructor's dashboard output
    error_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    error_job.set_limits(timeout=300)

    # Add model explanations, fed by the same constructor output
    explanation_job = rai_explanation_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        comment=f"Explicaciones para {model_name}", 
    )
    explanation_job.set_limits(timeout=300)

    # Combine all insights into a single dashboard
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_3=error_job.outputs.error_analysis,
        insight_4=explanation_job.outputs.explanation,
    )
    rai_gather_job.set_limits(timeout=300)

    # The gathered dashboard is written in "upload" mode
    rai_gather_job.outputs.dashboard.mode = "upload"

    return {
        "dashboard": rai_gather_job.outputs.dashboard,
    }

print(f"✅ Pipeline RAI definido para {model_name}")


In [None]:
from azure.ai.ml import Input

def _entrada_mltable(nombre_asset):
    """Return a download-mode ``mltable`` Input for ``nombre_asset`` at ``data_version``."""
    return Input(
        type="mltable",
        path=f"azureml:{nombre_asset}:{data_version}",
        mode="download",
    )


# Prepare the pipeline inputs from the two registered data assets
diabetes_train_input = _entrada_mltable(train_data_name)
diabetes_test_input = _entrada_mltable(test_data_name)

# Create the pipeline instance
insights_pipeline_job = rai_pipeline_para_tu_modelo(
    target_column_name=COLUMNA_TARGET,
    train_data=diabetes_train_input,
    test_data=diabetes_test_input,
)

# Configure the output: upload the dashboard to a fresh, randomly named folder
# under the workspaceblobstore datastore
rand_path = str(uuid.uuid4())
dashboard_output = Output(
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/dashboard/",
    mode="upload",
    type="uri_folder",
)
insights_pipeline_job.outputs.dashboard = dashboard_output

print("✅ Pipeline configurado y listo para ejecutar")


In [None]:
from azure.ai.ml.entities import PipelineJob
from IPython.core.display import HTML
from IPython.display import display
import time

def submit_and_wait(ml_client, pipeline_job, poll_interval=30, timeout=None) -> "PipelineJob":
    """Submit ``pipeline_job`` and block until it reaches a terminal status.

    Args:
        ml_client: Client whose ``jobs`` operations are used to submit and poll.
        pipeline_job: The pipeline job to submit.
        poll_interval: Seconds to sleep between status checks (default 30,
            the value that was previously hard-coded).
        timeout: Optional maximum number of seconds to wait for a terminal
            status. ``None`` (the default) waits indefinitely, as before.

    Returns:
        The job as last fetched, with status "Completed", "Failed",
        "Canceled" or "NotResponding".

    Raises:
        AssertionError: If submitting the job returns ``None``.
        TimeoutError: If ``timeout`` is set and elapses before the job
            reaches a terminal status.
    """
    terminal_states = ("Completed", "Failed", "Canceled", "NotResponding")

    created_job = ml_client.jobs.create_or_update(pipeline_job)
    assert created_job is not None

    print("🚀 Pipeline enviado. Puedes seguir el progreso en:")
    display(HTML(f'<a href="{created_job.studio_url}" target="_blank">{created_job.studio_url}</a>'))

    # Monotonic clock so the deadline is unaffected by wall-clock adjustments.
    started = time.monotonic()
    while created_job.status not in terminal_states:
        if timeout is not None and time.monotonic() - started >= timeout:
            raise TimeoutError(
                f"El pipeline {created_job.name} sigue en estado "
                f"{created_job.status} tras {timeout} s"
            )
        time.sleep(poll_interval)
        # Re-fetch the job by name to observe its current status.
        created_job = ml_client.jobs.get(created_job.name)
        print(f"📊 Estado actual: {created_job.status}")

    if created_job.status == "Completed":
        print("✅ Pipeline completado exitosamente!")
        print("🎯 Ve al Azure ML Studio para ver tu dashboard RAI")
    else:
        print(f"❌ Pipeline terminó con estado: {created_job.status}")

    return created_job

# RUN THE PIPELINE
# Announce which model and target column are used, then submit the configured
# pipeline job and wait until submit_and_wait returns.
print(f"🎯 Ejecutando RAI para el modelo: {MODELO_SELECCIONADO}")
print(f"📊 Usando datos con columna target: {COLUMNA_TARGET}")
print("\n⏳ Esto puede tomar varios minutos...")

insights_job = submit_and_wait(ml_client, insights_pipeline_job)
