In [5]:
import openml
import pandas as pd
from openml.tasks import TaskType

def get_algorithm_ranking_for_dataset(
    dataset_id,
    metric="accuracy",
):
    """
    Returns a ranking of algorithms for a given dataset
    based on OpenML runs.
    """

    # 1. Obtener tareas del dataset
    tasks = openml.tasks.list_tasks(
        data_id=dataset_id,
        task_type= TaskType.SUPERVISED_CLASSIFICATION,
        output_format="dataframe"
    )

    if tasks.empty:
        return None

    task_ids = tasks.index.tolist()

    # 2. Obtener runs asociados a esas tareas
    runs = openml.runs.list_runs(
        task=task_ids,
        output_format="dataframe"
    )

    if runs.empty:
        return None

    # 3. Filtrar solo runs que tengan la métrica deseada
    runs = runs[runs[f"evaluation_measures.{metric}"].notna()]

    if runs.empty:
        return None

    # # 4. Extraer información relevante
    # df = pd.DataFrame({
    #     "algorithm": runs["flow_name"],
    #     "score": runs[f"evaluation_measures.{metric}"]
    # })

    # 3. Extraer el score de la métrica desde la columna 'evaluations'
    def get_score(row):
        evals = row.get("evaluations", {})
        return evals.get(metric, None)

    runs["score"] = runs.apply(get_score, axis=1)
    
    # Filtrar solo los runs que tengan score
    runs = runs[runs["score"].notna()]
    if runs.empty:
        return None

    # 4. Extraer información relevante
    df = pd.DataFrame({
        "algorithm": runs["flow_name"],
        "score": runs["score"]
    })

    # 5. Agregar: mejor score por algoritmo
    agg = df.groupby("algorithm")["score"].max().reset_index()
    

    # 6. Ranking (mayor score = mejor)
    ranking = agg.sort_values("score", ascending=False).reset_index(drop=True)
    ranking["rank"] = ranking.index + 1

    return ranking


In [6]:
dataset_id = 61  # Iris dataset
ranking = get_algorithm_ranking_for_dataset(dataset_id)

print(ranking.head())


KeyError: 'evaluation_measures.accuracy'