In [1]:
# ruff: noqa: T201, T203
from __future__ import annotations

import os
from pathlib import Path

# If the kernel was started inside a directory named "notebooks", step up one
# level. Presumably this makes the `config` / `src` imports and the relative
# data paths used below resolve from the project root — confirm the layout.
if Path.cwd().name == "notebooks":
    os.chdir("..")

import logging

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

from config.config import RUN_DIR
from config.runs_vars import SUBSET_TO_DATASET_MAP
from src.constants import PAIRS_SEPARATOR
from src.evaluate import (
    analyze_results,
    get_predictions_with_gt,
    plot_usage_histograms,
    read_run_metrics_df,
    store_run_metrics_df,
)
from src.formatting import (
    format_combined_metrics_path,
    format_oracle_pairs_filepath,
    format_oracle_pairs_precomputed_dir,
    format_predictions_run_path,
    format_storing_pathes_from_run_path,
    format_subsets_ontologies_paths,
)
from src.LLM_servers.openai import OpenAIServer
from src.onto_access import OntologyAccess
from src.onto_object import OntologyEntryAttr
from src.processing import parallel_samples_process, save_oracle_pairs_with_prompts, try_load_precomputed_oracle_pairs
from src.prompts.prompts import (
    prompt_direct_entity,
    prompt_direct_entity_ontological,
    prompt_direct_entity_with_synonyms,
    prompt_sequential_hierarchy,
    prompt_sequential_hierarchy_ontological,
    prompt_sequential_hierarchy_with_synonyms,
)
from src.prompts.system import (
    BASELINE_INITIALIZATION_MESSAGE,
    INTUITIVE_NATURAL_LANGUAGE_JUDGEMENT_MESSAGE,
    ONTOLOGY_AWARE_REASONING_MESSAGE,
    SYNONYM_AWARE_MESSAGE,
)
from src.utils import read_oracle_pairs, save_run_results

# Do not truncate rows when a DataFrame is displayed.
pd.set_option("display.max_rows", None)
# Set the root logger threshold to WARNING.
logging.getLogger().setLevel(logging.WARNING)
# Load variables from a .env file; the experiment cell reads its API keys from os.environ.
load_dotenv()

%load_ext autoreload
%autoreload 2

In [2]:
# Registry of prompt builders. Each key is the function's __name__ with the
# text "prompt_" removed (e.g. prompt_direct_entity -> "direct_entity");
# insertion order follows the tuple below.
PROMPT_FUNCTIONS_MAP = dict(
    (builder.__name__.replace("prompt_", ""), builder)
    for builder in (
        prompt_direct_entity,
        prompt_direct_entity_ontological,
        prompt_sequential_hierarchy,
        prompt_sequential_hierarchy_ontological,
        prompt_direct_entity_with_synonyms,
        prompt_sequential_hierarchy_with_synonyms,
    )
)

In [3]:
# Dataset and subset used by the pre-validation cells.
DATASET = "anatomy"
SUBSET = "human-mouse"

# Resolve the (source, target) ontology file paths for that subset and show them.
src_onto_path, tgt_onto_path = format_subsets_ontologies_paths(DATASET, SUBSET)
(src_onto_path, tgt_onto_path)

(PosixPath('data/anatomy/human-mouse/mouse.owl'),
 PosixPath('data/anatomy/human-mouse/human.owl'))

### Pre-Validation

In [None]:
# Open the source ontology first, then the target, both with annotate_on_init=True.
onto_src, onto_tgt = (
    OntologyAccess(onto_path, annotate_on_init=True) for onto_path in (src_onto_path, tgt_onto_path)
)

In [None]:
# Take the first oracle pair of the selected subset and wrap both sides as
# ontology entries bound to their respective ontologies.
first_pair = read_oracle_pairs(format_oracle_pairs_filepath(DATASET, SUBSET))[0]
src_ref, tgt_ref = first_pair
src_entity = OntologyEntryAttr(src_ref, onto_src)
tgt_entity = OntologyEntryAttr(tgt_ref, onto_tgt)

print(src_entity, tgt_entity, sep="\n")

# Render the same pair with every registered prompt builder for a visual sanity check.
divider = "=" * 50
for name, prompt_function in PROMPT_FUNCTIONS_MAP.items():
    print(f"Prompt function: {name}")
    print(prompt_function(src_entity, tgt_entity))
    print(divider)

{'class': mouse.MA_0002684, 'uri': 'http://mouse.owl#MA_0002684', 'preffered_names': {'stomach muscularis mucosa'}, 'synonyms': {'MA_0002684'}, 'all_names': {'stomach muscularis mucosa'}, 'parents': {owl.Thing}, 'children': set()}
{'class': human.NCI_C32658, 'uri': 'http://human.owl#NCI_C32658', 'preffered_names': {'Gastric_Muscularis_Mucosa'}, 'synonyms': {'Gastric Muscularis Mucosa'}, 'all_names': {'Gastric_Muscularis_Mucosa', 'Gastric Muscularis Mucosa'}, 'parents': {human.NCI_C13166, human.NCI_C21599, human.NCI_C12749, human.NCI_C32656, human.NCI_C12219, human.NCI_C33177, human.NCI_C32664, human.NCI_C33149, human.NCI_C32918, human.NCI_C12801, human.NCI_C33904, owl.Thing}, 'children': set()}
Prompt function: direct_entity
We have two entities from different biomedical ontologies.
The first one is "stomach muscularis mucosa", which belongs to the broader category "Thing"
The second one is "Gastric_Muscularis_Mucosa", which belongs to the broader category "Gastric_Mucosa"

Do they mea

In [None]:
# For each prompt builder, load its precomputed oracle pairs and print the
# fields of the first record, one per line.
for name, prompt_function in PROMPT_FUNCTIONS_MAP.items():
    print(f"Prompt function: {name}")
    oracle_candidate_pairs = try_load_precomputed_oracle_pairs(DATASET, SUBSET, prompt_function)
    first_record = oracle_candidate_pairs[0]
    print("\n".join(str(field) for field in first_record))


Prompt function: direct_entity
http://mouse.owl#MA_0002684
http://human.owl#NCI_C32658
We have two entities from different biomedical ontologies.
The first one is "stomach muscularis mucosa", which belongs to the broader category "Thing"
The second one is "Gastric_Muscularis_Mucosa", which belongs to the broader category "Gastric_Mucosa"

Do they mean the same thing? Respond with "True" or "False".
Prompt function: direct_entity_ontological
http://mouse.owl#MA_0002684
http://human.owl#NCI_C32658
Analyze the following entities, each originating from a distinct biomedical ontology.
Your task is to assess whether they represent the **same ontological concept**, considering both their semantic meaning and hierarchical position.

1. Source entity: "stomach muscularis mucosa"
	- Direct ontological parent: Thing

2. Target entity: "Gastric_Muscularis_Mucosa"
	- Direct ontological parent: Gastric_Mucosa

Are these entities **ontologically equivalent** within their respective ontologies? Respon

### Pre-Processing

In [None]:
# Dataset -> subsets whose oracle pairs get their prompts precomputed in this pass.
# The commented-out entries are currently disabled.
ALL_DATASET_NAMES = {
    # "anatomy": ["human-mouse"],
    # "bioml-2024": ["snomed-fma.body", "snomed-ncit.neoplas", "snomed-ncit.pharm", "ncit-doid", "omim-ordo"],
    "largebio": ["fma-nci", "fma-snomed", "snomed-nci"],
}
prompt_functions = [*PROMPT_FUNCTIONS_MAP.values()]

for dataset_name, set_names in ALL_DATASET_NAMES.items():
    for set_name in set_names:
        progress_label = f"Loading ontologies: {dataset_name} - {set_name}"
        # Best effort: a failure on one subset is reported and the sweep moves on.
        try:
            ontologies = [
                OntologyAccess(onto_path, annotate_on_init=True)
                for onto_path in tqdm(format_subsets_ontologies_paths(dataset_name, set_name), desc=progress_label)
            ]

            pairs_with_prompts_df = save_oracle_pairs_with_prompts(
                format_oracle_pairs_filepath(dataset_name, set_name),
                *ontologies,
                prompt_functions,
                format_oracle_pairs_precomputed_dir(dataset_name, set_name),
                PAIRS_SEPARATOR,
                max_workers=2,
            )
        except Exception as e:  # noqa: BLE001
            print(f"Error processing {dataset_name} - {set_name}: {e}")

### Run experiments


In [None]:
# Worker count handed to parallel_samples_process in the experiment cell.
MAX_WORKERS = 100
# Models to evaluate. Names starting with "gemini" are routed to the Gemini
# endpoint in the experiment cell; anything else uses the OpenAI key.
MODELS = ["gemini-2.0-flash-lite"]  # "gpt-4o-mini"

# Dataset -> subsets included in the sweep; trailing comments list subsets that are currently left out.
DATASETS_MAP = {
    "anatomy": ["human-mouse"],
    "bioml-2024": ["omim-ordo"], # left out: "snomed-fma.body", "snomed-ncit.neoplas", "snomed-ncit.pharm", "ncit-doid"
    "largebio": ["fma-nci"], # left out: "fma-snomed", "snomed-nci"
}
# Label -> system message. The label becomes `exp_spec` of the run path; a
# value of None means no system context is added to the LLM client.
SYSPROMPTS_MAP = {
    "base": BASELINE_INITIALIZATION_MESSAGE,
    "natural_language": INTUITIVE_NATURAL_LANGUAGE_JUDGEMENT_MESSAGE,
    "ontology_aware": ONTOLOGY_AWARE_REASONING_MESSAGE,
    "synonym_aware": SYNONYM_AWARE_MESSAGE,
    "none": None,
}

# The experiment cell passes these straight to parallel_samples_process.
# Presumably ontologies are not needed when precomputed prompts exist — confirm.
onto_src, onto_tgt = None, None
# NOTE: this spelling ends up in the run paths (e.g. outputs/.../syspromts/none),
# so changing it would point new runs at a different directory.
EXPERIMENT_TYPE = "syspromts"

In [5]:
# Sweep: system prompt x dataset x subset x model x prompt builder.
# For every combination the precomputed oracle pairs are sent to the LLM, the
# predictions are stored under the run path and then scored against ground truth.
for sysprompt_name, sys_prompt in SYSPROMPTS_MAP.items():
    for DATASET, subset_names in DATASETS_MAP.items():
        for SUBSET in subset_names:
            # Load the ontologies here, if there are no precomputed prompts
            for MODEL in MODELS:
                # Gemini models are reached through Google's OpenAI-compatible endpoint.
                if MODEL.startswith("gemini"):
                    llm_oracle = OpenAIServer(
                        api_key=os.environ["GEMINI_API_KEY"],
                        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
                    )
                else:
                    llm_oracle = OpenAIServer(api_key=os.environ["OPENAI_API_KEY"])
                # The "none" configuration maps to None: no system context is added.
                if sys_prompt:
                    llm_oracle.add_system_context(sys_prompt)

                run_path = format_predictions_run_path(DATASET, SUBSET, MODEL, EXPERIMENT_TYPE, exp_spec=sysprompt_name)
                print(f"{run_path=} | {DATASET=} | {SUBSET=} | {MODEL=} | {sysprompt_name=}")

                for prompt_name, prompt_function in PROMPT_FUNCTIONS_MAP.items():
                    oracle_candidate_pairs = try_load_precomputed_oracle_pairs(DATASET, SUBSET, prompt_function)

                    results, tokens_usage, confidences = parallel_samples_process(
                        oracle_candidate_pairs,
                        llm_oracle,
                        onto_src,
                        onto_tgt,
                        MODEL,
                        MAX_WORKERS,
                        prompt_function,
                    )
                    prediction_path, stats_path, diagram_path = format_storing_pathes_from_run_path(
                        run_path, SUBSET, MODEL, prompt_name, suffix=""
                    )
                    save_run_results(results, prediction_path, columns=["Source", "Target", "Prediction", "Confidence"])
                    plot_usage_histograms(tokens_usage, confidences, do_plot=False, do_print=False, suptitle=prompt_name)

                    # Scoring is best effort: a failure for one prompt is printed and the sweep continues.
                    try:
                        analyze_results(
                            get_predictions_with_gt(run_path, DATASET, SUBSET, MODEL, prompt_name, suffix=""),
                            print_results=False,
                            plot_confusion_matrix=False,
                            subtitle=f"{SUBSET}: {MODEL} {prompt_name} | ",
                            cm_save_path=diagram_path,
                            stats_path=stats_path,
                        )
                    except Exception as e:  # noqa: BLE001
                        print(f"Error: {e}")

                # One combined metrics table per (sysprompt, dataset, subset, model) run.
                store_run_metrics_df(PROMPT_FUNCTIONS_MAP, run_path, DATASET, SUBSET, MODEL)

run_path=PosixPath('outputs/anatomy/human-mouse/gemini-2.0-flash-lite/syspromts/none') | DATASET='anatomy' | SUBSET='human-mouse' | MODEL='gemini-2.0-flash-lite' | sysprompt_name='none'


Processing Lines prompt_direct_entity: 100%|██████████| 398/398 [00:08<00:00, 44.82it/s]
Processing Lines prompt_direct_entity_ontological: 100%|██████████| 398/398 [00:20<00:00, 19.23it/s]
Processing Lines prompt_sequential_hierarchy: 100%|██████████| 398/398 [00:20<00:00, 19.61it/s]
Processing Lines prompt_sequential_hierarchy_ontological: 100%|██████████| 398/398 [00:12<00:00, 31.10it/s] 
Processing Lines prompt_direct_entity_with_synonyms: 100%|██████████| 398/398 [00:18<00:00, 21.74it/s]
Processing Lines prompt_sequential_hierarchy_with_synonyms: 100%|██████████| 398/398 [00:10<00:00, 37.47it/s]


run_path=PosixPath('outputs/bioml-2024/omim-ordo/gemini-2.0-flash-lite/syspromts/none') | DATASET='bioml-2024' | SUBSET='omim-ordo' | MODEL='gemini-2.0-flash-lite' | sysprompt_name='none'


Processing Lines prompt_direct_entity: 100%|██████████| 1464/1464 [00:32<00:00, 44.60it/s]
Processing Lines prompt_direct_entity_ontological: 100%|██████████| 1464/1464 [00:32<00:00, 44.43it/s]
Processing Lines prompt_sequential_hierarchy: 100%|██████████| 1464/1464 [00:31<00:00, 46.47it/s]
Processing Lines prompt_sequential_hierarchy_ontological: 100%|██████████| 1464/1464 [00:30<00:00, 47.40it/s]
Processing Lines prompt_direct_entity_with_synonyms: 100%|██████████| 1464/1464 [00:33<00:00, 43.94it/s]
Processing Lines prompt_sequential_hierarchy_with_synonyms: 100%|██████████| 1464/1464 [00:41<00:00, 35.10it/s]


run_path=PosixPath('outputs/largebio/fma-nci/gemini-2.0-flash-lite/syspromts/none') | DATASET='largebio' | SUBSET='fma-nci' | MODEL='gemini-2.0-flash-lite' | sysprompt_name='none'


Processing Lines prompt_direct_entity: 100%|██████████| 799/799 [00:21<00:00, 36.53it/s]
Processing Lines prompt_direct_entity_ontological: 100%|██████████| 799/799 [00:22<00:00, 35.67it/s]
Processing Lines prompt_sequential_hierarchy: 100%|██████████| 799/799 [00:23<00:00, 34.08it/s] 
Processing Lines prompt_sequential_hierarchy_ontological: 100%|██████████| 799/799 [00:24<00:00, 32.93it/s] 
Processing Lines prompt_direct_entity_with_synonyms: 100%|██████████| 799/799 [00:23<00:00, 34.25it/s]
Processing Lines prompt_sequential_hierarchy_with_synonyms: 100%|██████████| 799/799 [00:23<00:00, 33.40it/s]


## Analysis


In [56]:
# Discover stored runs: one (run directory, model, subset, dataset) tuple per
# sub-directory of RUN_DIR, ordered by directory name, descending.
runs_array = []

for directory in sorted(RUN_DIR.iterdir(), key=lambda x: x.name, reverse=True):
    if not directory.is_dir():
        continue

    # Result files are named "<subset>_<model>_...": the first two "_"-separated
    # parts identify the run. Track the result per directory so that a directory
    # without any such file is skipped instead of re-using the values left over
    # from the previous directory (or raising NameError if it comes first).
    run_info = None
    for file in directory.iterdir():
        if "_" not in file.name:
            continue
        subset_name, model_name = file.name.split("_")[:2]
        # .get() yields None for subsets missing from SUBSET_TO_DATASET_MAP.
        dataset_name = SUBSET_TO_DATASET_MAP.get(subset_name)
        # As before, the last matching file seen in the directory wins.
        run_info = (directory.name, model_name, subset_name, dataset_name)

    if run_info is None:
        continue
    runs_array.append(run_info)

len(runs_array), runs_array

(27,
 [('2025-05-01_11-47-16', 'gemini-2.0-flash', 'snomed-nci', 'largebio'),
  ('2025-05-01_11-37-30', 'gemini-2.0-flash-lite', 'snomed-nci', 'largebio'),
  ('2025-05-01_11-24-31', 'gemini-2.0-flash', 'fma-snomed', 'largebio'),
  ('2025-05-01_11-19-48', 'gemini-2.0-flash-lite', 'fma-snomed', 'largebio'),
  ('2025-05-01_11-15-05',
   'gemini-2.0-flash',
   'snomed-ncit.pharm',
   'bioml-2024'),
  ('2025-05-01_11-12-35',
   'gemini-2.0-flash-lite',
   'snomed-ncit.pharm',
   'bioml-2024'),
  ('2025-05-01_11-07-00',
   'gemini-2.0-flash',
   'snomed-ncit.neoplas',
   'bioml-2024'),
  ('2025-05-01_11-06-39',
   'gemini-2.0-flash-lite',
   'snomed-ncit.neoplas',
   'bioml-2024'),
  ('2025-05-01_10-56-46', 'gemini-2.0-flash', 'snomed-fma.body', 'bioml-2024'),
  ('2025-05-01_10-50-33',
   'gemini-2.0-flash-lite',
   'snomed-fma.body',
   'bioml-2024'),
  ('2025-05-01_10-13-02', 'gemini-2.0-flash-lite', 'fma-snomed', 'largebio'),
  ('2025-04-10_22-11-36', 'gemini-2.0-flash', 'ncit-doid', 'bio

In [None]:
# Runs to aggregate, listed by run directory name in descending order.
# Each tuple is (run directory name, model, subset, dataset), which is the
# order the aggregation loop below unpacks.
runs = [
    ('2025-05-01_11-47-16', 'gemini-2.0-flash', 'snomed-nci', 'largebio'),
    ('2025-05-01_11-37-30', 'gemini-2.0-flash-lite', 'snomed-nci', 'largebio'),
    ('2025-05-01_11-24-31', 'gemini-2.0-flash', 'fma-snomed', 'largebio'),
    ('2025-05-01_11-19-48', 'gemini-2.0-flash-lite', 'fma-snomed', 'largebio'),
    ('2025-05-01_11-15-05', 'gemini-2.0-flash', 'snomed-ncit.pharm', 'bioml-2024'),
    ('2025-05-01_11-12-35', 'gemini-2.0-flash-lite', 'snomed-ncit.pharm', 'bioml-2024'),
    ('2025-05-01_11-07-00', 'gemini-2.0-flash', 'snomed-ncit.neoplas', 'bioml-2024'),
    ('2025-05-01_11-06-39', 'gemini-2.0-flash-lite', 'snomed-ncit.neoplas', 'bioml-2024'),
    ('2025-05-01_10-56-46', 'gemini-2.0-flash', 'snomed-fma.body', 'bioml-2024'),
    ('2025-05-01_10-50-33', 'gemini-2.0-flash-lite', 'snomed-fma.body', 'bioml-2024'),
    ('2025-05-01_10-13-02', 'gemini-2.0-flash-lite', 'fma-snomed', 'largebio'),
    ('2025-04-10_22-11-36', 'gemini-2.0-flash', 'ncit-doid', 'bioml-2024'),
    ('2025-04-10_00-33-19', 'gemini-2.0-flash-lite', 'ncit-doid', 'bioml-2024'),
    ('2025-04-06_22-05-02', 'gemini-2.0-flash-lite', 'omim-ordo', 'bioml-2024'),
    ('2025-04-06_19-27-41', 'gemini-2.0-flash', 'omim-ordo', 'bioml-2024'),
    ('2025-04-06_14-51-00', 'gemini-2.0-flash-lite', 'fma-nci', 'largebio'),
    ('2025-04-06_14-18-46', 'gemini-2.0-flash', 'fma-nci', 'largebio'),
    ('2025-04-04_19-41-28', 'gemini-2.0-flash', 'human-mouse', 'anatomy'),
    ('2025-04-04_19-23-47', 'gemini-2.0-flash-lite', 'human-mouse', 'anatomy'),
    ('2025-04-04_00-43-16', 'gemini-2.0-flash', 'fma-snomed', 'largebio'),
    ('2025-03-28_10-15-04', 'gpt-4o-mini', 'fma-nci', 'largebio'),
    ('2025-03-19_23-32-20', 'gpt-4o-mini', 'omim-ordo', 'bioml-2024'),
    ('2025-03-19_22-17-52', 'gpt-4o-mini', 'ncit-doid', 'bioml-2024'),
    ('2025-03-19_19-48-41', 'gpt-4o-mini', 'human-mouse', 'anatomy'),
    ('2025-03-14_00-40-55', 'gpt-4o-mini', 'ncit-doid', 'bioml-2024'),
    ('2025-03-12_17-21-53', 'gpt-4o-mini', 'human-mouse', 'anatomy'),
    ('2025-03-12_14-43-32', 'gpt-4o-mini', 'omim-ordo', 'bioml-2024')
 ]

# Suffix selecting which variant of the stored metrics files is read and written.
suffix = "_reduced"

# Collect one metrics frame per run, tagging each with its provenance.
dfs = []
for run_subdir, model, set_name, dataset in runs:
    metrics_df = read_run_metrics_df(run_subdir, suffix)
    # Only fill in identification columns the stored frame does not already carry.
    fallback_columns = {"Dataset": dataset, "SubSet": set_name, "Model": model}
    for col, value in fallback_columns.items():
        if col in metrics_df.columns:
            continue
        metrics_df[col] = value
    metrics_df["Run"] = run_subdir
    dfs.append(metrics_df)

# NOTE(review): the displayed frame carries an "Unnamed: 0" column, which suggests the
# per-run CSVs were written with their index — confirm in the store/read helpers.
results_df = pd.concat(dfs, ignore_index=True).sort_values(by=["Dataset", "SubSet", "Model"])

results_df.to_csv(format_combined_metrics_path(suffix), index=False)
results_df

Unnamed: 0,Experiment,Accuracy,Precision,Recall,F1 Score,Specificity,Sensitivity,Dataset,SubSet,Model,Run
102,direct_entity,0.7905,0.7778,0.9448,0.8532,0.5111,0.9448,anatomy,human-mouse,gemini-2.0-flash,2025-04-04_19-41-28
103,direct_entity_ontological,0.7866,0.7946,0.9018,0.8448,0.5778,0.9018,anatomy,human-mouse,gemini-2.0-flash,2025-04-04_19-41-28
106,direct_entity_with_synonyms,0.8024,0.7729,0.9816,0.8649,0.4778,0.9816,anatomy,human-mouse,gemini-2.0-flash,2025-04-04_19-41-28
104,sequential_hierarchy,0.8024,0.7783,0.9693,0.8634,0.5,0.9693,anatomy,human-mouse,gemini-2.0-flash,2025-04-04_19-41-28
105,sequential_hierarchy_ontological,0.7628,0.7696,0.9018,0.8305,0.5111,0.9018,anatomy,human-mouse,gemini-2.0-flash,2025-04-04_19-41-28
107,sequential_hierarchy_with_synonyms,0.7984,0.7718,0.9755,0.8618,0.4778,0.9755,anatomy,human-mouse,gemini-2.0-flash,2025-04-04_19-41-28
108,direct_entity,0.7826,0.8506,0.8037,0.8265,0.7444,0.8037,anatomy,human-mouse,gemini-2.0-flash-lite,2025-04-04_19-23-47
109,direct_entity_ontological,0.7549,0.8582,0.7423,0.7961,0.7778,0.7423,anatomy,human-mouse,gemini-2.0-flash-lite,2025-04-04_19-23-47
112,direct_entity_with_synonyms,0.747,0.7487,0.9141,0.8232,0.4444,0.9141,anatomy,human-mouse,gemini-2.0-flash-lite,2025-04-04_19-23-47
110,sequential_hierarchy,0.8024,0.7958,0.9325,0.8588,0.5667,0.9325,anatomy,human-mouse,gemini-2.0-flash-lite,2025-04-04_19-23-47


### Rerun evaluation

In [35]:
# Runs whose stored predictions are re-evaluated by the next cell.
# Each tuple is (run directory name, model, subset, dataset).
# This rebinds `runs`; the entries are the same as the list in the Analysis section.
runs = [
    ('2025-05-01_11-47-16', 'gemini-2.0-flash', 'snomed-nci', 'largebio'),
    ('2025-05-01_11-37-30', 'gemini-2.0-flash-lite', 'snomed-nci', 'largebio'),
    ('2025-05-01_11-24-31', 'gemini-2.0-flash', 'fma-snomed', 'largebio'),
    ('2025-05-01_11-19-48', 'gemini-2.0-flash-lite', 'fma-snomed', 'largebio'),
    ('2025-05-01_11-15-05', 'gemini-2.0-flash', 'snomed-ncit.pharm', 'bioml-2024'),
    ('2025-05-01_11-12-35', 'gemini-2.0-flash-lite', 'snomed-ncit.pharm', 'bioml-2024'),
    ('2025-05-01_11-07-00', 'gemini-2.0-flash', 'snomed-ncit.neoplas', 'bioml-2024'),
    ('2025-05-01_11-06-39', 'gemini-2.0-flash-lite', 'snomed-ncit.neoplas', 'bioml-2024'),
    ('2025-05-01_10-56-46', 'gemini-2.0-flash', 'snomed-fma.body', 'bioml-2024'),
    ('2025-05-01_10-50-33', 'gemini-2.0-flash-lite', 'snomed-fma.body', 'bioml-2024'),
    ('2025-05-01_10-13-02', 'gemini-2.0-flash-lite', 'fma-snomed', 'largebio'),
    ('2025-04-10_22-11-36', 'gemini-2.0-flash', 'ncit-doid', 'bioml-2024'),
    ('2025-04-10_00-33-19', 'gemini-2.0-flash-lite', 'ncit-doid', 'bioml-2024'),
    ('2025-04-06_22-05-02', 'gemini-2.0-flash-lite', 'omim-ordo', 'bioml-2024'),
    ('2025-04-06_19-27-41', 'gemini-2.0-flash', 'omim-ordo', 'bioml-2024'),
    ('2025-04-06_14-51-00', 'gemini-2.0-flash-lite', 'fma-nci', 'largebio'),
    ('2025-04-06_14-18-46', 'gemini-2.0-flash', 'fma-nci', 'largebio'),
    ('2025-04-04_19-41-28', 'gemini-2.0-flash', 'human-mouse', 'anatomy'),
    ('2025-04-04_19-23-47', 'gemini-2.0-flash-lite', 'human-mouse', 'anatomy'),
    ('2025-04-04_00-43-16', 'gemini-2.0-flash', 'fma-snomed', 'largebio'),
    ('2025-03-28_10-15-04', 'gpt-4o-mini', 'fma-nci', 'largebio'),
    ('2025-03-19_23-32-20', 'gpt-4o-mini', 'omim-ordo', 'bioml-2024'),
    ('2025-03-19_22-17-52', 'gpt-4o-mini', 'ncit-doid', 'bioml-2024'),
    ('2025-03-19_19-48-41', 'gpt-4o-mini', 'human-mouse', 'anatomy'),
    ('2025-03-14_00-40-55', 'gpt-4o-mini', 'ncit-doid', 'bioml-2024'),
    ('2025-03-12_17-21-53', 'gpt-4o-mini', 'human-mouse', 'anatomy'),
    ('2025-03-12_14-43-32', 'gpt-4o-mini', 'omim-ordo', 'bioml-2024')
 ]

In [None]:
# Re-score already stored predictions for every listed run, using the files
# selected by `suffix`, then rebuild each run's combined metrics table.
suffix = "_reduced"
for run_subdir, model, set_name, dataset_name in tqdm(runs):
    run_path = RUN_DIR / run_subdir
    print(f"Run path: {run_path}, dataset: {dataset_name}, set: {set_name}, model: {model}")

    for prompt_name in PROMPT_FUNCTIONS_MAP:
        # Older runs do not have results for every prompt; report the failure and keep going.
        try:
            _prediction_path, stats_path, diagram_path = format_storing_pathes_from_run_path(
                run_path, set_name, model, prompt_name, suffix
            )
            analyze_results(
                get_predictions_with_gt(run_path, dataset_name, set_name, model, prompt_name, suffix),
                print_results=False,
                plot_confusion_matrix=False,
                subtitle=f"{set_name}: {model} {prompt_name} | ",
                cm_save_path=diagram_path,
                stats_path=stats_path,
            )
        except Exception as e:  # noqa: BLE001
            print(f"Error: {e!s}")

    store_run_metrics_df(PROMPT_FUNCTIONS_MAP, run_path, dataset_name, set_name, model, suffix)

  0%|          | 0/27 [00:00<?, ?it/s]

Run path: runs/2025-05-01_11-47-16, dataset: largebio, set: snomed-nci, model: gemini-2.0-flash


  4%|▎         | 1/27 [00:07<03:12,  7.39s/it]

Run path: runs/2025-05-01_11-37-30, dataset: largebio, set: snomed-nci, model: gemini-2.0-flash-lite


  7%|▋         | 2/27 [00:12<02:29,  5.99s/it]

Run path: runs/2025-05-01_11-24-31, dataset: largebio, set: fma-snomed, model: gemini-2.0-flash


 11%|█         | 3/27 [00:14<01:44,  4.34s/it]

Run path: runs/2025-05-01_11-19-48, dataset: largebio, set: fma-snomed, model: gemini-2.0-flash-lite


 15%|█▍        | 4/27 [00:17<01:22,  3.57s/it]

Run path: runs/2025-05-01_11-15-05, dataset: bioml-2024, set: snomed-ncit.pharm, model: gemini-2.0-flash


 19%|█▊        | 5/27 [00:19<01:04,  2.95s/it]

Run path: runs/2025-05-01_11-12-35, dataset: bioml-2024, set: snomed-ncit.pharm, model: gemini-2.0-flash-lite


 22%|██▏       | 6/27 [00:20<00:52,  2.51s/it]

Run path: runs/2025-05-01_11-07-00, dataset: bioml-2024, set: snomed-ncit.neoplas, model: gemini-2.0-flash


 26%|██▌       | 7/27 [00:21<00:42,  2.11s/it]

Run path: runs/2025-05-01_11-06-39, dataset: bioml-2024, set: snomed-ncit.neoplas, model: gemini-2.0-flash-lite


 30%|██▉       | 8/27 [00:23<00:35,  1.85s/it]

Run path: runs/2025-05-01_10-56-46, dataset: bioml-2024, set: snomed-fma.body, model: gemini-2.0-flash


 33%|███▎      | 9/27 [00:25<00:33,  1.89s/it]

Run path: runs/2025-05-01_10-50-33, dataset: bioml-2024, set: snomed-fma.body, model: gemini-2.0-flash-lite


 37%|███▋      | 10/27 [00:27<00:32,  1.91s/it]

Run path: runs/2025-05-01_10-13-02, dataset: largebio, set: fma-snomed, model: gemini-2.0-flash-lite


 41%|████      | 11/27 [00:29<00:33,  2.12s/it]

Run path: runs/2025-04-10_22-11-36, dataset: bioml-2024, set: ncit-doid, model: gemini-2.0-flash


 44%|████▍     | 12/27 [00:31<00:28,  1.92s/it]

Run path: runs/2025-04-10_00-33-19, dataset: bioml-2024, set: ncit-doid, model: gemini-2.0-flash-lite


 48%|████▊     | 13/27 [00:32<00:24,  1.77s/it]

Run path: runs/2025-04-06_22-05-02, dataset: bioml-2024, set: omim-ordo, model: gemini-2.0-flash-lite


 52%|█████▏    | 14/27 [00:33<00:20,  1.59s/it]

Run path: runs/2025-04-06_19-27-41, dataset: bioml-2024, set: omim-ordo, model: gemini-2.0-flash


 56%|█████▌    | 15/27 [00:35<00:17,  1.47s/it]

Run path: runs/2025-04-06_14-51-00, dataset: largebio, set: fma-nci, model: gemini-2.0-flash-lite


 59%|█████▉    | 16/27 [00:36<00:14,  1.34s/it]

Run path: runs/2025-04-06_14-18-46, dataset: largebio, set: fma-nci, model: gemini-2.0-flash


 63%|██████▎   | 17/27 [00:37<00:12,  1.26s/it]

Run path: runs/2025-04-04_19-41-28, dataset: anatomy, set: human-mouse, model: gemini-2.0-flash


 67%|██████▋   | 18/27 [00:38<00:10,  1.17s/it]

Run path: runs/2025-04-04_19-23-47, dataset: anatomy, set: human-mouse, model: gemini-2.0-flash-lite


 70%|███████   | 19/27 [00:38<00:08,  1.03s/it]

Run path: runs/2025-04-04_00-43-16, dataset: largebio, set: fma-snomed, model: gemini-2.0-flash


 74%|███████▍  | 20/27 [00:41<00:10,  1.43s/it]

Run path: runs/2025-03-28_10-15-04, dataset: largebio, set: fma-nci, model: gpt-4o-mini


 78%|███████▊  | 21/27 [00:42<00:07,  1.32s/it]

Run path: runs/2025-03-19_23-32-20, dataset: bioml-2024, set: omim-ordo, model: gpt-4o-mini
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_23-32-20/omim-ordo_gpt-4o-mini_direct_entity_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_23-32-20/omim-ordo_gpt-4o-mini_direct_entity_ontological_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_23-32-20/omim-ordo_gpt-4o-mini_sequential_hierarchy_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_23-32-20/omim-ordo_gpt-4o-mini_sequential_hierarchy_ontological_results_reduced.csv'


 81%|████████▏ | 22/27 [00:42<00:05,  1.05s/it]

File not found for direct_entity in runs/2025-03-19_23-32-20
File not found for direct_entity_ontological in runs/2025-03-19_23-32-20
File not found for sequential_hierarchy in runs/2025-03-19_23-32-20
File not found for sequential_hierarchy_ontological in runs/2025-03-19_23-32-20
Run path: runs/2025-03-19_22-17-52, dataset: bioml-2024, set: ncit-doid, model: gpt-4o-mini
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_22-17-52/ncit-doid_gpt-4o-mini_direct_entity_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_22-17-52/ncit-doid_gpt-4o-mini_direct_entity_ontological_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_22-17-52/ncit-doid_gpt-4o-mini_sequential_hierarchy_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_22-17-52/ncit-doid_gpt-4o-mini_sequential_hierarchy_ontological_results_reduced.csv'


 85%|████████▌ | 23/27 [00:43<00:03,  1.12it/s]

File not found for direct_entity in runs/2025-03-19_22-17-52
File not found for direct_entity_ontological in runs/2025-03-19_22-17-52
File not found for sequential_hierarchy in runs/2025-03-19_22-17-52
File not found for sequential_hierarchy_ontological in runs/2025-03-19_22-17-52
Run path: runs/2025-03-19_19-48-41, dataset: anatomy, set: human-mouse, model: gpt-4o-mini
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_19-48-41/human-mouse_gpt-4o-mini_direct_entity_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_19-48-41/human-mouse_gpt-4o-mini_direct_entity_ontological_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_19-48-41/human-mouse_gpt-4o-mini_sequential_hierarchy_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-19_19-48-41/human-mouse_gpt-4o-mini_sequential_hierarchy_ontological_results_reduced.csv'


 89%|████████▉ | 24/27 [00:43<00:02,  1.43it/s]

File not found for direct_entity in runs/2025-03-19_19-48-41
File not found for direct_entity_ontological in runs/2025-03-19_19-48-41
File not found for sequential_hierarchy in runs/2025-03-19_19-48-41
File not found for sequential_hierarchy_ontological in runs/2025-03-19_19-48-41
Run path: runs/2025-03-14_00-40-55, dataset: bioml-2024, set: ncit-doid, model: gpt-4o-mini


 93%|█████████▎| 25/27 [00:44<00:01,  1.27it/s]

Error: [Errno 2] No such file or directory: 'runs/2025-03-14_00-40-55/ncit-doid_gpt-4o-mini_direct_entity_with_synonyms_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-14_00-40-55/ncit-doid_gpt-4o-mini_sequential_hierarchy_with_synonyms_results_reduced.csv'
File not found for direct_entity_with_synonyms in runs/2025-03-14_00-40-55
File not found for sequential_hierarchy_with_synonyms in runs/2025-03-14_00-40-55
Run path: runs/2025-03-12_17-21-53, dataset: anatomy, set: human-mouse, model: gpt-4o-mini


 96%|█████████▋| 26/27 [00:44<00:00,  1.43it/s]

Error: [Errno 2] No such file or directory: 'runs/2025-03-12_17-21-53/human-mouse_gpt-4o-mini_direct_entity_with_synonyms_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-12_17-21-53/human-mouse_gpt-4o-mini_sequential_hierarchy_with_synonyms_results_reduced.csv'
File not found for direct_entity_with_synonyms in runs/2025-03-12_17-21-53
File not found for sequential_hierarchy_with_synonyms in runs/2025-03-12_17-21-53
Run path: runs/2025-03-12_14-43-32, dataset: bioml-2024, set: omim-ordo, model: gpt-4o-mini


100%|██████████| 27/27 [00:45<00:00,  1.69s/it]

Error: [Errno 2] No such file or directory: 'runs/2025-03-12_14-43-32/omim-ordo_gpt-4o-mini_direct_entity_with_synonyms_results_reduced.csv'
Error: [Errno 2] No such file or directory: 'runs/2025-03-12_14-43-32/omim-ordo_gpt-4o-mini_sequential_hierarchy_with_synonyms_results_reduced.csv'
File not found for direct_entity_with_synonyms in runs/2025-03-12_14-43-32
File not found for sequential_hierarchy_with_synonyms in runs/2025-03-12_14-43-32



