# Merge results files from different runs

Sometimes runs were cut short, or new models needed to be added.

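This notebook provides a utility function to merge the results files from such runs. Currently only the "tasks" merge is implemented; the "models" merge raises `NotImplementedError`.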

In [1]:
# location of the project root
root = ".."

# how the two files are combined: "tasks" or "models"
merge_type = "tasks"

# results files to combine, given relative to the project root:
# `main_data` is the file that receives the merge, `merge_data` is merged into it
main_data = (
    "output/describe/member/desc_mem_alternative_2025-08-07T06:49:54.859845.json"
)
merge_data = (
    "output/describe/member/desc_mem_alternative_2025-08-08T14:32:01.976894.json"
)

In [None]:
# define the utility function to merge results files!

from typing import Literal, get_args
from src import evaluate_hallucinations
from llm_cgr import load_json, save_json
from src.constants import HallucinationLevel
from src.libraries.load import DEFAULT_DOCUMENTATION_FILE, DEFAULT_PYPI_PACKAGES_FILE

# the supported ways of combining two results files
MergeTypes = Literal[
    "tasks",
    "models",
]


def merge_results(
    main_file: str,
    merge_file: str,
    merge_type: MergeTypes,
    root: str = "..",
) -> None:
    """
    Merges two result files, in one of the following ways depending on the `merge_type`:
        - "tasks": when a run was cut short, and we want to add more tasks.
        - "models": when we want to expand the results with more models (not implemented yet).

    The merged results are written back to `main_file` (overwriting it), and the
    merged file is then passed to `evaluate_hallucinations`.

    Args:
        main_file: Path of the results file that receives the merge; it is overwritten.
        merge_file: Path of the results file whose content is merged into `main_file`.
        merge_type: How to merge, one of `MergeTypes`.
        root: Path to the project root, used to locate the ground truth file.
            Defaults to "..". Pass the notebook's `root` here if it is changed.

    Raises:
        NotImplementedError: If `merge_type` is "models".
        ValueError: If `merge_type` is unknown, or if the two files disagree on any
            of the metadata keys that must match for the runs to be compatible.
    """
    # validate the merge type first, so an unsupported option fails before any
    # file is read and is never masked by an unrelated metadata error
    if merge_type == "models":
        raise NotImplementedError("Merging models is not implemented yet.")
    if merge_type != "tasks":
        raise ValueError(
            f"Unknown merge type: {merge_type}. Use one of {get_args(MergeTypes)}."
        )

    # open both files
    main_data = load_json(main_file)
    merge_data = load_json(merge_file)

    # assert runs are compatible
    for key in [
        "run_id",
        "hallucination_level",
        "dataset_file",
        "configured_temperature",
        "configured_top_p",
        "configured_max_tokens",
        "system_prompt",
        "post_prompt",
    ]:
        if main_data["metadata"][key] != merge_data["metadata"][key]:
            raise ValueError(f"Cannot merge results with different {key}.")

    # merge the data; only the "tasks" merge reaches this point
    # NOTE(review): assumes "generations" and "errors" are dicts, so entries from
    # the merge file replace entries in the main file that share a key - confirm
    main_data["generations"].update(merge_data["generations"])
    main_data["errors"].update(merge_data["errors"])

    # merge the metadata: the combined run ends when the merged-in run ended
    main_data["metadata"]["end_datetime"] = merge_data["metadata"]["end_datetime"]

    # save the merged data, overwriting the main file
    save_json(
        data=main_data,
        file_path=main_file,
    )

    # determine the ground truth file based on the hallucination level
    level = HallucinationLevel(main_data["metadata"]["hallucination_level"])
    ground_truth_name = (
        DEFAULT_PYPI_PACKAGES_FILE
        if level == HallucinationLevel.LIBRARY
        else DEFAULT_DOCUMENTATION_FILE
    )
    ground_truth_file = f"{root}/{ground_truth_name}"

    # evaluate the merged hallucinations
    evaluate_hallucinations(
        results_file=main_file,
        ground_truth_file=ground_truth_file,
    )

In [3]:
# run the merge on the configured files, with their paths resolved against the project root

merge_results(
    merge_type=merge_type,
    main_file=f"{root}/{main_data}",
    merge_file=f"{root}/{merge_data}",
)