In [1]:
import os
import pandas as pd

In [2]:
output_files = os.listdir("./evaluation_1k/")

In [3]:
results = pd.DataFrame(
    columns=[
        "Embedding Method",
        "Supporting documents retrieved",
        "Query Type",
        "Condition skyline (p@k)",
        "LLM",
        "Conditions accuracy",
        "Severity accuracy",
    ],
)

In [4]:
query_types = ["all", "cluster", "vague", "hypochondriac", "basic", "downplay"]

In [5]:
for output_file in output_files:
    # Get the model name and k value from the filename
    output_file_split = output_file.split("_")[0].split("-")
    model = "-".join(output_file_split[2:4]).replace("pt", ".")
    k = output_file_split[4][1:]
    print(model, k)

    # Read the jsonl file
    print(f"Reading './evaluation_1k/{output_file}'")
    data = pd.read_json(f"./evaluation_1k/{output_file}", lines=True)

    for query_type in query_types:
        if query_type == "all":
            sub_data = data
        else:
            # Get data for the specific query type
            sub_data = data[data["query_type"] == query_type]
        condition_match, severity_match, retriever_match = sub_data[
            ["conditions_match", "severity_match", "retriever_match"]
        ].sum(axis=0) / len(sub_data)

        i = len(results)
        results.loc[i, "Supporting documents retrieved"] = k
        results.loc[i, "Query Type"] = query_type
        results.loc[i, "Condition skyline (p@k)"] = retriever_match
        results.loc[i, "LLM"] = model
        results.loc[i, "Conditions accuracy"] = condition_match
        results.loc[i, "Severity accuracy"] = severity_match

deepseek-r1 30
Reading './evaluation_1k/evaluate-rag-deepseek-r1-k30-chroma_2025-04-15_17-24-42.jsonl'
qwen2.5-1.5b 30
Reading './evaluation_1k/evaluate-rag-qwen2pt5-1pt5b-k30-chroma_2025-04-15_15-27-32.jsonl'
qwen2.5-14b 30
Reading './evaluation_1k/evaluate-rag-qwen2pt5-14b-k30-chroma_2025-04-23_10-28-14.jsonl'
qwen2.5-14b 10
Reading './evaluation_1k/evaluate-rag-qwen2pt5-14b-k10-chroma_2025-04-24_08-47-15.jsonl'
qwen2.5-1.5b 10
Reading './evaluation_1k/evaluate-rag-qwen2pt5-1pt5b-k10-chroma_2025-04-15_15-16-40.jsonl'
qwen2.5-7b 10
Reading './evaluation_1k/evaluate-rag-qwen2pt5-7b-k10-chroma_2025-04-23_17-08-57.jsonl'
o3-mini 30
Reading './evaluation_1k/evaluate-rag-o3-mini-k30-chroma_2025-04-15_16-14-48.jsonl'
qwen2.5-7b 30
Reading './evaluation_1k/evaluate-rag-qwen2pt5-7b-k30-chroma_2025-04-23_09-16-04.jsonl'
qwen2.5-7b 30
Reading './evaluation_1k/evaluate-rag-qwen2pt5-7b-k30-chroma_2025-04-23_17-31-22.jsonl'
o3-mini 10
Reading './evaluation_1k/evaluate-rag-o3-mini-k10-chroma_2025-0

In [6]:
results.sort_values(
    by=["Query Type", "Supporting documents retrieved", "LLM"], inplace=True
)
results["Embedding Method"] = "sentence-transformers/all-mpnet-base-v2"

In [7]:
results[results["Query Type"] == "all"].drop(columns=["Query Type"]).to_csv(
    "./2b_results.csv", index=False
)

In [8]:
results.to_csv(
    "./2b_granular.csv",
    index=False,
)

In [9]:
!uv pip install tabulate

md_text = """
## 2b_580a395.md split by query type

"""

for query_type in query_types:
    sub_results = results[results["Query Type"] == query_type]

    # add to markdown
    md_text += f"### {query_type}\n"
    md_text += sub_results.to_markdown(index=False, tablefmt="github", floatfmt=".2f")
    md_text += "\n\n"

with open("./2b_granular.md", "w") as f:
    f.write(md_text)

[2mUsing Python 3.12.9 environment at: /Users/rwood/projects/t0_proj/t0/experiments/t0-001/.venv[0m
[2mAudited [1m1 package[0m [2min 40ms[0m[0m


In [10]:
results[results["Query Type"] == "all"]

Unnamed: 0,Embedding Method,Supporting documents retrieved,Query Type,Condition skyline (p@k),LLM,Conditions accuracy,Severity accuracy
54,sentence-transformers/all-mpnet-base-v2,10,all,0.693387,o3-mini,0.470942,0.423848
24,sentence-transformers/all-mpnet-base-v2,10,all,0.694,qwen2.5-1.5b,0.256,0.083
18,sentence-transformers/all-mpnet-base-v2,10,all,0.694,qwen2.5-14b,0.35,0.384
66,sentence-transformers/all-mpnet-base-v2,10,all,0.694,qwen2.5-32b,0.42,0.408
30,sentence-transformers/all-mpnet-base-v2,10,all,0.694,qwen2.5-7b,0.343,0.275
0,sentence-transformers/all-mpnet-base-v2,30,all,0.828,deepseek-r1,0.44,0.42
36,sentence-transformers/all-mpnet-base-v2,30,all,0.825651,o3-mini,0.482966,0.455912
6,sentence-transformers/all-mpnet-base-v2,30,all,0.828,qwen2.5-1.5b,0.158,0.053
12,sentence-transformers/all-mpnet-base-v2,30,all,0.827,qwen2.5-14b,0.289,0.355
60,sentence-transformers/all-mpnet-base-v2,30,all,0.828,qwen2.5-14b,0.278,0.36
