In [18]:
import pandas as pd
import sys
import os

# Add the parent directory (relative to the working directory) to sys.path so the `backend` package can be imported
sys.path.append('../')
from backend.app.rag.pipeline import run_rag_pipeline
from backend.app.schemas.rag import RAGRequest



In [19]:
# Manual test questions about metabolism, grouped by category. The groups are
# flattened in insertion order, so the resulting list keeps a stable order:
# producers, consumers, participants, summary, then unknown / edge cases.
questions_by_category = {
    "producers": [
        "How is pyruvate produced?",
        "What reactions produce C00022?",
        "How is oxaloacetate synthesized?",
        "Which enzymes produce glucose?",
        "How is lactate generated?",
    ],
    "consumers": [
        "What consumes glucose?",
        "Which reactions use C00031?",
        "What degrades lactate?",
        "What pathways break down pyruvate?",
        "Describe enzymes that use glucose",
    ],
    "participants": [
        "Which enzymes act on glucose?",
        "List participants in R00200",
        "What are the reactants in R01070?",
        "enzyme ec 1.1.1.1",
        "participants in map00010",
    ],
    "summary": [
        "What is glycolysis pathway?",
        "Tell me about C00022",
        "Describe R00200",
        "Overview of hsa00010",
        "What is EC 2.7.1.1?",
    ],
    "unknown_or_edge": [
        "about pyruvate",
        "C00022",
        "Hi there",
        "random text with no biology",
        "   ",
    ],
}

# Single-column payload for the dataframe: every question, category by category.
manual_test_questions = {
    "Questions": [
        question
        for category_questions in questions_by_category.values()
        for question in category_questions
    ]
}
test_df = pd.DataFrame(manual_test_questions)

In [25]:
test_df

Unnamed: 0,Questions
0,How is pyruvate produced?
1,What reactions produce C00022?
2,How is oxaloacetate synthesized?
3,Which enzymes produce glucose?
4,How is lactate generated?
5,What consumes glucose?
6,Which reactions use C00031?
7,What degrades lactate?
8,What pathways break down pyruvate?
9,Describe enzymes that use glucose


In [20]:
# Show the first question (row 0, column 0) as a scalar.
test_df.iat[0, 0]

'How is pyruvate produced?'

In [21]:
# Wrap the first question of the test dataframe in a RAGRequest.
first_question = test_df.iloc[0, 0]
rag_request = RAGRequest(question=first_question)

In [23]:
# Run the pipeline on the single request. In the recorded output the `answer`
# field holds a fallback message ("missing APP_LLM_API_KEY"), while the
# interpretation, context, reactions, and compounds fields are still populated.
run_rag_pipeline(rag_request)

RAGResponse(answer='LLM response unavailable. Reason: missing APP_LLM_API_KEY. Question: How is pyruvate produced? | Context length: 387', interpretation=RAGInterpretation(entity_type='compound', entity_id=None, entity_name='pyruvate', intent='producers', confidence=0.7999999999999999), context='Metabolic Graph Context\n\nInterpretation:\n- entity_type: compound\n- intent: producers\n- entity_id: C00022\n- entity_name: pyruvate\n\nCounts:\n- reactions: 1\n- compounds: 1\n- enzymes: 0\n\nReactions:\n- R01196 (pyruvate:ferredoxin 2-oxidoreductase (CoA-acetylating))\n\nCompounds:\n- C00022 (Pyruvate)\n\nTrace IDs:\n- reaction_ids: R01196\n- compound_ids: C00022\n- pathway_ids: none\n- enzyme_ecs: none', reactions=[RAGReactionSummary(reaction_id='R01196', name='pyruvate:ferredoxin 2-oxidoreductase (CoA-acetylating)')], compounds=[RAGCompoundSummary(compound_id='C00022', name='Pyruvate')], enzymes=[], trace=RAGTrace(reaction_ids=['R01196'], compound_ids=['C00022'], pathway_ids=[], enzyme_ec

In [24]:
# Run for all questions
# Column order of the frame returned by rag_questions_to_df. Keeping it in one
# place guarantees that success rows, error rows, and the empty result all
# share the same schema.
_RAG_RESULT_COLUMNS = (
    "question",
    "answer",
    "intent",
    "entity_type",
    "entity_id",
    "entity_name",
    "confidence",
    "resolved_entity_id",
    "reactions_n",
    "compounds_n",
    "enzymes_n",
    "reaction_ids",
    "compound_ids",
    "enzyme_ecs",
    "error",
)


def _enzyme_label(enzyme) -> str:
    """Return a printable label for one entry of ``resp.enzymes``.

    Plain strings are returned unchanged. For any other object a couple of
    candidate attribute names are probed before falling back to ``str()``.
    """
    if isinstance(enzyme, str):
        return enzyme
    # NOTE(review): the enzyme summary schema is not visible from this notebook;
    # "ec" / "ec_number" are guesses - confirm against the schema module.
    for attr in ("ec", "ec_number"):
        value = getattr(enzyme, attr, None)
        if value:
            return str(value)
    return str(enzyme)


def rag_questions_to_df(questions: list[str]) -> pd.DataFrame:
    """Run the RAG pipeline on every question and tabulate the results.

    Args:
        questions: Iterable of question strings; each one is wrapped in a
            ``RAGRequest`` and passed to ``run_rag_pipeline``.

    Returns:
        A DataFrame with one row per question and the columns listed in
        ``_RAG_RESULT_COLUMNS``. When the pipeline (or reading its response)
        raises, the row keeps the question, stores the exception text in
        ``error``, and leaves every other column empty. An empty input yields
        an empty frame that still carries all columns.
    """
    rows = []

    for q in questions:
        # Start from an all-None row so the error path needs no second literal.
        row = dict.fromkeys(_RAG_RESULT_COLUMNS)
        row["question"] = q
        try:
            resp = run_rag_pipeline(RAGRequest(question=q))
            interpretation = resp.interpretation
            compound_trace = resp.trace.compound_ids
            # All keyword values are evaluated before update() runs, so a
            # failure here leaves `row` untouched (no half-filled rows).
            row.update(
                answer=resp.answer,
                intent=interpretation.intent,
                entity_type=interpretation.entity_type,
                entity_id=interpretation.entity_id,
                entity_name=interpretation.entity_name,
                confidence=interpretation.confidence,
                resolved_entity_id=compound_trace[0] if compound_trace else None,
                reactions_n=len(resp.reactions),
                compounds_n=len(resp.compounds),
                enzymes_n=len(resp.enzymes),
                reaction_ids=", ".join(r.reaction_id for r in resp.reactions) or None,
                compound_ids=", ".join(c.compound_id for c in resp.compounds) or None,
                # Entries may not be plain strings; joining them directly would
                # raise TypeError and discard an otherwise valid response.
                enzyme_ecs=", ".join(_enzyme_label(e) for e in resp.enzymes) or None,
            )
        except Exception as e:
            # Deliberately broad: one failing question must not abort the batch.
            # Fall back to repr() so exceptions without a message stay visible.
            row["error"] = str(e) or repr(e)
        rows.append(row)

    # Passing the columns explicitly keeps the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=list(_RAG_RESULT_COLUMNS))


In [26]:
# Hand the questions over as a plain list, matching the list[str] annotation.
all_questions = test_df["Questions"].tolist()
answers_df = rag_questions_to_df(all_questions)



In [27]:
answers_df

Unnamed: 0,question,answer,intent,entity_type,entity_id,entity_name,confidence,resolved_entity_id,reactions_n,compounds_n,enzymes_n,reaction_ids,compound_ids,enzyme_ecs,error
0,How is pyruvate produced?,LLM response unavailable. Reason: missing APP_...,producers,compound,,pyruvate,0.8,C00022,1.0,1.0,0.0,R01196,C00022,,
1,What reactions produce C00022?,LLM response unavailable. Reason: missing APP_...,producers,compound,C00022,,0.8,C00022,1.0,1.0,0.0,R01196,C00022,,
2,How is oxaloacetate synthesized?,LLM response unavailable. Reason: missing APP_...,producers,compound,,oxaloacetate,0.8,C00036,0.0,1.0,0.0,,C00036,,
3,Which enzymes produce glucose?,LLM response unavailable. Reason: missing APP_...,producers,enzyme,,,0.6,,0.0,0.0,0.0,,,,
4,How is lactate generated?,LLM response unavailable. Reason: missing APP_...,producers,compound,,lactate,0.8,,0.0,0.0,0.0,,,,
5,What consumes glucose?,LLM response unavailable. Reason: missing APP_...,consumers,compound,,glucose,0.8,C00267,3.0,1.0,0.0,"R01786, R02189, R09085",C00267,,
6,Which reactions use C00031?,LLM response unavailable. Reason: missing APP_...,summary,compound,C00031,c00031,0.99,,0.0,0.0,0.0,,,,
7,What degrades lactate?,LLM response unavailable. Reason: missing APP_...,consumers,compound,,lactate,0.8,,0.0,0.0,0.0,,,,
8,What pathways break down pyruvate?,LLM response unavailable. Reason: missing APP_...,consumers,compound,,pyruvate,0.65,C00022,4.0,1.0,0.0,"R00014, R00200, R00209, R10866",C00022,,
9,Describe enzymes that use glucose,LLM response unavailable. Reason: missing APP_...,summary,enzyme,,glucose,0.8,,0.0,0.0,0.0,,,,


In [None]:
# Validation checks for Task 3 manual question validation
# Expected labels are positional: entry i describes question i of
# manual_test_questions["Questions"], which holds five questions per category
# (producers, consumers, participants, summary, unknown / edge).
expected_intent = (
    ["producers"] * 5
    + ["consumers"] * 5
    + ["participants"] * 5
    + ["summary"] * 5
    # Unknown / edge
    + ["summary", "summary", "unknown", "unknown", "unknown"]
)

expected_entity_type = (
    # Producers and consumers
    ["compound"] * 10
    # Participants
    + ["compound", "reaction", "reaction", "enzyme", "pathway"]
    # Summary
    + ["pathway", "compound", "reaction", "pathway", "enzyme"]
    # Unknown / edge
    + ["unknown", "compound", "unknown", "unknown", "unknown"]
)

expect_grounding = (
    # Producers, consumers, participants, summary
    [True] * 20
    # Unknown / edge
    + [False, True, False, False, False]
)

expected_cases = pd.DataFrame(
    {
        "question": manual_test_questions["Questions"],
        "expected_intent": expected_intent,
        "expected_entity_type": expected_entity_type,
        "expect_grounding": expect_grounding,
    }
)

# Attach the expectations to each answer row, then derive the per-row checks:
# no pipeline error, intent matches, entity type matches.
validation_df = answers_df.merge(expected_cases, on="question", how="left").assign(
    is_valid_input=lambda frame: frame["error"].isna(),
    is_intent_ok=lambda frame: frame["intent"] == frame["expected_intent"],
    is_entity_type_ok=lambda frame: frame["entity_type"] == frame["expected_entity_type"],
)


def _has_grounding(row: pd.Series) -> bool:
    """Tell whether a result row retrieved at least one graph entity.

    Args:
        row: A row providing ``error``, ``reactions_n``, ``compounds_n`` and
            ``enzymes_n``.

    Returns:
        ``False`` when the row recorded an error. Otherwise ``True`` if any of
        the three counts is present and non-zero.
    """
    if pd.notna(row["error"]):
        return False
    counts = (row["reactions_n"], row["compounds_n"], row["enzymes_n"])
    # A missing count (None / NaN) means "nothing retrieved". NaN is truthy in
    # Python, so it has to be filtered out explicitly rather than with `or 0`.
    return any(pd.notna(count) and bool(count) for count in counts)


def _grounding_requirement_met(row: pd.Series) -> bool:
    """Return True when grounding is not expected, else defer to _has_grounding."""
    if not row["expect_grounding"]:
        return True
    return _has_grounding(row)


validation_df["is_grounded_ok"] = validation_df.apply(_grounding_requirement_met, axis=1)

# A row passes only if every individual check holds.
pass_criteria = ["is_intent_ok", "is_entity_type_ok", "is_grounded_ok", "is_valid_input"]
validation_df["is_pass"] = validation_df[pass_criteria].all(axis=1)

# Rows for which the pipeline did not record an error.
valid_rows = validation_df[validation_df["is_valid_input"]]

# Each metric is the fraction of rows with the flag set; the last one is
# restricted to error-free rows and defaults to 0.0 when there are none.
metric_values = {
    "intent_accuracy": validation_df["is_intent_ok"].mean(),
    "entity_type_accuracy": validation_df["is_entity_type_ok"].mean(),
    "grounding_pass_rate": validation_df["is_grounded_ok"].mean(),
    "overall_pass_rate": validation_df["is_pass"].mean(),
    "overall_pass_rate_valid_inputs": (
        0.0 if valid_rows.empty else valid_rows["is_pass"].mean()
    ),
}

summary_df = pd.DataFrame(
    {
        "metric": list(metric_values.keys()),
        "value": list(metric_values.values()),
    }
)
# Report as percentages with one decimal place.
summary_df["value"] = summary_df["value"].mul(100).round(1)

summary_df



In [None]:
# Show only failed validation rows for quick debugging
# Columns: the question, expected vs. actual labels, retrieval counts, the
# recorded error, and the individual check flags.
failed_cols = [
    "question",
    "expected_intent", "intent",
    "expected_entity_type", "entity_type",
    "expect_grounding",
    "reactions_n", "compounds_n", "enzymes_n",
    "error",
    "is_valid_input", "is_intent_ok", "is_entity_type_ok", "is_grounded_ok",
    "is_pass",
]

did_not_pass = ~validation_df["is_pass"]
failed_df = validation_df.loc[did_not_pass, failed_cols].copy()
failed_df


