In [1]:
# Standard libraries
import os
import sys

# Add root of the repository to the path
sys.path.append('../../..')

# Import the main agent (supervisor graph)
from aiagents4pharma.talk2scholars.agents.main_agent import get_app

# Import the shared state schema
from aiagents4pharma.talk2scholars.state.state_talk2scholars import Talk2Scholars

# Suppress excessive logging from httpx
import logging
logging.getLogger("httpx").setLevel(logging.WARNING)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langchain_openai import ChatOpenAI

# Initialize LLM (optional tweak depending on your config)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Create app instance (LangGraph workflow)
app = get_app("demo_session_001", llm)


INFO:aiagents4pharma.talk2scholars.agents.main_agent:Launching Talk2Scholars with thread_id demo_session_001
INFO:aiagents4pharma.talk2scholars.agents.main_agent:System_prompt of Talk2Scholars: You are the Main Supervisor Agent.

You have access to four tools, each represented by a sub-agent:

- s2_agent: Use this to search for or recommend academic papers.
  You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
  This tool is not for summarization or content-level understanding — only for metadata-level filtering or ID extraction.

- zotero_agent: Use this to read from or write to the user's Zotero account.
  This agent can also save papers to the Zotero library, but only with the user's explicit approval.

- pdf_agent: Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
  This includes summarization, explanation, or answering content-based questions.

- paper_download_agent: Use to download P

In [26]:
# Example input: download these papers
initial_state = Talk2Scholars(
    user_input="Download the following papers: 10.1074/jbc.M112.432062, 10.1038/psp.2013.64, 10.1038/s41540-024-00395-9, 10.1111/cts.12849, 10.1002/psp4.12932, 10.1371/journal.pone.0165782",
    thread_id="demo_session_001"
)

# Run the graph
result = app.invoke(
    initial_state,
    config={"configurable": {"thread_id": "demo_session_001"}}
)
print(result)

{'messages': [AIMessage(content='How can I assist you today? If you need help with academic papers, feel free to ask!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 1085, 'total_tokens': 1106, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'finish_reason': 'stop', 'logprobs': None}, name='supervisor', id='run-8e2c6e08-715b-408c-8ef9-0197cf00c5b2-0', usage_metadata={'input_tokens': 1085, 'output_tokens': 21, 'total_tokens': 1106, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}), AIMessage(content='How can I assist you today? If you need help with academic papers, feel free to ask!', additional_kwargs={'refusal': None}, respons

In [8]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage

# Use a more deterministic model (you can also use gpt-4 if needed)
debug_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)

def evaluate_agent_behavior(user_input, messages, expected_agent):
    """
    Ask the LLM to evaluate whether the correct subagent was called based on the user input and agent response.
    """
    chat = [
        HumanMessage(content=f"""
You are evaluating the behavior of a LangGraph-based multi-agent system.

The user gave the input: 
\"\"\"{user_input}\"\"\"

The system produced the following message history:
{messages}

Your task:
- Identify whether the correct subagent was called. The expected subagent is: **{expected_agent}**
- If it was not triggered or the response was vague/generic, diagnose the likely issue.
- Suggest a rephrased user query or improvement to the supervisor system prompt.

Respond in this format:
- Was correct agent called?: [Yes/No]
- Reasoning: ...
- Suggested fix (if needed): ...
""")
    ]

    return debug_llm(chat)


In [9]:
# Your original input
user_query = "Download the following papers: 10.1074/jbc.M112.432062, 10.1038/psp.2013.64"

# Run LangGraph
initial_state = Talk2Scholars(user_input=user_query, thread_id="demo_debug_01")
result = app.invoke(initial_state, config={"configurable": {"thread_id": "demo_debug_01"}})

# Extract messages (cleaned)
agent_responses = "\n".join([f"{m.name}: {m.content}" for m in result["messages"]])

# Evaluate
eval_response = evaluate_agent_behavior(user_query, agent_responses, expected_agent="paper_download_agent")

# Display result
print(eval_response.content)


  return debug_llm(chat)


- Was correct agent called?: No
- Reasoning: The response from the supervisor was vague and generic, not specifically addressing the user's request to download academic papers. It did not trigger the **paper_download_agent** that is needed for this task.
- Suggested fix (if needed): The supervisor system prompt could be improved to explicitly mention that it can assist with downloading academic papers. A rephrased user query could be: "Could you please download the following papers for me: 10.1074/jbc.M112.432062 and 10.1038/psp.2013.64?"


In [20]:
import yaml

def load_system_prompt(path):
    with open(path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    return config["system_prompt"]



In [21]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

optimizer_llm = ChatOpenAI(model="gpt-4o", temperature=0.2)

def suggest_system_prompt_update(current_prompt, user_input, messages, expected_agent):
    chat = [
        HumanMessage(content=f"""
You are a prompt engineering assistant.

You are reviewing a LangGraph agent system. It uses a system prompt to decide which subagent to call.

---

Current system prompt:
\"\"\"{current_prompt}\"\"\"

User input:
\"\"\"{user_input}\"\"\"

Actual agent messages:
\"\"\"{messages}\"\"\"

The expected subagent to be triggered was: {expected_agent}

---

Please do the following:
1. Briefly explain why the current prompt failed to trigger the correct agent.
2. Then, rewrite the system prompt to improve routing for this case.
Respond ONLY with:
- Explanation
- New system prompt
""")
    ]
    return optimizer_llm.invoke(chat)


In [22]:
def write_system_prompt(path, new_prompt):
    with open(path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    config["system_prompt"] = new_prompt

    with open(path, 'w', encoding='utf-8') as f:
        yaml.dump(config, f, sort_keys=False, allow_unicode=True)


In [None]:
# Setup
import os
import sys

# Add root of the repository to the path
import os
yaml_path = "../../../aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml"
print("Path exists:", os.path.exists(yaml_path))

user_query = "Download the following papers: 10.1074/jbc.M112.432062, 10.1038/psp.2013.64"

# Load current prompt
current_prompt = load_system_prompt(yaml_path)

# Run the LangGraph app
initial_state = Talk2Scholars(user_input=user_query, thread_id="prompt_debug")
result = app.invoke(initial_state, config={"configurable": {"thread_id": "prompt_debug"}})

# Extract result messages
messages_str = "\n".join([f"{m.name}: {m.content}" for m in result["messages"]])

# Evaluate and improve prompt
fix_response = suggest_system_prompt_update(current_prompt, user_query, messages_str, expected_agent="paper_download_agent")
print("🔍 Prompt Diagnosis:\n", fix_response.content)

# Extract and apply updated prompt
import re

def extract_code_block(text):
    match = re.search(r"```(?:plaintext)?\n(.+?)```", text, re.DOTALL)
    return match.group(1).strip() if match else None

new_prompt = extract_code_block(fix_response.content)

if new_prompt:
    write_system_prompt(yaml_path, new_prompt)
    print(" Updated system prompt saved.")
else:
    print(" Could not extract new prompt. Check LLM response formatting.")



Path exists: True
🔍 Prompt Diagnosis:
 ### Explanation

The current system prompt failed to trigger the correct agent because it does not account for scenarios where the user directly provides specific paper IDs for download. The protocol described in the prompt is focused on extracting paper IDs from a DataFrame using `query_dataframe`, but it does not address cases where the user already knows and provides the IDs. This oversight leads to the system not recognizing the need to directly use the `paper_download_agent` when explicit paper IDs are given.

### New System Prompt

```plaintext
You are the Main Supervisor Agent.

You have access to four tools, each represented by a sub-agent:

- s2_agent: Use this to search for or recommend academic papers.
  You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
  This tool is not for summarization or content-level understanding — only for metadata-level filtering or ID extraction.

- zotero_agent: U

In [27]:
test_cases = [
    ("Download paper 10.1038/s41540-024-00395-9", "paper_download_agent"),
    ("Summarize the uploaded PDF", "pdf_agent"),
    ("Search for papers on LLMs and neuroscience", "s2_agent"),
    ("Add this article to Zotero", "zotero_agent"),
]
app = get_app("prompt_debug_session", llm)


INFO:aiagents4pharma.talk2scholars.agents.main_agent:Launching Talk2Scholars with thread_id prompt_debug_session
INFO:aiagents4pharma.talk2scholars.agents.main_agent:System_prompt of Talk2Scholars: You are the Main Supervisor Agent.

You have access to four tools, each represented by a sub-agent:

- s2_agent: Use this to search for or recommend academic papers.
  You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
  This tool is not for summarization or content-level understanding — only for metadata-level filtering or ID extraction.

- zotero_agent: Use this to read from or write to the user's Zotero account.
  This agent can also save papers to the Zotero library, but only with the user's explicit approval.

- pdf_agent: Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
  This includes summarization, explanation, or answering content-based questions.

- paper_download_agent: Use to downlo