# Agent Class

In [1]:
import operator
from typing import List, Annotated, TypedDict
from langchain_core.messages import AnyMessage, SystemMessage, ToolMessage, AIMessage
from langgraph.graph import StateGraph, START, END


class AgentState(TypedDict):
    """Graph state shared by the agent's nodes: the conversation's message list."""
    # operator.add is attached as Annotated metadata. NOTE(review): LangGraph presumably
    # uses it as the reducer, so the list a node returns under 'messages' is appended to
    # the existing history instead of replacing it -- confirm against the LangGraph docs.
    messages: Annotated[list[AnyMessage], operator.add]
class Agent:
    """Tool-calling agent built as a two-node graph loop.

    The "llm" node asks the tool-bound model for the next message. When that
    message requests tool calls, the "action" node runs them and hands the
    results back to "llm"; otherwise the graph ends.
    """

    def __init__(self, model, tools, system=""):
        """Wire up and compile the graph, then bind the tools to the model.

        Args:
            model: Chat model exposing ``bind_tools``; the bound model must
                expose ``ainvoke``.
            tools: Collection of tool objects exposing ``name`` and ``ainvoke``.
            system: Optional system prompt prepended to every model call.
        """
        self.system = system
        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        # Route on the boolean returned by exists_action: run tools, or finish.
        graph.add_conditional_edges(
            "llm",
            self.exists_action,
            {True: "action", False: END}
        )
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile()
        # The node methods read these attributes only when a node runs, so
        # assigning them after compile() is fine.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

    def exists_action(self, state: AgentState) -> bool:
        """Return True when the newest message is an AIMessage requesting tool calls."""
        result = state['messages'][-1]
        return isinstance(result, AIMessage) and bool(getattr(result, "tool_calls", None))

    async def call_openai(self, state: AgentState) -> dict:
        """Invoke the model on the conversation and return its reply as a state update.

        The system prompt, when set, is prepended to the list sent to the model
        only; it is not part of the returned update.
        """
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = await self.model.ainvoke(messages)  # asynchronous invoke
        return {'messages': [message]}

    async def take_action(self, state: AgentState) -> dict:
        """Run every tool call requested by the newest message, one after another.

        Returns:
            A state update holding one ToolMessage per requested call, in
            request order. An unknown tool name yields a "bad tool name, retry"
            message instead of raising, so the model can correct itself.
        """
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            if t['name'] not in self.tools:      # check for bad tool name from LLM
                print("\n ....bad tool name....")
                result = "bad tool name, retry"  # instruct LLM to retry if bad
            else:
                result = await self.tools[t['name']].ainvoke(t['args'])  # asynchronous invoke
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}

# MCP Connection

In [2]:
from mcp import ClientSession
from mcp.client.sse import sse_client

In [8]:
class MCPConnectionManager:
    """Owns one long-lived MCP client session over an SSE transport.

    ``initialize()`` opens the transport and the session once and returns the
    same session on later calls; ``close()`` releases both.
    """

    def __init__(self, url="http://127.0.0.1:8787/sse"):
        """Store the endpoint; nothing is opened until ``initialize()`` is awaited.

        Args:
            url: SSE endpoint of the MCP server.
        """
        self.url = url
        self.session = None
        self.client = None
        self._initialized = False
        self._exit_stack = None

    async def initialize(self):
        """Open the SSE transport and MCP session, or return the one already open.

        Returns:
            The initialized ``ClientSession``.

        Raises:
            Whatever the transport or session raises while connecting. Anything
            that was already opened is closed again before the error propagates.
        """
        if self._initialized:
            return self.session

        from contextlib import AsyncExitStack

        # The stack keeps both context managers open after this method returns
        # and remembers how to unwind them, in reverse order, later on.
        stack = AsyncExitStack()
        try:
            client = sse_client(self.url)
            read_stream, write_stream = await stack.enter_async_context(client)
            session = ClientSession(read_stream, write_stream)
            await stack.enter_async_context(session)
            await session.initialize()
        except BaseException:
            # Do not leave a half-open connection behind on failure or cancellation.
            await stack.aclose()
            raise

        self.client = client
        self.session = session
        self._exit_stack = stack
        self._initialized = True
        return self.session

    async def close(self):
        """Close the session and transport opened by ``initialize()``.

        Does nothing when no connection is open. Afterwards ``initialize()``
        may be awaited again to reconnect.

        NOTE(review): the SSE transport presumably runs background tasks that
        are bound to the task which opened them -- confirm that calling this
        from a different notebook cell than ``initialize()`` is supported.
        """
        stack, self._exit_stack = self._exit_stack, None
        self.session = None
        self.client = None
        self._initialized = False
        if stack is not None:
            await stack.aclose()

# Workflow Demo: 
## Topic Not Found → arXiv Search → Download → Query

In [9]:
# Initialize MCP Connection and Create Agent with Tools
from langchain_openai import ChatOpenAI
from langchain.tools import StructuredTool
from langchain_core.messages import HumanMessage
from pydantic import BaseModel, Field
from typing import Optional, Any
import json

# Connect to MCP Server
# `session` stays a notebook-level name on purpose: the tool wrappers defined below
# and the STEP 4 cell both call it directly.
mcp_manager = MCPConnectionManager()
session = await mcp_manager.initialize()
print("✓ Connected to MCP Server")

# Get tools from MCP
# The response's `.tools` entries are converted to LangChain tools further down.
tools_response = await session.list_tools()

def create_mcp_tool_wrapper(tool_info):
    """Build the async callable that forwards keyword arguments to one MCP tool.

    A factory is used so that each wrapper captures its own tool name instead
    of sharing the conversion loop's variable.

    Args:
        tool_info: MCP tool descriptor; only its ``name`` attribute is read.

    Returns:
        A coroutine function ``wrapper(**kwargs)`` that calls the tool through
        the notebook-level ``session`` and returns the result as one string:
        the text of every content item joined by newlines, a bracketed
        placeholder for items that carry no text, or "No content returned"
        when the result has no content at all.
    """
    tool_name = tool_info.name

    async def mcp_tool_wrapper(**kwargs):
        # `session` is looked up when the tool is called, not when the wrapper is built.
        result = await session.call_tool(tool_name, arguments=kwargs)
        parts = []
        for item in result.content or []:
            text = getattr(item, "text", None)
            if text is None:
                # Keep a visible marker rather than failing on non-text content.
                parts.append(f"[non-text content: {type(item).__name__}]")
            else:
                parts.append(str(text))
        if not parts:
            return "No content returned"
        return "\n".join(parts)

    return mcp_tool_wrapper

# Convert MCP tools to LangChain StructuredTools
from pydantic import create_model  # builds each tool's argument schema below

# JSON Schema scalar type -> Python type for the generated argument model.
# Anything else (arrays, objects, unions, missing "type") stays `Any`.
JSON_TYPE_TO_PYTHON = {
    "string": str,
    "integer": int,
    "number": float,
    "boolean": bool,
}

tools = []
for tool in tools_response.tools:
    input_schema = tool.inputSchema or {}
    required_names = set(input_schema.get("required", []))

    # One (type, Field) pair per declared property -- the shape create_model expects.
    field_definitions = {}
    for prop_name, prop_info in input_schema.get("properties", {}).items():
        json_type = prop_info.get("type")
        # "type" may also be a list of types; only a plain string is looked up.
        field_type = JSON_TYPE_TO_PYTHON.get(json_type, Any) if isinstance(json_type, str) else Any
        description = prop_info.get("description", "")
        if prop_name in required_names:
            field_definitions[prop_name] = (field_type, Field(description=description))
        else:
            field_definitions[prop_name] = (Optional[field_type], Field(default=None, description=description))

    ArgsModel = create_model(f"{tool.name}Args", **field_definitions)

    structured_tool = StructuredTool(
        name=tool.name,
        # A tool may come without a description; pass an empty string instead of None.
        description=tool.description or "",
        coroutine=create_mcp_tool_wrapper(tool),
        args_schema=ArgsModel
    )
    tools.append(structured_tool)

print(f"✓ Loaded {len(tools)} tools: {[t.name for t in tools]}")

# Create LLM and Agent
# Only the model name is passed; every other ChatOpenAI setting is left at its default.
llm = ChatOpenAI(model="gpt-4o-mini")

# System prompt handed to Agent(system=...). It names three tools and lays out the
# probe -> ask -> search -> download -> re-probe workflow that the demo cells walk through.
workflow_system_prompt = """You are a research assistant that helps users find information from academic papers.

You have access to these tools:
1. research_paper_probe - Search the local RAG knowledge base of research papers
2. search_arxiv - Search arXiv for new academic papers  
3. download_paper - Download papers from arXiv and add them to the knowledge base

WORKFLOW:
1. When a user asks about a topic, FIRST use research_paper_probe to check the local knowledge base.
2. If NO relevant results are found (low confidence or empty sources), inform the user and ASK if they want you to search arXiv for papers on this topic.
3. If the user says yes, use search_arxiv to find relevant papers. Present the results as a numbered list with title, authors, year, and a brief abstract summary.
4. Ask the user which papers (up to 3) they would like to add to the collection.
5. When the user selects papers, use download_paper for each selected paper to download and index them.
6. After downloading, use research_paper_probe again to answer the original question using the newly added papers.

Always cite your sources with paper titles and years."""

# `agent` is the object the STEP 1-3 cells drive through `agent.graph.ainvoke(...)`.
agent = Agent(llm, tools, system=workflow_system_prompt)
print("✓ Agent created with workflow system prompt")

✓ Connected to MCP Server
✓ Loaded 3 tools: ['research_paper_probe', 'search_arxiv', 'download_paper']
✓ Agent created with workflow system prompt


Error in sse_reader
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-packages\httpx\_transports\default.py", line 101, in map_httpcore_exceptions
    yield
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-packages\httpx\_transports\default.py", line 271, in __aiter__
    async for part in self._httpcore_stream:
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-packages\httpcore\_async\connection_pool.py", line 407, in __aiter__
    raise exc from None
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-packages\httpcore\_async\connection_pool.py", line 403, in __aiter__
    async for part in self._stream:
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-packages\httpcore\_async\http11.py", line 342, in __aiter__
    raise exc
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-packages\httpcore\_async\http11.py", line 334, in __aiter__
    async for chunk in self._connection._receive_response_body(**kwargs):
  File "c:\Users\User\anaconda3\envs\PT\Lib\site-p

In [25]:
# STEP 1: Ask about a topic NOT in the knowledge base
query = "What are the latest advances in using LLMs and AI agents for drug discovery?"
messages = [HumanMessage(content=query)]

result = await agent.graph.ainvoke({"messages": messages})
print("=" * 60)
print("STEP 1: Initial Query (Topic Not Found)")
print("=" * 60)
print(f"User: {query}")
print("-" * 60)
print(f"Agent: {result['messages'][-1].content}")

Calling: {'name': 'research_paper_probe', 'args': {'query': 'latest advances in using LLMs and AI agents for drug discovery'}, 'id': 'call_LkIKMruscTI9Bk8I1mWrmP1u', 'type': 'tool_call'}
Back to the model!
WORKFLOW STEP 1: Initial Query
User: What are the latest advances in using LLMs and AI agents for drug discovery?
------------------------------------------------------------
Agent: I couldn't find specific papers on the latest advances in using LLMs (Large Language Models) and AI agents for drug discovery in the local knowledge base. Would you like me to search arXiv for recent academic papers on this topic?


In [26]:
# STEP 2: User says YES → Agent searches arXiv
messages = result['messages'] + [HumanMessage(content="Yes, please search for papers on LLM agents drug discovery")]

result = await agent.graph.ainvoke({"messages": messages})
print("=" * 60)
print("STEP 2: Search arXiv")
print("=" * 60)
print("User: Yes, please search for papers on LLM agents drug discovery")
print("-" * 60)
print(f"Agent: {result['messages'][-1].content}")

Calling: {'name': 'search_arxiv', 'args': {'query': 'LLM agents drug discovery', 'subject': 'Artificial Intelligence'}, 'id': 'call_8XvtXDmEdNvi3TlGnx0ffyqH', 'type': 'tool_call'}
Back to the model!
WORKFLOW STEP 2: Search arXiv
User: Yes, please search for papers on LLM agents drug discovery
------------------------------------------------------------
Agent: Here are some recent academic papers related to LLMs and AI agents in the context of drug discovery:

1. **[RAG-Enhanced Collaborative LLM Agents for Drug Discovery](http://arxiv.org/abs/2502.17506v3)**  
   - **Authors:** Namkyeong Lee, Edward De Brouwer, Ehsan Hajiramezanali, Tommaso Biancalani, Chanyoung Park  
   - **Year:** 2025  
   - **Abstract:** This paper discusses the potential of large language models (LLMs) in drug discovery and addresses the challenges posed by the specialized nature of biochemical data, which often requires costly domain-specific fine-tuning. The authors propose RAG-enhanced collaborative LLM agents

In [27]:
# STEP 3: User selects paper → Agent downloads and indexes it
messages = result['messages'] + [HumanMessage(content="Please add paper #1 to the collection.")]

result = await agent.graph.ainvoke({"messages": messages})
print("=" * 60)
print("STEP 3: Download & Index Paper")
print("=" * 60)
print("User: Please add paper #1 to the collection.")
print("-" * 60)
print(f"Agent: {result['messages'][-1].content}")

Calling: {'name': 'download_paper', 'args': {'pdf_url': 'https://arxiv.org/pdf/2502.17506v3', 'title': 'RAG-Enhanced Collaborative LLM Agents for Drug Discovery', 'year': 2025, 'subject': 'Artificial Intelligence', 'topic': None}, 'id': 'call_dPqTJcElUsYpF5Z1CimvHQOm', 'type': 'tool_call'}
Back to the model!
WORKFLOW STEP 3: Download & Index Paper
User: Please add paper #1 (RAG-Enhanced Collaborative LLM Agents for Drug Discovery) to the collection.
------------------------------------------------------------
Agent: The paper **"RAG-Enhanced Collaborative LLM Agents for Drug Discovery"** (2025) has been successfully added to the collection. You can find it in your papers directory:

- **File Path:** `C:\Users\User\Desktop\llms\Project\Research Assistant Multi Agent System\Tools Server\RAG SETUP\Papers\Artificial Intelligence\Uncategorized\RAG-Enhanced Collaborative LLM Agents for Drug Discovery - 2025.pdf`

If you need further assistance or want to explore more topics, feel free to ask

In [None]:
# STEP 4: Query the newly added paper

probe_result = await session.call_tool("research_paper_probe", arguments={
    "query": "How do RAG-enhanced LLM agents work for drug discovery?",
    "k": 5
})
response = json.loads(probe_result.content[0].text)
print("=" * 60)
print("STEP 4: Query Newly Indexed Paper")
print("=" * 60)
print(f"Query: How do RAG-enhanced LLM agents work for drug discovery?")
print(f"Confidence: {response['confidence']}")
print("-" * 60)
print(response['response'])
print("-" * 60)
print("Sources:")
for src in response['sources']:
    print(f"  - {src['paper_title']} ({src['year']}) p.{src['page']}")

STEP 4: Query Newly Indexed Paper
Query: How do RAG-enhanced LLM agents work for drug discovery?
Confidence: 0.8
------------------------------------------------------------
## Answer

RAG-enhanced LLM agents work for drug discovery by dynamically retrieving information from biomedical knowledge bases and contextualizing query molecules. They integrate relevant evidence to generate responses without the need for domain-specific fine-tuning. This approach allows the agents to adapt to new insights and complex scientific questions in real-time, addressing challenges such as data heterogeneity, ambiguity, and multi-source integration. The framework leverages the collaboration of multiple LLM agents to improve the effectiveness of tasks like drug-target prediction, molecular captioning, and biological activity prediction, outperforming both general-purpose and domain-specific LLMs as well as traditional deep learning methods.

## Sources

1. *RAG-Enhanced Collaborative LLM Agents for Drug 

# Getting the requirements from the kernel environment

In [1]:
import subprocess
import sys

packages = [
    "fastapi", "uvicorn", "langchain", "langchain-core", "langchain-openai",
    "langgraph", "openai", "chromadb", "pydantic", "python-dotenv",
    "httpx", "arxiv", "pymupdf", "mcp", "pypdf", "aiohttp"
]

for pkg in packages:
    # Run pip through the kernel's own interpreter so the versions reported are those
    # of the running environment, not of whichever `pip` happens to be first on PATH.
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "show", pkg],
        capture_output=True,
        text=True,
    )
    if completed.returncode == 0:
        # Keep only the two header fields of pip's report.
        for line in completed.stdout.splitlines():
            if line.startswith(('Name:', 'Version:')):
                print(line)
    else:
        print(f"{pkg}: Not installed")
    print("-" * 30)

Name: fastapi
Version: 0.116.1
------------------------------
Name: uvicorn
Version: 0.35.0
------------------------------
Name: langchain
Version: 0.3.27
------------------------------
Name: langchain-core
Version: 0.3.72
------------------------------
Name: langchain-openai
Version: 0.3.28
------------------------------
Name: langgraph
Version: 0.6.3
------------------------------
Name: openai
Version: 1.100.1
------------------------------
Name: chromadb
Version: 1.0.17
------------------------------
Name: pydantic
Version: 2.11.7
------------------------------
Name: python-dotenv
Version: 1.1.1
------------------------------
Name: httpx
Version: 0.28.1
------------------------------
Name: arxiv
Version: 2.3.1
------------------------------
Name: PyMuPDF
Version: 1.26.3
------------------------------
Name: mcp
Version: 1.12.4
------------------------------
Name: pypdf
Version: 6.0.0
------------------------------
Name: aiohttp
Version: 3.12.14
------------------------------
