# MCP를 통한 External Database 활용

이 노트북에서는 Strands Agents와 MCP (Model Context Protocol)를 사용하여 외부 데이터베이스인 arXiv, ChEMBL, PubMed, ClinicalTrials.gov를 연동하는 방법을 실습합니다.

## 학습 목표
- MCP를 통해 외부 데이터베이스를 Agent의 도구로 활용하는 방법 이해
- Agent-as-tool 패턴을 사용한 다중 MCP 서버 통합
- 실제 연구 질문에 대한 답변 생성 실습

## 1. 환경 설정

필요한 라이브러리와 의존성을 설치합니다.

In [None]:
# 필요한 패키지 설치
%pip install strands-agents strands-agents-tools mcp boto3 arxiv chembl-webresource-client python-dateutil pubmedmcp --quiet

In [None]:
# 라이브러리 임포트
import sys
import logging
import asyncio
from typing import List, Dict, Any
from datetime import datetime

# AWS SDK
import boto3
from botocore.config import Config

# Strands Agents
from strands import Agent
from strands.models import BedrockModel
from strands.tools.mcp import MCPClient

# MCP
from mcp import stdio_client, StdioServerParameters

# Configure notebook-wide logging: INFO level, with timestamp, logger name and level
# in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("external_dbs_demo")

## 2. MCP 서버 구성

각 외부 데이터베이스에 대한 MCP 서버를 생성합니다.

In [None]:
%%writefile mcp_server_arxiv.py
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import logging
import sys
from datetime import datetime, timezone
from typing import Any, Dict, List

import arxiv
from dateutil import parser
from mcp.server.fastmcp import FastMCP

# Send log output to stderr.
# NOTE(review): presumably because the notebook talks to this script over stdio,
# where stdout carries protocol traffic - confirm.
logging.basicConfig(
    level=logging.INFO,
    format='%(filename)s:%(lineno)d | %(message)s',
    handlers=[logging.StreamHandler(sys.stderr)]
)
logger = logging.getLogger("arxiv_mcp")

# Upper bound on the number of papers a single tool call returns.
MAX_RESULTS = 10

try:
    mcp = FastMCP(name="arxiv_tools")
    logger.info("arXiv MCP server initialized successfully")
except Exception as e:
    logger.error(f"Error: {str(e)}")
    # Without `mcp` the @mcp.tool() decorators below fail with an unrelated
    # NameError, so surface the real initialization error instead.
    raise

def _is_within_date_range(
    date: datetime, start: datetime | None, end: datetime | None
) -> bool:
    """Check if a date falls within the specified range."""
    if start and not start.tzinfo:
        start = start.replace(tzinfo=timezone.utc)
    if end and not end.tzinfo:
        end = end.replace(tzinfo=timezone.utc)

    if start and date < start:
        return False
    if end and date > end:
        return False
    return True

def _process_paper(paper: arxiv.Result) -> Dict[str, Any]:
    """Flatten an arXiv result into a plain dictionary.

    Args:
        paper: A result object yielded by the arxiv client.

    Returns:
        A dict with the short id, title, author names, abstract, categories,
        ISO-formatted publication timestamp, PDF URL and an ``arxiv://``
        resource URI built from the short id.
    """
    short_id = paper.get_short_id()
    author_names = [person.name for person in paper.authors]
    return {
        "id": short_id,
        "title": paper.title,
        "authors": author_names,
        "abstract": paper.summary,
        "categories": paper.categories,
        "published": paper.published.isoformat(),
        "url": paper.pdf_url,
        "resource_uri": f"arxiv://{short_id}",
    }

def _parse_date_utc(value: str | None) -> datetime | None:
    """Parse a date string into a timezone-aware UTC datetime.

    Returns None for empty input. A value without timezone information is
    assumed to be UTC; a value with an explicit offset is converted to UTC
    rather than relabelled, so the instant it denotes is preserved.
    """
    if not value:
        return None
    parsed = parser.parse(value)
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


@mcp.tool()
async def search_papers(
    query: str,
    max_results: int = 10,
    date_from: str | None = None,
    date_to: str | None = None,
    categories: List[str] | None = None,
) -> List[Dict[str, Any]]:
    """Search for papers on arXiv with advanced filtering.

    Args:
        query: arXiv search expression.
        max_results: Requested number of papers; capped at MAX_RESULTS.
        date_from: Optional earliest publication date (any format dateutil can parse).
        date_to: Optional latest publication date (any format dateutil can parse).
        categories: Optional arXiv category codes; a paper must match at least one.

    Returns:
        A list of paper dicts sorted by submission date, or a one-element list
        holding an ``{"error": ...}`` dict when the dates are invalid or the
        search fails. The date window is applied to the fetched results, so
        fewer than ``max_results`` papers may be returned.
    """
    try:
        # Validate the date window first so bad input never triggers a request.
        try:
            date_from_obj = _parse_date_utc(date_from)
            date_to_obj = _parse_date_utc(date_to)
        except (ValueError, TypeError, OverflowError) as e:
            return [{"error": f"Invalid date format - {str(e)}"}]

        client = arxiv.Client()
        max_results = min(int(max_results), MAX_RESULTS)

        # Build search query with category filtering
        if categories:
            category_filter = " OR ".join(f"cat:{cat}" for cat in categories)
            query = f"({query}) AND ({category_filter})"

        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )

        # Keep only the papers published inside the requested window.
        results = []
        for paper in client.results(search):
            if _is_within_date_range(paper.published, date_from_obj, date_to_obj):
                results.append(_process_paper(paper))

            if len(results) >= max_results:
                break

        return results

    except Exception as e:
        logger.error(f"Search error: {str(e)}")
        return [{"error": f"Search failed: {str(e)}"}]

@mcp.tool()
async def download_paper(paper_id: str) -> Dict[str, Any]:
    """Look up an arXiv paper by ID and return its PDF link.

    No file is fetched by this tool; it reports the paper's id, title, PDF URL
    and resource URI together with a ``download_status`` of "success".

    Args:
        paper_id: arXiv identifier of the paper.

    Returns:
        The paper's link information, or an ``{"error": ...}`` dict when the
        paper is not found or the lookup fails.
    """
    try:
        lookup = arxiv.Search(id_list=[paper_id])
        # Only the first match is used.
        paper = next(iter(arxiv.Client().results(lookup)), None)
        if paper is None:
            return {"error": f"Paper with ID {paper_id} not found"}

        short_id = paper.get_short_id()
        return {
            "id": short_id,
            "title": paper.title,
            "url": paper.pdf_url,
            "download_status": "success",
            "resource_uri": f"arxiv://{short_id}"
        }
    except Exception as e:
        logger.error(f"Download error: {str(e)}")
        return {"error": f"Download failed: {str(e)}"}

@mcp.tool()
async def read_paper(paper_id: str) -> Dict[str, Any]:
    """Return the metadata and abstract of an arXiv paper.

    The ``content`` field carries the paper's summary (abstract); the full
    text is not retrieved.

    Args:
        paper_id: arXiv identifier of the paper.

    Returns:
        A dict describing the paper, or an ``{"error": ...}`` dict when the
        paper is not found or the lookup fails.
    """
    try:
        lookup = arxiv.Search(id_list=[paper_id])
        # Only the first match is used.
        paper = next(iter(arxiv.Client().results(lookup)), None)
        if paper is None:
            return {"error": f"Paper with ID {paper_id} not found"}

        summary = paper.summary
        return {
            "id": paper.get_short_id(),
            "title": paper.title,
            "authors": [person.name for person in paper.authors],
            "abstract": summary,
            "categories": paper.categories,
            "published": paper.published.isoformat(),
            "content_type": "text",
            "content": summary,
        }
    except Exception as e:
        logger.error(f"Read error: {str(e)}")
        return {"error": f"Read failed: {str(e)}"}

@mcp.tool()
async def list_papers(
    category: str = None, max_results: int = 10
) -> List[Dict[str, Any]]:
    """List the most recently submitted papers, optionally within one category.

    Args:
        category: arXiv category code; when omitted an empty query is sent.
        max_results: Requested number of papers; capped at MAX_RESULTS.

    Returns:
        A list of paper dicts, or a one-element list holding an
        ``{"error": ...}`` dict when the request fails.
    """
    try:
        limit = min(int(max_results), MAX_RESULTS)
        search = arxiv.Search(
            query=f"cat:{category}" if category else "",
            max_results=limit,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )

        papers = []
        for entry in arxiv.Client().results(search):
            papers.append(_process_paper(entry))
            # Stop as soon as the requested number of papers is collected.
            if len(papers) >= limit:
                break

        return papers
    except Exception as e:
        logger.error(f"List error: {str(e)}")
        return [{"error": f"List failed: {str(e)}"}]

# Start the MCP server only when this file is executed as a script, not on import.
# NOTE(review): the notebook launches this file through StdioServerParameters, so
# run() presumably serves over stdio by default - confirm.
if __name__ == "__main__":
    mcp.run()

In [None]:
%%writefile mcp_server_chembl.py
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import logging
import sys
from typing import Any, Dict, List

from chembl_webresource_client.new_client import new_client
from mcp.server.fastmcp import FastMCP

# Maximum number of activity records a single tool call returns.
MAXIMUM_ACTIVITY = 100

# Send log output to stderr.
# NOTE(review): presumably because the notebook talks to this script over stdio,
# where stdout carries protocol traffic - confirm.
logging.basicConfig(
    level=logging.INFO,
    format='%(filename)s:%(lineno)d | %(message)s',
    handlers=[logging.StreamHandler(sys.stderr)]
)
logger = logging.getLogger("chembl_mcp")

try:
    mcp = FastMCP(name="chembl_tools")
    logger.info("ChEMBL MCP server initialized successfully")
except Exception as e:
    logger.error(f"Error: {str(e)}")
    # Without `mcp` the @mcp.tool() decorators below fail with an unrelated
    # NameError, so surface the real initialization error instead.
    raise

def _first_match(queryset):
    """Return the first record of a ChEMBL query set, or None when it is empty."""
    try:
        return queryset[0]
    except IndexError:
        return None


def _ic50_activities(activity_filter: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Fetch at most MAXIMUM_ACTIVITY IC50 activity records matching ``activity_filter``.

    The query set is consumed through ``islice`` so iteration stops once the
    cap is reached instead of materialising every matching record first.
    """
    from itertools import islice

    activities = (
        new_client.activity.filter(**activity_filter)
        .filter(standard_type="IC50")
        .only(['pchembl_value', 'assay_description', 'canonical_smiles'])
    )
    return list(islice(activities, MAXIMUM_ACTIVITY))


# NOTE: the tool name is misspelled ("compount"); it is kept unchanged because
# it is the name under which the tool is registered and called.
@mcp.tool()
async def compount_activity(compound_name: str) -> List[Dict[str, Any]]:
    """Get IC50 activity data for the specified compound.

    Args:
        compound_name: Preferred compound name, matched case-insensitively.

    Returns:
        Up to MAXIMUM_ACTIVITY activity records (pChEMBL value, assay
        description, canonical SMILES), or a one-element list holding an
        ``{"error": ...}`` dict when no compound matches or the lookup fails.
    """
    try:
        molecule = _first_match(
            new_client.molecule.filter(pref_name__iexact=compound_name).only('molecule_chembl_id')
        )
        if not molecule:
            return [{"error": f"No ChEMBL molecule found for compound: {compound_name}"}]
        return _ic50_activities({"molecule_chembl_id": molecule['molecule_chembl_id']})
    except Exception as e:
        logger.error(f"Compound activity error: {str(e)}")
        return [{"error": f"Compound activity lookup failed: {str(e)}"}]


@mcp.tool()
async def target_activity(target_name: str) -> List[Dict[str, Any]]:
    """Get IC50 activity data for the specified human target.

    Args:
        target_name: Text matched case-insensitively against target synonyms;
            only "Homo sapiens" targets are considered and the first match is used.

    Returns:
        Up to MAXIMUM_ACTIVITY activity records (pChEMBL value, assay
        description, canonical SMILES), or a one-element list holding an
        ``{"error": ...}`` dict when no target matches or the lookup fails.
    """
    try:
        target = _first_match(
            new_client.target.filter(
                target_synonym__icontains=target_name, organism='Homo sapiens'
            ).only('target_chembl_id')
        )
        if not target:
            return [{"error": f"No ChEMBL target found for: {target_name}"}]
        return _ic50_activities({"target_chembl_id": target['target_chembl_id']})
    except Exception as e:
        logger.error(f"Target activity error: {str(e)}")
        return [{"error": f"Target activity lookup failed: {str(e)}"}]

# Start the MCP server only when this file is executed as a script, not on import.
# NOTE(review): the notebook launches this file through StdioServerParameters, so
# run() presumably serves over stdio by default - confirm.
if __name__ == "__main__":
    mcp.run()

In [None]:
%%writefile mcp_server_pubmed.py
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import logging
import sys
from typing import Any, Dict, List, Optional

import httpx
from defusedxml import ElementTree as ET
from mcp.server.fastmcp import FastMCP

# Send log output to stderr.
# NOTE(review): presumably because the notebook talks to this script over stdio,
# where stdout carries protocol traffic - confirm.
logging.basicConfig(
    level=logging.INFO,
    format='%(filename)s:%(lineno)d | %(message)s',
    handlers=[logging.StreamHandler(sys.stderr)]
)
logger = logging.getLogger("pubmed_mcp")

try:
    mcp = FastMCP(name="pubmed_tools")
    logger.info("PubMed MCP server initialized successfully")
except Exception as e:
    logger.error(f"Error: {str(e)}")
    # Without `mcp` the @mcp.tool() decorators below fail with an unrelated
    # NameError, so surface the real initialization error instead.
    raise

# Helper functions for PubMed API
def _element_text(element) -> str:
    """Return all text inside ``element``, including text nested in child tags.

    ``element.text`` stops at the first child element, which truncates titles
    and abstracts that contain inline markup; ``itertext`` walks the whole
    subtree. Returns an empty string for a missing element.
    """
    if element is None:
        return ""
    return "".join(element.itertext()).strip()


def _parse_article_fields(article_element) -> Dict[str, Any]:
    """Extract the fields shared by search results and article details.

    Reads title, abstract, authors, journal and publication year from a
    ``PubmedArticle`` element. A key is only present when the corresponding
    XML element exists.
    """
    fields: Dict[str, Any] = {}

    # Extract title
    title = article_element.find(".//ArticleTitle")
    if title is not None:
        fields["title"] = _element_text(title)

    # Extract abstract (may be split into several labelled sections)
    abstract_parts = article_element.findall(".//AbstractText")
    if abstract_parts:
        section_texts = [_element_text(part) for part in abstract_parts]
        fields["abstract"] = " ".join(text for text in section_texts if text)

    # Extract authors as "ForeName LastName", falling back to the last name alone
    author_elements = article_element.findall(".//Author")
    if author_elements:
        names = []
        for author in author_elements:
            last_name = _element_text(author.find("LastName"))
            fore_name = _element_text(author.find("ForeName"))
            if last_name and fore_name:
                names.append(f"{fore_name} {last_name}")
            elif last_name:
                names.append(last_name)
        fields["authors"] = ", ".join(names)

    # Extract journal info
    journal = article_element.find(".//Journal/Title")
    if journal is not None:
        fields["journal"] = _element_text(journal)

    # Extract publication year
    pub_year = article_element.find(".//PubDate/Year")
    if pub_year is not None:
        fields["year"] = _element_text(pub_year)

    return fields


def search_pubmed(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Search PubMed for articles matching the query.

    Runs an ``esearch`` request to obtain matching ids (sorted by relevance),
    then an ``efetch`` request to retrieve and parse the article records.

    Args:
        query: PubMed search term.
        max_results: Maximum number of ids requested from ``esearch``.

    Returns:
        A list of article dicts (id, title, abstract, authors, journal, year
        where available). Returns an empty list when nothing matches or when
        a request or parse step fails; failures are logged.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    # Search for IDs
    search_url = f"{base_url}/esearch.fcgi"
    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance",
    }

    try:
        search_response = httpx.get(search_url, params=search_params)
        search_response.raise_for_status()
        search_data = search_response.json()

        # Extract IDs
        id_list = search_data["esearchresult"]["idlist"]
        if not id_list:
            return []

        # Fetch article details
        fetch_url = f"{base_url}/efetch.fcgi"
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(id_list),
            "retmode": "xml"
        }

        fetch_response = httpx.get(fetch_url, params=fetch_params)
        fetch_response.raise_for_status()

        # Parse XML response
        root = ET.fromstring(fetch_response.text)
        articles = []

        for article_element in root.findall(".//PubmedArticle"):
            # A malformed record is logged and skipped so the rest still return.
            try:
                article = {}

                # Extract PMID
                pmid = article_element.find(".//PMID")
                if pmid is not None:
                    article["id"] = pmid.text

                article.update(_parse_article_fields(article_element))
                articles.append(article)
            except Exception as e:
                logger.error(f"Error parsing article: {e}")
                continue

        return articles
    except Exception as e:
        logger.error(f"Error searching PubMed: {e}")
        return []


def get_pubmed_article_details(pmid: str) -> Optional[Dict[str, Any]]:
    """Get detailed information about a specific PubMed article.

    Args:
        pmid: PubMed identifier of the article.

    Returns:
        A dict with id, an (always empty) ``references`` list, and title,
        abstract, authors, journal, year, doi and keywords where available.
        Returns None when the article is not in the response or when the
        request or parsing fails; failures are logged.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
    fetch_url = f"{base_url}/efetch.fcgi"

    fetch_params = {"db": "pubmed", "id": pmid, "retmode": "xml"}

    try:
        fetch_response = httpx.get(fetch_url, params=fetch_params)
        fetch_response.raise_for_status()

        # Parse XML response
        root = ET.fromstring(fetch_response.text)
        article_element = root.find(".//PubmedArticle")

        if article_element is None:
            return None

        article = {"id": pmid, "references": []}
        article.update(_parse_article_fields(article_element))

        # Extract DOI. Only the article's own id list is read: a descendant
        # search would also match ids inside the reference list and could
        # report a cited paper's DOI instead.
        for article_id in article_element.findall("./PubmedData/ArticleIdList/ArticleId"):
            if article_id.get("IdType") == "doi":
                article["doi"] = article_id.text
                break

        # Extract keywords
        keyword_elements = article_element.findall(".//Keyword")
        if keyword_elements:
            keywords = [k.text for k in keyword_elements if k.text]
            article["keywords"] = ", ".join(keywords)

        return article
    except Exception as e:
        logger.error(f"Error fetching article details: {e}")
        return None

@mcp.tool()
def pubmed_search(query: str, max_results: int = 10):
    """Run a free-text PubMed search.

    Args:
        query: PubMed search term.
        max_results: Maximum number of articles to request.

    Returns:
        The list of article records produced by ``search_pubmed``.
    """
    logger.info(f"Searching PubMed for: {query}")
    articles = search_pubmed(query, max_results)
    hit_count = len(articles)
    logger.info(f"Found {hit_count} results")
    return articles

@mcp.tool()
def pubmed_get_article(pmid: str):
    """Fetch the detailed record of one PubMed article.

    Args:
        pmid: PubMed identifier of the article.

    Returns:
        The dict produced by ``get_pubmed_article_details``, or None when
        that helper returns None.
    """
    logger.info(f"Fetching PubMed article: {pmid}")
    article = get_pubmed_article_details(pmid)
    if not article:
        logger.info(f"Failed to fetch article: {pmid}")
    else:
        logger.info(f"Successfully fetched article: {pmid}")
    return article

@mcp.tool()
def pubmed_search_by_protein(protein_name: str, max_results: int = 10):
    """Search PubMed for articles about a specific protein.

    The protein name and the word "protein" must both occur in the
    title/abstract field.

    Args:
        protein_name: Name of the protein.
        max_results: Maximum number of articles to request.

    Returns:
        The list of article records produced by ``search_pubmed``.
    """
    field = "[Title/Abstract]"
    query = f"{protein_name}{field} AND protein{field}"
    logger.info(f"Searching PubMed for protein: {protein_name}")
    articles = search_pubmed(query, max_results)
    hit_count = len(articles)
    logger.info(f"Found {hit_count} results for protein: {protein_name}")
    return articles

@mcp.tool()
def pubmed_search_by_disease(disease_name: str, max_results: int = 10):
    """Search PubMed for articles about a specific disease.

    The disease name must occur in the title/abstract together with one of
    "disease", "disorder" or "condition".

    Args:
        disease_name: Name of the disease.
        max_results: Maximum number of articles to request.

    Returns:
        The list of article records produced by ``search_pubmed``.
    """
    field = "[Title/Abstract]"
    context_terms = f"(disease{field} OR disorder{field} OR condition{field})"
    query = f"{disease_name}{field} AND {context_terms}"
    logger.info(f"Searching PubMed for disease: {disease_name}")
    articles = search_pubmed(query, max_results)
    hit_count = len(articles)
    logger.info(f"Found {hit_count} results for disease: {disease_name}")
    return articles

@mcp.tool()
def pubmed_search_by_drug(drug_name: str, max_results: int = 10):
    """Search PubMed for articles about a specific drug.

    The drug name must occur in the title/abstract together with one of
    "drug", "medication" or "compound".

    Args:
        drug_name: Name of the drug.
        max_results: Maximum number of articles to request.

    Returns:
        The list of article records produced by ``search_pubmed``.
    """
    field = "[Title/Abstract]"
    context_terms = f"(drug{field} OR medication{field} OR compound{field})"
    query = f"{drug_name}{field} AND {context_terms}"
    logger.info(f"Searching PubMed for drug: {drug_name}")
    articles = search_pubmed(query, max_results)
    hit_count = len(articles)
    logger.info(f"Found {hit_count} results for drug: {drug_name}")
    return articles

# Start the MCP server only when this file is executed as a script, not on import.
# NOTE(review): the notebook launches this file through StdioServerParameters, so
# run() presumably serves over stdio by default - confirm.
if __name__ == "__main__":
    mcp.run()

In [None]:
%%writefile mcp_server_clinicaltrial.py
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import logging
import os
import sys

import pandas as pd
from mcp.server.fastmcp import FastMCP
from pytrials.client import ClinicalTrials

# Default character budget for tabular output returned to the caller.
MAX_OUTPUT_CHARS = 20000

# Send log output to stderr.
# NOTE(review): presumably because the notebook talks to this script over stdio,
# where stdout carries protocol traffic - confirm.
logging.basicConfig(
    level=logging.INFO,
    format='%(filename)s:%(lineno)d | %(message)s',
    handlers=[logging.StreamHandler(sys.stderr)]
)
logger = logging.getLogger("clinicaltrial_mcp")

try:
    mcp = FastMCP(name="clinicaltrial_tools")
    logger.info("Clinical Trial MCP server initialized successfully")
except Exception as e:
    logger.error(f"Error: {str(e)}")
    # Without `mcp` the @mcp.tool() decorators below fail with an unrelated
    # NameError, so surface the real initialization error instead.
    raise

# Shared ClinicalTrials.gov client used by every tool in this module.
ct = ClinicalTrials()

# Helper functions
def load_csv_file(filename):
    """Load a CSV file into a DataFrame, or return None when that is not possible.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``, or None when the path does not exist
        or the file cannot be read or parsed. Returning None for unreadable
        files matches the caller's "not found or could not be loaded" message.
    """
    if not os.path.exists(filename):
        return None
    try:
        return pd.read_csv(filename)
    except (OSError, ValueError) as e:
        # pandas' EmptyDataError and ParserError, as well as UnicodeDecodeError,
        # are ValueError subclasses; OSError covers directories and permissions.
        logging.getLogger("clinicaltrial_mcp").error(
            f"Could not load CSV file {filename}: {e}"
        )
        return None

def format_limited_output(df, max_rows=None, max_chars=MAX_OUTPUT_CHARS):
    """Render a DataFrame as text, bounded in rows and characters.

    Args:
        df: DataFrame to render; None or an empty frame yields a placeholder.
        max_rows: Optional row limit, applied only when it is smaller than the
            number of rows in ``df``.
        max_chars: Maximum length of the rendered table before truncation.

    Returns:
        The rendered (possibly truncated) table followed by a summary line
        stating the total and displayed record counts.
    """
    if df is None or df.empty:
        return "No data available"

    total_rows = len(df)
    limit_rows = bool(max_rows) and max_rows < total_rows
    rows_shown = max_rows if limit_rows else total_rows
    visible = df.head(max_rows) if limit_rows else df

    rendered = visible.to_string()
    if len(rendered) > max_chars:
        # Cut the text and flag the cut so the reader knows output is partial.
        rendered = rendered[:max_chars] + "\n...[Output truncated]"

    summary = f"\n\nData summary: Total {total_rows} records, showing {rows_shown} records."
    return rendered + summary

def list_available_csv_files():
    """Return the names of entries in the current working directory ending in ".csv"."""
    entries = os.listdir(os.curdir)
    return [name for name in entries if name.endswith(".csv")]

@mcp.tool()
def search_clinical_trials_and_save_studies_to_csv(
    search_expr: str,
    max_studies: int = 10,
    save_csv: bool = False,
    filename: str = "search_results.csv",
    fields: list = None,
) -> str:
    """Search for clinical trials and optionally save the result table to CSV.

    Args:
        search_expr: Search expression passed to the ClinicalTrials client.
        max_studies: Maximum number of studies to request.
        save_csv: When True, write the full result table to a CSV file.
        filename: CSV file name; an empty value falls back to a name derived
            from ``search_expr``.
        fields: Study fields to retrieve; defaults to NCT number, conditions,
            study title and brief summary.

    Returns:
        A text rendering of the results (prefixed and suffixed with the saved
        file name when ``save_csv`` is set), "No results found", or an error
        message when the request fails.
    """
    try:
        if fields is None:
            fields = ["NCT Number", "Conditions", "Study Title", "Brief Summary"]

        results = ct.get_study_fields(
            search_expr=search_expr, fields=fields, max_studies=max_studies
        )

        # The first row is the header; anything beyond it is data.
        if len(results) <= 1:
            return "No results found"

        df = pd.DataFrame.from_records(results[1:], columns=results[0])
        if not save_csv:
            return format_limited_output(df)

        csv_filename = filename or f"search_results_{search_expr.replace('+', '_')}.csv"
        df.to_csv(csv_filename, index=False)
        storage_info = f"Complete results have been saved to file {csv_filename}"
        preview = format_limited_output(df)
        return f"Results saved to {csv_filename}\n\n{preview}\n{storage_info}"
    except Exception as e:
        return f"Error searching clinical trials: {str(e)}"

@mcp.tool()
def get_full_study_details(nct_id: str) -> str:
    """Get detailed information about a specific clinical trial.

    Args:
        nct_id: NCT identifier of the study.

    Returns:
        A text rendering of the study record, a not-found message, or an
        error message when the request fails.
    """
    try:
        rows = ct.get_full_studies(search_expr=f"NCT Number={nct_id}", max_studies=1)
        # The first row is the header; anything beyond it is data.
        if len(rows) <= 1:
            return f"Study with NCT ID {nct_id} not found"
        frame = pd.DataFrame.from_records(rows[1:], columns=rows[0])
        return format_limited_output(frame)
    except Exception as e:
        return f"Error fetching study details: {str(e)}"

@mcp.tool()
def get_studies_by_keyword(
    keyword: str, max_studies: int = 20, save_csv: bool = False, filename: str = None
) -> str:
    """Get studies related to a specific keyword, optionally saving them to CSV.

    Args:
        keyword: Keyword used as the search expression.
        max_studies: Maximum number of studies to request.
        save_csv: When True, write the full result table to a CSV file.
        filename: CSV file name; when omitted a name is derived from ``keyword``.

    Returns:
        A text rendering of the results (prefixed and suffixed with the saved
        file name when ``save_csv`` is set), a no-results message, or an error
        message when the request fails.
    """
    try:
        results = ct.get_study_fields(
            search_expr=keyword,
            fields=["NCT Number", "Conditions", "Study Title", "Brief Summary"],
            max_studies=max_studies,
        )

        # The first row is the header; anything beyond it is data.
        if len(results) <= 1:
            return f"No studies found for keyword: {keyword}"

        df = pd.DataFrame.from_records(results[1:], columns=results[0])
        if not save_csv:
            return format_limited_output(df)

        csv_filename = filename or f"keyword_results_{keyword.replace(' ', '_')}.csv"
        df.to_csv(csv_filename, index=False)
        storage_info = f"Complete results have been saved to file {csv_filename}"
        preview = format_limited_output(df)
        return f"Results saved to {csv_filename}\n\n{preview}\n{storage_info}"
    except Exception as e:
        return f"Error searching studies by keyword: {str(e)}"

@mcp.tool()
def get_full_studies_and_save(
    search_expr: str, max_studies: int = 20, filename: str = "full_studies.csv"
) -> str:
    """Get full studies data and save it to a CSV file.

    Args:
        search_expr: Search expression passed to the ClinicalTrials client.
        max_studies: Maximum number of studies to request.
        filename: Path of the CSV file to write.

    Returns:
        A confirmation with the number of saved studies, a message when there
        is nothing to save, or an error message when the request or write fails.
    """
    try:
        records = ct.get_full_studies(
            search_expr=search_expr, max_studies=max_studies
        )

        # The first row is the header; anything beyond it is data.
        if len(records) <= 1:
            return "No results found to save"

        frame = pd.DataFrame.from_records(records[1:], columns=records[0])
        frame.to_csv(filename, index=False)
        saved_count = len(frame)
        return f"Successfully saved {saved_count} full studies to {filename}"
    except Exception as e:
        return f"Error saving full studies to CSV: {str(e)}"

@mcp.tool()
def load_csv_data(filename: str) -> str:
    """Load and display data from a CSV file.

    Args:
        filename: CSV file name; ".csv" is appended when it is missing.

    Returns:
        A text rendering of the file's contents, or a message stating that
        the file was not found or could not be loaded.
    """
    csv_name = filename if filename.endswith(".csv") else filename + ".csv"

    df = load_csv_file(csv_name)
    if df is None:
        return f"CSV file {csv_name} not found or could not be loaded"
    return f"Loaded data from {csv_name}:\n\n{format_limited_output(df)}"

@mcp.tool()
def list_saved_csv_files() -> str:
    """List all available CSV files in the current directory.

    Returns:
        A newline-separated listing of CSV file names, or a message stating
        that none are available.
    """
    csv_names = list_available_csv_files()
    if not csv_names:
        return "No CSV files available"
    listing = "\n".join(csv_names)
    return f"Available CSV files:\n\n{listing}"

# Start the MCP server only when this file is executed as a script, not on import.
# NOTE(review): the notebook launches this file through StdioServerParameters, so
# run() presumably serves over stdio by default - confirm.
if __name__ == "__main__":
    mcp.run()

## 3. MCP 클라이언트 설정

각 외부 데이터베이스에 대한 MCP 클라이언트를 설정합니다.

In [None]:
# MCP server launch parameters.
# Each server is started with the interpreter running this kernel
# (sys.executable) rather than whatever "python" resolves to on PATH, so the
# subprocess sees the packages installed into the kernel's environment.
arxiv_server_params = StdioServerParameters(
    command=sys.executable,
    args=["mcp_server_arxiv.py"]
)

chembl_server_params = StdioServerParameters(
    command=sys.executable,
    args=["mcp_server_chembl.py"]
)

pubmed_server_params = StdioServerParameters(
    command=sys.executable,
    args=["mcp_server_pubmed.py"]
)

clinicaltrial_server_params = StdioServerParameters(
    command=sys.executable,
    args=["mcp_server_clinicaltrial.py"]
)

In [None]:
# Create one MCP client per server.
# MCPClient takes a zero-argument callable that opens the transport, so each
# parameter object is wrapped in a lambda that calls stdio_client on it.
arxiv_client = MCPClient(lambda: stdio_client(arxiv_server_params))
chembl_client = MCPClient(lambda: stdio_client(chembl_server_params))
pubmed_client = MCPClient(lambda: stdio_client(pubmed_server_params))
clinicaltrial_client = MCPClient(lambda: stdio_client(clinicaltrial_server_params))

## 4. 개별 데이터베이스 에이전트 생성

각 MCP 클라이언트를 사용하는 전문 에이전트를 생성합니다.

In [None]:
# Bedrock model shared by every agent in this notebook.
# The AWS region is passed as `region_name`; `region` is not a keyword that
# BedrockModel uses to select the region.
model = BedrockModel(
    model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    region_name="us-east-1"
)

In [None]:
# One specialist agent per external database; each gets the shared model and
# the MCP client of its own server as its only tool source.
# NOTE(review): passing an MCPClient object directly in `tools` presumably
# requires a Strands release that accepts clients as tool providers - confirm
# against the installed version.

# arXiv specialist
arxiv_agent = Agent(
    model=model,
    tools=[arxiv_client],
    name="ArXivAgent",
    system_prompt="""당신은 arXiv에서 학술 논문을 검색하는 전문가입니다. 
    사용자의 질문에 따라 관련된 과학 문헌을 찾아 제공하세요.
    검색 결과는 논문 제목, 저자, 요약, URL을 포함해야 합니다.""",
)

# ChEMBL specialist
chembl_agent = Agent(
    model=model,
    tools=[chembl_client],
    name="ChEMBLAgent",
    system_prompt="""당신은 ChEMBL 데이터베이스에서 화학 화합물 정보를 검색하는 전문가입니다.
    화합물의 구조, 특성, 생물학적 활성 데이터를 제공하세요.
    검색 결과는 ChEMBL ID, 화합물명, 분자식, 분자량 등을 포함해야 합니다.""",
)

# PubMed specialist
pubmed_agent = Agent(
    model=model,
    tools=[pubmed_client],
    name="PubMedAgent",
    system_prompt="""당신은 PubMed에서 생의학 문헌을 검색하는 전문가입니다.
    의학, 생물학, 생명과학 관련 연구 논문을 찾아 제공하세요.
    검색 결과는 PMID, 논문 제목, 저자, 출판 정보를 포함해야 합니다.""",
)

# ClinicalTrials.gov specialist
clinicaltrial_agent = Agent(
    model=model,
    tools=[clinicaltrial_client],
    name="ClinicalTrialAgent",
    system_prompt="""당신은 ClinicalTrials.gov에서 임상시험 정보를 검색하는 전문가입니다.
    진행 중이거나 완료된 임상 연구에 대한 정보를 제공하세요.
    검색 결과는 NCT ID, 연구 제목, 상태, 단계, 조건을 포함해야 합니다.""",
)

## 5. 메인 연구 에이전트 생성 (Agent-as-Tool 패턴)

개별 데이터베이스 에이전트들을 도구로 사용하는 통합 연구 에이전트를 생성합니다.

In [None]:
# Build the orchestrating research agent (agents-as-tools pattern).
# Agent instances are not tool specifications themselves, so each specialist
# is exposed through a @tool-decorated function that forwards the query to it.
from strands import tool


def _agent_as_tool(agent, tool_name, description):
    """Wrap a specialist agent in a Strands tool that another agent can call.

    Args:
        agent: The specialist agent that answers the delegated query.
        tool_name: Name under which the tool is offered to the orchestrator.
        description: Tool description shown to the orchestrating model.

    Returns:
        A tool that takes a ``query`` string and returns the specialist's
        answer as text.
    """

    @tool(name=tool_name, description=description)
    def _delegate(query: str) -> str:
        # Convert the agent result to plain text for the orchestrator.
        return str(agent(query))

    return _delegate


research_agent = Agent(
    name="생명과학연구에이전트",
    system_prompt="""당신은 종합적인 생명과학 연구 어시스턴트입니다. 
    다음과 같은 전문 데이터베이스 에이전트들을 도구로 사용할 수 있습니다:
    
    - ArXivAgent: 학술 논문 및 프리프린트 검색
    - ChEMBLAgent: 화학 화합물 데이터 검색
    - PubMedAgent: 생의학 문헌 검색
    - ClinicalTrialAgent: 임상시험 정보 검색
    
    사용자의 연구 질문에 따라 적절한 에이전트를 선택하여 사용하세요.
    포괄적인 답변을 위해 여러 에이전트를 조합하여 사용할 수 있습니다.
    각 에이전트의 검색 결과를 종합하여 유용한 정보를 제공하세요.""",
    model=model,
    tools=[
        _agent_as_tool(
            arxiv_agent,
            "ArXivAgent",
            "Search arXiv for academic papers and preprints. Pass the research question as `query`.",
        ),
        _agent_as_tool(
            chembl_agent,
            "ChEMBLAgent",
            "Look up chemical compound and bioactivity data in ChEMBL. Pass the question as `query`.",
        ),
        _agent_as_tool(
            pubmed_agent,
            "PubMedAgent",
            "Search PubMed for biomedical literature. Pass the research question as `query`.",
        ),
        _agent_as_tool(
            clinicaltrial_agent,
            "ClinicalTrialAgent",
            "Search ClinicalTrials.gov for clinical trial information. Pass the question as `query`.",
        ),
    ]
)

## 6. 테스트 및 실습

### 6.1 개별 에이전트 테스트

In [None]:
# Smoke test: send one query to the arXiv agent and print its answer.
separator = "=" * 50
print("=== ArXiv 에이전트 테스트 ===")
arxiv_result = arxiv_agent("machine learning drug discovery")
print(arxiv_result)
print(f"\n{separator}\n")

In [None]:
# Smoke test: send one query to the ChEMBL agent and print its answer.
separator = "=" * 50
print("=== ChEMBL 에이전트 테스트 ===")
chembl_result = chembl_agent("aspirin")
print(chembl_result)
print(f"\n{separator}\n")

In [None]:
# Smoke test: send one query to the PubMed agent and print its answer.
separator = "=" * 50
print("=== PubMed 에이전트 테스트 ===")
pubmed_result = pubmed_agent("COVID-19 치료법")
print(pubmed_result)
print(f"\n{separator}\n")

In [None]:
# Smoke test: send one query to the ClinicalTrials agent and print its answer.
separator = "=" * 50
print("=== ClinicalTrial 에이전트 테스트 ===")
ct_result = clinicaltrial_agent("당뇨병")
print(ct_result)
print(f"\n{separator}\n")

### 6.2 통합 연구 에이전트 테스트

In [None]:
# End-to-end check: ask the orchestrating research agent a composite question
# that calls for literature, compound data and clinical trial information.
research_query = "심혈관 질환 예방을 위한 아스피린의 효과에 대한 최신 연구 동향을 알려주세요. 관련 논문, 화합물 정보, 임상시험 데이터를 포함해서 답변해주세요."

banner = "=" * 80
print(f"🔬 연구 질문: {research_query}\n")
print(banner)

research_result = research_agent(research_query)
print(f"\n📊 연구 결과:\n{research_result}")

### 6.3 대화형 연구 세션

In [None]:
# Interactive research helper
async def interactive_research(query: str):
    """Send one research question to the orchestrating agent and print the answer.

    Args:
        query: Research question in natural language.

    Returns:
        The agent's result, or None when the call raised an exception (the
        error is printed instead of propagated).
    """
    print(f"🔬 연구 질문: {query}\n")
    print("처리 중...\n")

    try:
        # Await the agent's async entry point so this coroutine yields to the
        # event loop instead of blocking it on the synchronous call.
        result = await research_agent.invoke_async(query)
        print(f"📊 연구 결과:\n{result}\n")
        return result
    except Exception as e:
        print(f"❌ 오류 발생: {str(e)}")
        return None

# 사용 예시 (아래 셀에서 실행)
# "CRISPR 유전자 편집 기술의 최신 발전사항은?"
# "알츠하이머병 치료제 개발 현황을 알려주세요"
# "mRNA 백신 기술에 대한 최신 연구는?"

In [None]:
# 예시 질문들 실행
example_queries = [
    "CRISPR 유전자 편집 기술의 최신 발전사항은?",
    "알츠하이머병 치료제 개발 현황을 알려주세요",
    "mRNA 백신 기술에 대한 최신 연구는?"
]

for query in example_queries:
    await interactive_research(query)
    print("\n" + "="*80 + "\n")