In [10]:
import json
import arxiv
import os
from pathlib import Path
from typing import List
from dotenv import load_dotenv
import anthropic
from pprint import pprint

In [None]:
# Load environment variables from .env file
load_dotenv()

# All search results are cached on disk below PAPER_DIR, one sub-directory per topic.
DATA_DIR = Path("data")
PAPER_DIR = DATA_DIR / "papers"

# parents=True: on a fresh checkout DATA_DIR does not exist yet, and without it
# mkdir raises FileNotFoundError instead of creating data/papers.
PAPER_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Adapted from the course notebook: the topic is sanitised before it is used as a directory name.
def _topic_dir_name(topic: str) -> str:
    """Turn a free-text topic into a single, filesystem-safe directory name."""
    # Path separators would create nested directories (which extract_info never
    # scans); the remaining characters are not allowed in Windows file names.
    unsafe_chars = set('<>:"/\\|?*')
    name = "".join(
        "_" if (ch in unsafe_chars or ord(ch) < 32) else ch
        for ch in topic.lower().replace(" ", "_")
    )
    # "." / ".." would resolve to PAPER_DIR itself or its parent.
    name = name.rstrip(".")
    return name or "_"


def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The metadata of every hit is merged into
    ``PAPER_DIR/<topic directory>/papers_info.json`` (created if missing), keyed by
    the paper's short arXiv ID. Entries already present for the same ID are overwritten.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find the papers
    arxiv_client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )

    papers = arxiv_client.results(search)

    # Create directory for this topic (always a direct child of PAPER_DIR)
    path = os.path.join(PAPER_DIR, _topic_dir_name(topic))
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info; a missing or corrupt file starts from scratch
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date())
        }

    # Save updated papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

In [7]:
# Manual check of the search tool: returns the short IDs of the hits and writes
# their metadata to papers_info.json under the topic's directory.
search_papers("AI")

Results are saved in: data\papers\ai\papers_info.json


['2409.12922v1',
 '2406.11563v3',
 '2402.07632v3',
 '2211.05075v1',
 '2403.15481v2']

In [8]:
def extract_info(paper_id: str) -> str:
    """
    Look up a previously saved paper in every topic directory under PAPER_DIR.

    Each direct sub-directory of PAPER_DIR is checked for a ``papers_info.json``
    file; the first one that contains ``paper_id`` wins. Unreadable files are
    reported on stdout and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """

    for entry in os.listdir(PAPER_DIR):
        topic_dir = os.path.join(PAPER_DIR, entry)
        if not os.path.isdir(topic_dir):
            continue

        file_path = os.path.join(topic_dir, "papers_info.json")
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r") as handle:
                saved_papers = json.load(handle)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            print(f"Error reading {file_path}: {str(e)}")
            continue

        if paper_id in saved_papers:
            return json.dumps(saved_papers[paper_id], indent=2)

    return f"There's no saved information related to paper {paper_id}."

In [11]:
# extract_info returns a JSON-formatted *string*, so pprint shows the escaped,
# line-wrapped repr of that string (see the output below) rather than a dict.
pprint(extract_info('2409.12922v1'))

('{\n'
 '  "title": "AI Thinking: A framework for rethinking artificial intelligence '
 'in practice",\n'
 '  "authors": [\n'
 '    "Denis Newman-Griffis"\n'
 '  ],\n'
 '  "summary": "Artificial intelligence is transforming the way we work with '
 'information\\nacross disciplines and practical contexts. A growing range of '
 'disciplines are\\nnow involved in studying, developing, and assessing the '
 'use of AI in practice,\\nbut these disciplines often employ conflicting '
 'understandings of what AI is and\\nwhat is involved in its use. New, '
 'interdisciplinary approaches are needed to\\nbridge competing '
 'conceptualisations of AI in practice and help shape the future\\nof AI use. '
 'I propose a novel conceptual framework called AI Thinking, which\\nmodels '
 'key decisions and considerations involved in AI use across '
 'disciplinary\\nperspectives. The AI Thinking model addresses five '
 'practice-based competencies\\ninvolved in applying AI in context: motivating '
 'AI use

## Tools Schema

In [12]:
# Tool definitions passed to the model via the `tools` argument of
# client.messages.create; each "name" matches a key in the tool dispatch table.

# Schema for search_papers(topic, max_results=5)
_search_papers_tool = {
    "name": "search_papers",
    "description": "Search for papers on arXiv based on a topic and store their information.",
    "input_schema": {
        "type": "object",
        "properties": {
            "topic": {"type": "string", "description": "The topic to search for"},
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results to retrieve (default: 5)",
                "default": 5,
            },
        },
        "required": ["topic"],
    },
}

# Schema for extract_info(paper_id)
_extract_info_tool = {
    "name": "extract_info",
    "description": "Search for information about a specific paper across all topic directories.",
    "input_schema": {
        "type": "object",
        "properties": {
            "paper_id": {"type": "string", "description": "The ID of the paper to look for"},
        },
        "required": ["paper_id"],
    },
}

tools = [_search_papers_tool, _extract_info_tool]

In [13]:
# Handling tool execution
# Dispatch table: tool name (as declared in the tools schema) -> Python callable.
mapping_tool_function = dict(
    search_papers=search_papers,
    extract_info=extract_info,
)

def execute_tool(tool_name: str, args: dict) -> str:
    """
    Run one of the registered tools and normalise its result to a string.

    Args:
        tool_name: Key into ``mapping_tool_function``.
        args: Keyword arguments forwarded to the tool (``None`` is treated as no arguments).

    Returns:
        The tool result as text: a fixed notice for ``None``, a comma-separated
        line for lists, pretty-printed JSON for dicts, ``str(result)`` otherwise.
        For an unregistered ``tool_name`` an error message is returned (instead of
        raising KeyError) so the caller can hand it back to the model as the tool result.

    Raises:
        Whatever the tool itself raises; tool errors are not swallowed here.
    """
    tool = mapping_tool_function.get(tool_name)
    if tool is None:
        available = ", ".join(mapping_tool_function)
        return f"Error: unknown tool '{tool_name}'. Available tools: {available}"

    result = tool(**(args or {}))

    if result is None:
        return "The operation completed but didn't return any result."
    if isinstance(result, list):
        # str() each item: a bare join raises TypeError on non-string elements.
        return ", ".join(str(item) for item in result)
    if isinstance(result, dict):
        return json.dumps(result, indent=2)
    return str(result)

In [14]:
# Shared Anthropic API client used by process_query.
# NOTE(review): presumably authenticates with the API key loaded by load_dotenv() — confirm.
client = anthropic.Anthropic()

def process_query(query, model='claude-3-7-sonnet-20250219', max_tokens=2024):
    """
    Send a user query to the model and run the tool-use loop until it stops calling tools.

    Every text block the model produces is printed, as is a line for each tool
    invocation. Uses the module-level ``client``, ``tools`` and ``execute_tool``.

    Args:
        query: The user's message.
        model: Model identifier passed to ``client.messages.create``.
        max_tokens: Response token limit passed to ``client.messages.create``.

    Returns:
        None. All output goes to stdout.
    """
    messages = [{'role': 'user', 'content': query}]

    while True:
        response = client.messages.create(max_tokens=max_tokens,
                                          model=model,
                                          tools=tools,
                                          messages=messages)

        # Handle the WHOLE reply before asking the model again: a single reply
        # may hold several tool_use blocks, and each one needs a tool_result
        # in the next user message.
        tool_results = []
        for content in response.content:
            if content.type == 'text':
                print(content.text)
            elif content.type == 'tool_use':
                tool_name = content.name
                tool_args = content.input
                print(f"Calling tool {tool_name} with args {tool_args}")

                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": content.id,
                    "content": execute_tool(tool_name, tool_args)
                })

        # No tool calls means the model has given its final answer.
        if not tool_results:
            break

        # Echo the assistant turn back unchanged, then answer all of its tool calls at once.
        messages.append({'role': 'assistant', 'content': response.content})
        messages.append({'role': 'user', 'content': tool_results})

In [15]:
def chat_loop():
    """
    Interactive prompt: read queries from stdin and pass each to ``process_query``.

    The loop ends when the user types 'quit' (case-insensitive) or when stdin is
    exhausted. Blank input is ignored. Errors raised while processing a query are
    printed and the loop continues with the next prompt.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        # Read outside the processing try-block: if input() itself fails, catching
        # the error and retrying would spin forever on a closed stdin.
        try:
            query = input("\nQuery: ").strip()
        except EOFError:
            break

        if query.lower() == 'quit':
            break
        if not query:
            # Nothing to send; prompt again.
            continue

        try:
            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nError: {str(e)}")

In [16]:
# Start the interactive session; type 'quit' at the prompt to exit.
chat_loop()

Type your queries or 'quit' to exit.
I'll help you search for papers related to LLMs (Large Language Models). Let me search for relevant papers on arXiv for you.
Calling tool search_papers with args {'topic': 'LLM'}
Results are saved in: data\papers\llm\papers_info.json
I've found 5 papers related to LLMs. Let me get more detailed information about each of these papers for you:
Calling tool extract_info with args {'paper_id': '2412.18022v1'}
Calling tool extract_info with args {'paper_id': '2406.10300v1'}
Calling tool extract_info with args {'paper_id': '2405.19888v1'}
Calling tool extract_info with args {'paper_id': '2311.10372v2'}
Calling tool extract_info with args {'paper_id': '2411.15764v1'}
Based on my search, here are 5 recent papers related to LLMs (Large Language Models):

1. **Trustworthy and Efficient LLMs Meet Databases** (December 2024)
   - Focuses on making LLMs more trustworthy and efficient, particularly in reducing hallucinations and meeting high inference demands
   

## Creating an MCP server