In [3]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import anthropic

In [4]:
# Root directory for stored paper metadata; search_papers creates one
# sub-folder per topic beneath it and extract_info scans those sub-folders.
PAPER_DIR = "papers"


This function searches arXiv for academic papers and saves their metadata (NOT the full paper content). We create a new arXiv client each time the function runs. It's not a persistent connection, just an API wrapper that makes HTTP requests (think of it like opening a web browser, doing a search, then closing it).
In summary: 

- Takes a topic as input (e.g., "machine learning", "quantum computing")
- Creates a dedicated folder for each topic: `PAPER_DIR/topic_name/`
- Searches arXiv and saves paper metadata to `papers_info.json` in that folder
- Returns a list of paper IDs found


In [5]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    Only metadata (title, authors, summary, PDF URL, publication date) is
    stored, never the paper itself. Results are merged into
    ``PAPER_DIR/<topic folder>/papers_info.json`` keyed by the paper's short
    ID, so repeated searches accumulate entries and refresh existing ones.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find the most relevant articles matching the queried topic
    client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers = client.results(search)

    # Turn the topic into ONE safe folder name. The topic can come straight
    # from the model or the user, so path separators are neutralised to keep
    # something like "../../etc" from escaping PAPER_DIR.
    folder_name = topic.lower().replace(" ", "_")
    for separator in ("/", "\\"):
        folder_name = folder_name.replace(separator, "_")
    # "", "." and ".." would resolve to PAPER_DIR itself or its parent, where
    # extract_info never looks; use a placeholder folder instead.
    if not folder_name.strip("."):
        folder_name = "unnamed_topic"

    path = os.path.join(PAPER_DIR, folder_name)
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info; start fresh if the file is missing,
    # unparsable, or does not hold the expected {paper_id: info} mapping.
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    if not isinstance(papers_info, dict):
        papers_info = {}

    # Process each paper and add it to papers_info
    paper_ids = []
    for paper in papers:
        paper_id = paper.get_short_id()
        paper_ids.append(paper_id)
        papers_info[paper_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    # Save updated papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

In [ ]:
# Example run: search with the default max_results (5); the cell displays the returned paper IDs.
search_papers("Neural Networks")

In [ ]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every ``PAPER_DIR/<topic>/papers_info.json`` file is checked in turn and
    the first entry stored under ``paper_id`` is returned. Files that cannot
    be read or parsed are reported on stdout and skipped, so one bad file does
    not hide papers stored elsewhere.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    not_found = f"There's no saved information related to paper {paper_id}."

    # Nothing has been saved yet if the root folder is missing (no search has
    # run so far); treat that as "not found" instead of raising.
    try:
        topic_dirs = os.listdir(PAPER_DIR)
    except (FileNotFoundError, NotADirectoryError):
        return not_found

    for topic_dir in topic_dirs:
        file_path = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (OSError, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"Error reading {file_path}: {str(e)}")
            continue

        # Guard against files whose top level is not a {paper_id: info} mapping.
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found

In [8]:
# Example lookup by paper ID; the returned JSON string is shown as the cell output below.
extract_info('1811.02784v1')

'{\n  "title": "Median Binary-Connect Method and a Binary Convolutional Neural Nework for Word Recognition",\n  "authors": [\n    "Spencer Sheen",\n    "Jiancheng Lyu"\n  ],\n  "summary": "We propose and study a new projection formula for training binary weight\\nconvolutional neural networks. The projection formula measures the error in\\napproximating a full precision (32 bit) vector by a 1-bit vector in the l_1\\nnorm instead of the standard l_2 norm. The l_1 projector is in closed\\nanalytical form and involves a median computation instead of an arithmatic\\naverage in the l_2 projector. Experiments on 10 keywords classification show\\nthat the l_1 (median) BinaryConnect (BC) method outperforms the regular BC,\\nregardless of cold or warm start. The binary network trained by median BC and a\\nrecent blending technique reaches test accuracy 92.4%, which is 1.1% lower than\\nthe full-precision network accuracy 93.5%. On Android phone app, the trained\\nbinary network doubles the spee

In [9]:
# Tool definitions passed to the model via the `tools` argument of
# client.messages.create. Each entry names a function registered in
# mapping_tool_function and describes its arguments as a JSON Schema object.
tools = [
    # search_papers(topic, max_results=5)
    {
        "name": "search_papers",
        "description": "Search for papers on arXiv based on a topic and store their information.",
        "input_schema": {
            "type": "object",
            "properties": {
                "topic": {"type": "string", "description": "The topic to search for"},
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results to retrieve",
                    "default": 5,
                },
            },
            "required": ["topic"],
        },
    },
    # extract_info(paper_id)
    {
        "name": "extract_info",
        "description": "Search for information about a specific paper across all topic directories.",
        "input_schema": {
            "type": "object",
            "properties": {
                "paper_id": {"type": "string", "description": "The ID of the paper to look for"},
            },
            "required": ["paper_id"],
        },
    },
]

In [ ]:
# Registry used for dynamic dispatch: tool name (as advertised to the model)
# -> Python callable implementing it.
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info,
}


def execute_tool(tool_name, tool_args):
    """
    Run the tool registered under ``tool_name`` and return its result as text.

    Args:
        tool_name: Name of the tool requested by the model.
        tool_args: Dict of keyword arguments for the tool (``None`` or an
            empty dict means "no arguments").

    Returns:
        A string suitable for a ``tool_result`` message: list items joined
        with ", ", dicts rendered as indented JSON, ``None`` replaced by a
        fixed notice, anything else passed through ``str()``. If the tool
        name is not registered, an error message naming the available tools
        is returned so the model can correct itself.

    Exceptions raised inside the tool itself are not caught here; they
    propagate to the caller.
    """
    tool = mapping_tool_function.get(tool_name)
    if tool is None:
        # The name comes from the model; report the problem back rather than
        # failing with a bare KeyError.
        available = ', '.join(mapping_tool_function)
        return f"Error: unknown tool '{tool_name}'. Available tools: {available}"

    result = tool(**(tool_args or {}))

    if result is None:
        return "The operation completed but didn't return any results."

    if isinstance(result, list):
        # str() each item so lists of non-string values do not break join().
        return ', '.join(str(item) for item in result)

    if isinstance(result, dict):
        # Convert dictionaries to formatted JSON strings
        return json.dumps(result, indent=2)

    # For any other type, convert using str()
    return str(result)

In [11]:
# Load settings from a local .env file before creating the client
# (presumably this supplies the Anthropic API key - confirm against your setup).
load_dotenv() 
# Module-level Anthropic client used by process_query for every API call.
client = anthropic.Anthropic()

In [ ]:
def process_query(query):
    """
    Answer one user query, letting the model call tools until it is done.

    Each turn sends the conversation so far to the model, prints any text it
    returns, and executes every tool call in the reply. All tool results from
    a turn are sent back together in a single user message, then the model is
    asked again. The loop ends at the first reply that contains no tool call.

    Args:
        query: The user's question or instruction.

    Returns:
        None. Model text and tool-call notices are printed to stdout.
    """
    messages = [{'role': 'user', 'content': query}]

    while True:
        response = client.messages.create(max_tokens=2024,
                                          model='claude-3-7-sonnet-20250219',
                                          tools=tools,
                                          messages=messages)

        tool_results = []
        for content in response.content:
            if content.type == 'text':
                print(content.text)

            elif content.type == 'tool_use':
                print(f"Calling tool {content.name} with args {content.input}")
                result = execute_tool(content.name, content.input)
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": content.id,
                    "content": result
                })

        # No tool call in this reply (text-only, several text blocks, or even
        # empty content): the model has finished, so stop instead of looping.
        if not tool_results:
            break

        # Record the complete assistant turn once, followed by one user
        # message holding a result for every tool_use block in that turn.
        messages.append({'role': 'assistant', 'content': response.content})
        messages.append({'role': 'user', 'content': tool_results})

In [13]:
def chat_loop():
    """
    Run an interactive prompt that forwards each query to process_query.

    The loop ends when the user types 'quit' (case-insensitive) or when
    standard input is closed. Blank lines are ignored. Any other error while
    handling a query is printed and the prompt continues.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            query = input("\nQuery: ").strip()
            if query.lower() == 'quit':
                break
            if not query:
                # Nothing to ask; avoid sending an empty message to the API.
                continue

            process_query(query)
            print("\n")
        except EOFError:
            # stdin was closed (Ctrl-D / piped input exhausted). input() would
            # keep raising, so leave the loop instead of spinning forever.
            print()
            break
        except Exception as e:
            print(f"\nError: {str(e)}")

In [None]:
# Start the interactive session; type 'quit' at the prompt to leave it.
chat_loop()

Type your queries or 'quit' to exit.


## arXiv Paper Search System: Technical Implementation

This system implements a function-calling agent that interfaces with arXiv's API to search and retrieve academic papers. The architecture demonstrates fundamental patterns in LLM tool integration, specifically how to bridge Python functions with language model capabilities through the tool-use (function calling) interface of the Anthropic Messages API - the same pattern that protocols such as the Model Context Protocol (MCP) build on.

**Core Design Decision**: Store papers as JSON files in a hierarchical directory structure rather than a database. This choice optimizes for simplicity and human readability - each topic gets its own directory, making manual inspection straightforward. The trade-off is query performance at scale, but for typical research workflows this is acceptable.

```text
papers/
├── neural_networks/
│   └── papers_info.json
├── machine_learning/
│   └── papers_info.json
└── quantum_computing/
    └── papers_info.json
```

The `search_papers` function creates this structure dynamically. When searching for "Neural Networks", it normalizes the topic to `neural_networks` (lowercase, underscores for spaces) to produce a tidy directory name. The function merges rather than overwrites: multiple calls with the same topic add newly found papers to the existing JSON file, and because entries are keyed by paper ID, a paper that is already stored is simply rewritten in place instead of being duplicated.

```python
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    client = arxiv.Client()  # Not a persistent connection - just an API wrapper
    
    # Normalize topic for filesystem: "Neural Networks" → "neural_networks"
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)  # exist_ok prevents FileExistsError on repeated calls
```

The arxiv.Client() instantiation is lightweight - it's not maintaining a connection but rather creating an object that knows how to make HTTP requests to arXiv's API. Think of it as initializing a requests session with arXiv-specific configuration.

**Error Handling Pattern**: The function implements defensive programming by attempting to load existing data, falling back to an empty dictionary if the file doesn't exist or contains invalid JSON:

```python
try:
    with open(file_path, "r") as json_file:
        papers_info = json.load(json_file)
except (FileNotFoundError, json.JSONDecodeError):
    papers_info = {}
```

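This pattern ensures the function never crashes due to a missing or corrupted data file. Note the trade-off: a corrupted file is silently treated as empty and is overwritten on the next save. Because new results are merged into the existing file, papers accumulate over time - useful for building a personal research database.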

The `extract_info` function implements a linear search across all topic directories. While O(n) in the number of topics, this is acceptable because:
1. Topics are limited (typically dozens, not thousands)
2. File I/O dominates performance anyway
3. The alternative (maintaining a global index) adds complexity

```python
def extract_info(paper_id: str) -> str:
    for topic_dir in os.listdir(PAPER_DIR):  # Better name than 'item'
        topic_path = os.path.join(PAPER_DIR, topic_dir)
        if os.path.isdir(topic_path):
            json_file_path = os.path.join(topic_path, "papers_info.json")
            # ... load and search
```

**Function Registry Pattern**: The `mapping_tool_function` dictionary implements dynamic dispatch - a common pattern in plugin architectures and RPC systems:

```python
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}

def execute_tool(tool_name, tool_args):
    result = mapping_tool_function[tool_name](**tool_args)
```

This design enables adding new tools without modifying the dispatch logic. The `**tool_args` syntax unpacks the dictionary into keyword arguments, so `{"topic": "AI", "max_results": 10}` becomes `search_papers(topic="AI", max_results=10)`.

**String Conversion for LLM Compatibility**: Language models operate on text, not Python objects. The execute_tool function normalizes all return types to strings:

```python
if isinstance(result, list):
    result = ', '.join(result)  # ['id1', 'id2'] → "id1, id2"
elif isinstance(result, dict):
    result = json.dumps(result, indent=2)  # Pretty-printed JSON
```

This ensures the LLM receives parseable text regardless of what the underlying function returns.

**The Agent Loop**: The `process_query` function implements the core agent pattern - a while loop that alternates between LLM reasoning and tool execution:

```python
def process_query(query):
    messages = [{'role': 'user', 'content': query}]
    response = client.messages.create(model='claude-3-5-sonnet-20241022', 
                                    tools=tools, messages=messages)
    
    continue_processing = True  # Better name than shadowing function name
    while continue_processing:
        for content in response.content:
            if content.type == 'text':
                print(content.text)
                if len(response.content) == 1:
                    continue_processing = False
            
            elif content.type == 'tool_use':
                # Execute tool and append result to conversation
                result = execute_tool(content.name, content.input)
                messages.append({"role": "user", 
                                "content": [{"type": "tool_result",
                                           "tool_use_id": content.id,
                                           "content": result}]})
                # Get next response from model
                response = client.messages.create(...)
```

This implements the fundamental insight: **agents are while loops with function calls**. The loop continues until the model produces a text-only response (no tool calls), indicating it has completed the task.

**Message Protocol**: The conversation follows a specific format for tool results:

```python
{
    "role": "user",
    "content": [{
        "type": "tool_result",
        "tool_use_id": "unique-id-from-model",
        "content": "string-result-from-tool"
    }]
}
```

This format tells the model "here's what your tool call returned, continue processing". The model maintains conversation state through the messages array - each append adds to the context.

**Error Propagation**: The system implements graceful degradation at each layer:
- File operations: Continue searching other directories if one fails
- Tool execution: `execute_tool` does not catch exceptions itself; an error raised inside a tool propagates up to the chat loop
- Chat loop: Display errors without crashing the interface

```python
def chat_loop():
    while True:
        try:
            query = input("\nQuery: ").strip()
            if query.lower() == 'quit':
                break
            process_query(query)
        except Exception as e:
            print(f"\nError: {str(e)}")  # Show error but continue
```

**Performance Considerations**: 
- JSON files are loaded entirely into memory - fine for hundreds of papers per topic
- No caching between queries - each search reloads files
- Synchronous I/O throughout - could benefit from async for multiple searches

**Security Notes**: The system trusts user input for file paths (via topic names) which could enable directory traversal attacks in a production environment. The `.lower().replace(" ", "_")` normalization provides some protection but isn't comprehensive.

This implementation demonstrates core patterns in LLM tool integration: function registries for dynamic dispatch, string normalization for model compatibility, conversation state management through message arrays, and the fundamental agent loop. These patterns appear across frameworks like LangChain, LlamaIndex, and Pydantic AI - understanding this implementation provides insight into how those abstractions work internally.