In [1]:
from pickletools import code2i

import arxiv
import json
import os
from typing import List, Mapping
from dotenv import load_dotenv
import anthropic
from jinja2.runtime import LoopContext

In [5]:
paper_dir = "papers"

In [7]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.
    args:
    :param topic: The topic to search for papers on arXiv.
    :param max_results: The maximum number of results to retrieve. default is 5.
    :returns:
     List of paper IDs found in the search.
    """

    # use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )

    papers = client.results(search)

    # create a directory to store the papers if it doesn't exist
    path = os.path.join(paper_dir, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    try:
        with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        papers_info = {}

    #     Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        paper_ids.append(paper.get_short_id())
        paper_info = {
            "title": paper.title,
            "authors": [author.name for author in paper.authors],
            "summary": paper.summary,
            "pdf_url": paper.pdf_url,
            "published": str(paper.published.date()),
        }
        papers_info[paper.get_short_id()] = paper_info

    #     Save updated papers_info to JSON file
    with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in {file_path}")
    return papers_info




In [8]:
search_papers("computers")

Results are saved in papers/computers/papers_info.json


{'1310.7911v2': {'title': 'Compact manifolds with computable boundaries',
  'authors': ['Zvonko Iljazovic'],
  'summary': 'We investigate conditions under which a co-computably enumerable closed set\nin a computable metric space is computable and prove that in each locally\ncomputable computable metric space each co-computably enumerable compact\nmanifold with computable boundary is computable. In fact, we examine the notion\nof a semi-computable compact set and we prove a more general result: in any\ncomputable metric space each semi-computable compact manifold with computable\nboundary is computable. In particular, each semi-computable compact\n(boundaryless) manifold is computable.',
  'pdf_url': 'http://arxiv.org/pdf/1310.7911v2',
  'published': '2013-10-29'},
 'math/9711204v1': {'title': 'Aspects of Computability in Physics',
  'authors': ['Joseph Shipman'],
  'summary': 'This paper reviews connections between physics and computation, and explores\ntheir implications. The main top

In [9]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    args:
        paper_id: The paper ID to look for
    returns:
        Json string with paper information if found, otherwise an error message.
    """

    for item in os.listdir(paper_dir):
        item_path = os.path.join(paper_dir, item)
        if os.path.isdir(item_path):
            file_path = os.path.join(item_path, "papers_info.json")
            if os.path.isfile(file_path):
                try:
                    with open(file_path, "r") as json_file:
                        papers_info = json.load(json_file)
                        if paper_id in papers_info:
                            return json.dumps(papers_info[paper_id], indent=2)
                except (FileNotFoundError, json.decoder.JSONDecodeError) as e:
                    print(f"Error reading {file_path}: {str(e)}")
                    continue
    return f"Theres's no saved information related to paper ID {paper_id}"


In [10]:
extract_info('1310.7911v2')


'{\n  "title": "Compact manifolds with computable boundaries",\n  "authors": [\n    "Zvonko Iljazovic"\n  ],\n  "summary": "We investigate conditions under which a co-computably enumerable closed set\\nin a computable metric space is computable and prove that in each locally\\ncomputable computable metric space each co-computably enumerable compact\\nmanifold with computable boundary is computable. In fact, we examine the notion\\nof a semi-computable compact set and we prove a more general result: in any\\ncomputable metric space each semi-computable compact manifold with computable\\nboundary is computable. In particular, each semi-computable compact\\n(boundaryless) manifold is computable.",\n  "pdf_url": "http://arxiv.org/pdf/1310.7911v2",\n  "published": "2013-10-29"\n}'

In [11]:
tools = [
    {
        "name": "search_papers",
        "description": "Search for papers on arXiv based on a topic and store their information.",
        "input_schema": {
            "type": "object",
            "properties": {
                "topic": {
                    "type": "string",
                    "description": "The topic to search for"
                },
                "max_results": {
                    "type": "integer",
                    "description": "The maximum number of results to retrieve. Default is 5.",
                    "default": 5
                }
            },
            "required": ["topic"]
        }
    },
    {
        "name": "extract_info",
        "description": "Search for information about a specific paper across all topic directories.",
        "input_schema": {
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "The paper ID to look for"
                }
            },
            "required": ["paper_id"]
        }
    }
]

In [12]:
### Tool Mapping
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}


def execute_tool(tool_name, tool_args):
    result = mapping_tool_function[tool_name](**tool_args)
    if result is None:
        result = "The operation completed but didnot return any result."
    elif isinstance(result, list):
        result = ','.join(result)
    elif isinstance(result, dict):
        # convert dictionaries to a JSON string
        result = json.dumps(result, indent=2)

    else:
        # For other types, convert use str(result)
        result = str(result)
    return result


In [13]:
# chatbot code

load_dotenv()
client = anthropic.Anthropic()



In [14]:
def process_query(query):
    messages = [{"role": "user", "content": query}]

    response = client.messages.create(max_tokens=2024, model="claude-3-7-sonnet-2025-02-19",
                                      tools=tools,
                                      messages=messages)
    process_query = True
    while process_query:
        assistant_content = []

        for content in response.content:
            if content.type == 'text':
                print(content.text)
                assistant_content.append(content)
                if len(response.content) == 1:
                    process_query = False
            elif content.type == 'tool_use':
                assistant_content.append(content)
                messages.append({"role": "assistant", "content": assistant_content})

                tool_id = content.id
                tool_args = content.input
                tool_name = content.name
                print(f"Calling tool {tool_name} with args: {tool_args}")

                result = execute_tool(tool_name, tool_args)
                messages.append({"role": "user",
                                 "content": [
                                     {
                                         "type": "tool_result",
                                         "tool_use_id": tool_id,
                                         "content": result
                                     }
                                 ]})

                response = client.messages.create(max_tokens=2024,
                                                  model="claude-3-7-sonnet-2025-02-19",
                                                  tools=tools,
                                                  messages=messages)

                if len(response.content) == 1 and response.content[0].type == 'text':
                    print(response.content[0].text)
                    process_query = False


In [None]:
def process_query(query):
    messages = [{'role': 'user', 'content': query}]

    response = client.messages.create(max_tokens=2024,
                                      model='claude-3-7-sonnet-20250219',
                                      tools=tools,
                                      messages=messages)

    process_query = True
    while process_query:
        assistant_content = []

        for content in response.content:
            if content.type == 'text':

                print(content.text)
                assistant_content.append(content)

                if len(response.content) == 1:
                    process_query = False

            elif content.type == 'tool_use':

                assistant_content.append(content)
                messages.append({'role': 'assistant', 'content': assistant_content})

                tool_id = content.id
                tool_args = content.input
                tool_name = content.name
                print(f"Calling tool {tool_name} with args {tool_args}")

                result = execute_tool(tool_name, tool_args)
                messages.append({"role": "user",
                                 "content": [
                                     {
                                         "type": "tool_result",
                                         "tool_use_id": tool_id,
                                         "content": result
                                     }
                                 ]
                                 })
                response = client.messages.create(max_tokens=2024,
                                                  model='claude-3-7-sonnet-20250219',
                                                  tools=tools,
                                                  messages=messages)

                if len(response.content) == 1 and response.content[0].type == "text":
                    print(response.content[0].text)
                    process_query = False

In [15]:
# Chat Loop
def chat_loop():
    print("Type your query or 'exit' to quit:")
    while True:
        try:
            query = input("\nQuery: ").strip()
            if query.lower() == 'exit':
                print("Exiting the chat.")
                break
            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nExiting the chat.{str(e)}")



In [16]:
chat_loop()

Type your query or 'exit' to quit:

Exiting the chat."Could not resolve authentication method. Expected either api_key or auth_token to be set. Or for one of the `X-Api-Key` or `Authorization` headers to be explicitly omitted"
Exiting the chat.


In [None]:
{"role": "user", "content": [{"type": "text", "text": "query"}]}
{"role": "assistant", "content": [
    {"type": "tool_use", "name": "search_papers", "id": "a1", "input": {"topic": "computers", "max_results": 5}},
    {"type": "tool_use", "name": "extract_info", "id": "b1", "input": {"paper_ids": ["id1", "id2"]}}]}
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "a1", "content": "论文搜索结果"}]}
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "b1", "content": "论文提取的信息为。。。。。"}]}
{"role": "assistant", "content": [{"type": "text", "text": "我就是最终的结果啦"}]}


{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "a1", "content": "论文搜索结果"},{"type": "tool_result", "tool_use_id": "b1", "content": "论文提取的信息为。。。。。"}]}
{"role": "assistant", "content": [{"type": "text", "text": "我就是最终的结果啦"}]}
