In [2]:
from smolagents import tool, CodeAgent, LiteLLMModel
import requests, json
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
import arxiv
from pypdf import PdfReader

# 1️⃣ Fetch multiple papers
@tool
def get_top_three_papers() -> list:
    """
    Returns a list of the top 3 daily papers (titles) from Hugging Face.

    If the page carries no usable 'dailyPapers' data, the list
    ["No papers found."] is returned instead.
    """
    url = "https://huggingface.co/papers"
    # Without a timeout a stalled connection would block the agent forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    titles = []
    # The page embeds its data as JSON in the 'data-props' attribute of
    # these containers; only the first one with 'dailyPapers' is used.
    for container in soup.find_all('div', class_='SVELTE_HYDRATER contents'):
        data_props = container.get('data-props', '')
        if not data_props:
            continue
        # Keep the try body limited to the one call that can fail to decode.
        try:
            data_json = json.loads(data_props.replace('&quot;', '"'))
        except json.JSONDecodeError:
            continue
        if not isinstance(data_json, dict) or 'dailyPapers' not in data_json:
            continue
        for paper in data_json['dailyPapers'][:3]:
            # Skip malformed entries instead of failing on a missing title.
            title = paper.get('title') if isinstance(paper, dict) else None
            if title:
                titles.append(title)
        break

    return titles or ["No papers found."]


# 2️⃣ Find the ID
@tool
def get_paper_id_by_title(title: str) -> str:
    """
    Returns the arXiv paper ID by its title.
    Args:
        title: The paper title for which to get the ID.
    """
    api = HfApi()
    papers = api.list_papers(query=title)
    # A lazy iterable is truthy even when it yields nothing, so emptiness
    # cannot be tested with `if papers:`. Ask for the first item with a
    # default instead, which also avoids a StopIteration on no matches.
    first_match = next(iter(papers or ()), None)
    if first_match is None:
        return "No paper ID found."
    return first_match.id


# 3️⃣ Download the paper
@tool
def download_paper_by_id(paper_id: str) -> str:
    """
    Downloads the arXiv paper by ID and saves it as 'paper_<id>.pdf'.

    On success the file name is returned; on failure a message starting
    with "Error downloading paper:" is returned. Any '/' in the ID is
    replaced by '_' in the file name.

    Args:
        paper_id: The arXiv ID of the paper to download (e.g., "1706.03762").
    """
    try:
        client = arxiv.Client()
        search = arxiv.Search(id_list=[paper_id])
        # A default avoids StopIteration, whose empty message would produce
        # an uninformative "Error downloading paper: " result.
        paper = next(client.results(search), None)
        if paper is None:
            return f"Error downloading paper: no arXiv entry found for ID '{paper_id}'"
        # A '/' in the ID would otherwise be treated as a directory separator.
        safe_id = paper_id.replace("/", "_")
        filename = f"paper_{safe_id}.pdf"
        paper.download_pdf(filename=filename)
        return filename
    except Exception as e:
        # Deliberate best effort: the failure is reported as text to the caller.
        return f"Error downloading paper: {e}"


# 4️⃣ Read the PDF
@tool
def read_pdf_file(file_path: str) -> str:
    """
    Reads the first 3 pages of a PDF and returns text.
    Args:
        file_path: Path to the PDF file.
    """
    try:
        leading_pages = PdfReader(file_path).pages[:3]
        # Pages without extractable text contribute an empty string.
        chunks = [page.extract_text() or "" for page in leading_pages]
        return "".join(chunks).strip()
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return f"Error reading {file_path}: {e}"


# 5️⃣ Define the agent
model = LiteLLMModel(model_id="ollama_chat/glm-4.6:cloud")

# Tools the agent may call, listed in the order the task uses them.
paper_tools = [
    get_top_three_papers,
    get_paper_id_by_title,
    download_paper_by_id,
    read_pdf_file,
]
agent = CodeAgent(tools=paper_tools, model=model, stream_outputs=True)

# 6️⃣ Define the goal
task_prompt = """
Fetch the top 3 papers from Hugging Face daily papers.
For each paper:
- get its arXiv ID,
- download it,
- read the first pages,
- and summarize it.
Then create a short consolidated summary comparing the three papers.
"""
# Kept as the final bare expression so a notebook still displays the result.
agent.run(task_prompt)

{'papers': [{'title': 'JanusCoder: Towards a Foundational Visual-Programmatic Interface for Code Intelligence',
   'id': '2510.23538',
   'summary': 'JanusCoder: Towards a Foundational Visual-Programmatic Interface for Code Intelligence\nThis paper introduces JanusCoder, a system that combines visual and programmatic interfaces for code intelligence tasks. The work aims to create a foundational interface that bridges visual representations with programmatic approaches to enhance code understanding and generation. The authors include researchers from University of Hong Kong, Shanghai AI Laboratory, Nanjing University, Carnegie Mellon University, and Shanghai Innovation Institute. The paper appears to focus on creating a novel interface paradigm for AI-assisted coding that integrates multiple modalities.'},
  {'title': 'Video-Thinker: Sparking "Thinking with Videos" via Reinforcement Learning',
   'id': '2510.23473',
   'summary': 'Video-Thinker: Sparking "Thinking with Videos" via Reinf