In [1]:
import arxiv
import crewai
import crewai_tools
import datetime
import os
import pydantic
import time
import typing

In [2]:
# Language model shared by the agents defined below: the llama3.2 model,
# addressed through the `ollama/` provider prefix at a localhost endpoint.
llm = crewai.LLM(
    base_url="http://localhost:11434",
    model="ollama/llama3.2:latest",
)

In [3]:
# Root directory given to the directory-reading tool; the relative path "."
# is resolved against the kernel's current working directory.
working_directory = "."

In [4]:
# Generic crewai_tools helpers. None of them is passed to an agent in the
# cells shown in this notebook (the researcher only receives the ArXiv tool).
# Directory listing tool rooted at `working_directory`.
docs_tool = crewai_tools.DirectoryReadTool(directory=working_directory)
file_tool = crewai_tools.FileReadTool()
# NOTE(review): SerperDevTool presumably needs a Serper API key in the
# environment -- confirm before wiring it into an agent.
search_tool = crewai_tools.SerperDevTool()
# NOTE(review): the chromadb deprecation warning in this cell's saved output
# presumably originates from this tool's vector store -- confirm.
web_rag_tool = crewai_tools.WebsiteSearchTool()

  util.warn_deprecated(
/home/gjb/mambaforge/envs/agentic_ai/lib/python3.12/site-packages/chromadb/types.py:144: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  return self.model_fields  # pydantic 2.x


In [5]:
# Argument schema for the ArXiv fetch tool: exactly one required field.
class FetchArxivPapersInput(pydantic.BaseModel):
    """Input schema for FetchArxivPapersTool."""

    # `...` marks the field as required (no default value).
    target_date: datetime.date = pydantic.Field(
        ...,
        description="Target date to fetch papers for.",
    )

In [6]:
class FetchArxivPapersTool(crewai.tools.BaseTool):
    """CrewAI tool that lists the ArXiv papers submitted on one calendar day.

    The categories queried are fixed inside ``_run`` (currently only
    ``cs.CL``); the tool's single argument is described by
    ``FetchArxivPapersInput``.
    """

    name: str = "fetch_arxiv_papers"
    description: str = "Fetches all ArXiv papers from selected categories submitted on the target date."
    args_schema: typing.Type[pydantic.BaseModel] = FetchArxivPapersInput

    def _run(self, target_date: datetime.date) -> list[dict]:
        """Fetch metadata for every paper submitted on ``target_date``.

        Args:
            target_date: Day to query. A ``datetime.date`` is expected; an ISO
                ``YYYY-MM-DD`` string (as an LLM tool call may supply) and a
                ``datetime.datetime`` are also accepted and reduced to a date.

        Returns:
            One dict per paper with the keys ``title``, ``authors`` (list of
            author names), ``summary``, ``published`` and ``url``.

        Raises:
            ValueError: If ``target_date`` is a string that is not an ISO date.
        """
        # Tool arguments originate from LLM-generated JSON, so the date may
        # arrive as a plain string; normalise it before calling strftime.
        if isinstance(target_date, str):
            target_date = datetime.date.fromisoformat(target_date.strip())
        elif isinstance(target_date, datetime.datetime):
            # A datetime would otherwise leak its time of day into the range.
            target_date = target_date.date()

        # List of AI-related categories.
        # You can also include ["cs.AI", "cs.LG", "cs.CV", "cs.MA", "cs.RO"]
        AI_CATEGORIES = ["cs.CL"]

        # Query window: midnight of the target date to midnight of the next
        # day, formatted as YYYYMMDDHHMM (a date formats %H%M as "0000").
        start_date = target_date.strftime('%Y%m%d%H%M')
        end_date = (target_date + datetime.timedelta(days=1)).strftime('%Y%m%d%H%M')

        # Initialize the ArXiv client
        client = arxiv.Client(
            page_size=100,  # Fetch 100 results per page
            delay_seconds=3  # Delay between requests to respect rate limits
        )

        all_papers = []

        for category in AI_CATEGORIES:
            print(f"Fetching papers for category: {category}")

            search = arxiv.Search(
                query=f"cat:{category} AND submittedDate:[{start_date} TO {end_date}]",
                sort_by=arxiv.SortCriterion.SubmittedDate,
                max_results=None  # Fetch all results
            )

            # No explicit sleep per result: the client already throttles its
            # page requests via ``delay_seconds``. Sleeping 3 s for every
            # yielded paper only added minutes of idle time per 100 results.
            category_papers = [
                {
                    'title': result.title,
                    'authors': [author.name for author in result.authors],
                    'summary': result.summary,
                    'published': result.published,
                    'url': result.entry_id
                }
                for result in client.results(search)
            ]

            print(f"Fetched {len(category_papers)} papers from {category}")
            all_papers.extend(category_papers)

        return all_papers

In [7]:
# Tool instance that is handed to the researcher agent through `tools=[...]`.
arxiv_search_tool = FetchArxivPapersTool()

In [8]:
# Agent that selects and ranks papers; the only agent given the ArXiv tool.
# `{date}` is a placeholder filled from the inputs passed to kickoff().
researcher = crewai.Agent(
    role="Senior Researcher",
    # Adjacent string literals are concatenated verbatim, so every fragment
    # but the last must end with its own separating space.
    goal="Find the top 10 papers from the search results from ArXiv on {date}. "
         "Rank them appropriately.",
    backstory="You are a senior researcher with a deep understanding of all topics in AI and AI research. "
              "You are able to identify the best research papers based on the title and abstract.",
    verbose=True,
    tools=[arxiv_search_tool],
    llm=llm,
)

In [9]:
# Agent that turns the research result into HTML; it is given no tools.
frontend_engineer = crewai.Agent(
    role="Senior Frontend & AI Engineer",
    goal="Compile the results into a HTML file.",
    # Adjacent string literals are concatenated verbatim, so the first
    # fragment ends with a space to keep the two sentences apart.
    backstory="You are a competent frontend engineer writing HTML and CSS with decades of experience. "
              "You have also been working with AI for decades and understand it well.",
    verbose=True,
    llm=llm,
)

In [10]:
# Task for the researcher agent. `{date}` is a placeholder filled from the
# inputs passed to kickoff(); human_input=True requests a human review step.
research_task = crewai.Task(
    description=(" Find the top 10 research papers from the search results from ArXiv on {date}."),
    # Adjacent string literals are concatenated verbatim; without explicit
    # newlines the bullet list collapsed into "format:- Title- Authors- ...".
    expected_output=(
        "A list of top 10 research papers with the following information in the following format:\n"
        "- Title\n"
        "- Authors\n"
        "- Abstract\n"
        "- Link to the paper"
    ),
    agent=researcher,
    human_input=True,
)

In [11]:
# Task for the frontend agent: consumes the research task's result via
# `context` and writes its output to ./ai_research_report.html.
reporting_task = crewai.Task(
    description=("Compile the results into a detailed report in a HTML file."),
    # Adjacent string literals are concatenated verbatim; explicit newlines
    # keep the heading and each bullet on its own line in the prompt.
    expected_output=(
        "An HTML file with the results in the following format:\n"
        "Top 10 AI Research Papers published on {date}\n"
        "- Title (which on clicking opens the paper in a new tab)\n"
        "- Authors\n"
        "- Short summary of the abstract (2-4 sentences)"
    ),
    agent=frontend_engineer,
    context=[research_task],
    output_file="./ai_research_report.html",
    human_input=True,
)

In [12]:
# Crew wiring: two agents and two tasks, research listed before reporting.
# NOTE(review): tasks presumably execute in list order (crewai's default
# process) -- confirm.
arxiv_research_crew = crewai.Crew(
    tasks=[research_task, reporting_task],
    agents=[researcher, frontend_engineer],
    verbose=True,
)

In [13]:
# Value substituted for the `{date}` placeholders in the agent/task prompts.
crew_inputs = dict(date="2025-05-25")

# NOTE(review): the saved output of this cell ends in a RecursionError; its
# cause is not visible from this cell.
result = arxiv_research_crew.kickoff(inputs=crew_inputs)

Output()

RecursionError: maximum recursion depth exceeded