In [None]:
# Install crewai, crewai_tools and arxiv into the current environment using uv's pip interface.
# NOTE(review): versions are not pinned, so a fresh install may resolve to different releases.
!uv pip install crewai crewai_tools arxiv

In [None]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Set up environment variables
import os

# Prefer a key that is already exported in the environment so that re-running
# this cell never clobbers a real credential with the placeholder below.
# The literal is only a placeholder: do not paste a real secret into a
# notebook that may be shared or committed.
openai_api_key = os.environ.get("OPENAI_API_KEY") or "YOUR_API_KEY"
os.environ["OPENAI_API_KEY"] = openai_api_key

# Model name exposed through the environment (presumably read by crewai's
# default LLM configuration — confirm against the installed crewai version).
os.environ["OPENAI_MODEL_NAME"] = "gpt-4o-mini"

In [None]:
from crewai import Agent, Task, Crew

# Creating the arXiv Search Tool

In [None]:
from typing import Type, List
from pydantic import BaseModel, Field
from crewai.tools import BaseTool
import arxiv
import datetime
import time

class FetchArxivPapersInput(BaseModel):
    """Argument schema for FetchArxivPapersTool: one required calendar date."""

    target_date: datetime.date = Field(
        ...,
        description="Target date to fetch papers for.",
    )

class FetchArxivPapersTool(BaseTool):
    """crewai tool that lists arXiv papers submitted on a given date.

    For each category in the hard-coded category list it queries the arXiv
    API for papers whose ``submittedDate`` falls between midnight of the
    target date and midnight of the following day, and returns one dict per
    paper.
    """

    name: str = "fetch_arxiv_papers"
    description: str = "Fetches all arXiv papers from selected categories submitted on the target date."
    args_schema: Type[BaseModel] = FetchArxivPapersInput

    def _run(self, target_date: datetime.date) -> List[dict]:
        """Fetch the papers submitted on ``target_date``.

        Args:
            target_date: Day to query. An ISO ``YYYY-MM-DD`` string is also
                accepted and converted, in case the tool is invoked without
                the schema having coerced the argument first.

        Returns:
            A list of dicts with the keys ``title``, ``authors`` (list of
            author names), ``summary``, ``published`` and ``url``, in the
            order the arXiv client yields the results.
        """
        if isinstance(target_date, str):
            target_date = datetime.date.fromisoformat(target_date)

        # List of AI-related categories
        AI_CATEGORIES = ["cs.AI"]

        # Define the date range for the target date (a plain date formats its
        # time component as 0000, i.e. midnight to midnight of the next day).
        start_date = target_date.strftime('%Y%m%d%H%M')
        end_date = (target_date + datetime.timedelta(days=1)).strftime('%Y%m%d%H%M')

        # The client itself paces the HTTP requests: it fetches 100 results
        # per page and waits 3 seconds between page requests.
        client = arxiv.Client(
            page_size=100,
            delay_seconds=3
        )

        all_papers = []

        for category in AI_CATEGORIES:
            print(f"Fetching papers for category: {category}")

            search_query = f"cat:{category} AND submittedDate:[{start_date} TO {end_date}]"

            search = arxiv.Search(
                query=search_query,
                sort_by=arxiv.SortCriterion.SubmittedDate,
                max_results=None  # Fetch all results
            )

            # No extra sleep per result: iterating the generator only hits the
            # network when a new page is needed, and the client already applies
            # delay_seconds there. Sleeping per paper added ~3 s for every
            # single result without protecting the API any further.
            category_papers = [
                {
                    'title': result.title,
                    'authors': [author.name for author in result.authors],
                    'summary': result.summary,
                    'published': result.published,
                    'url': result.entry_id
                }
                for result in client.results(search)
            ]

            print(f"Fetched {len(category_papers)} papers from {category}")
            all_papers.extend(category_papers)

        return all_papers

In [None]:
# Single tool instance; it is passed to the researcher agent through its `tools` list.
arxiv_search_tool = FetchArxivPapersTool()

# Creating Agents

In [None]:
# Agent 1: Arxiv Researcher
# Has access to the arXiv search tool; "{date}" is a placeholder filled from
# the crew inputs at kickoff.
# Adjacent string literals are joined with no separator, so every continued
# fragment starts with an explicit space.

researcher = Agent(
    role="Senior Researcher",
    goal=(
        "Find the top 10 papers from the search results from arXiv on {date}."
        " Rank them appropriately."
    ),
    backstory=(
        "You are a senior researcher with a deep understanding of all topics in AI and AI research."
        " You are able to identify the best research papers based on the title and abstract."
    ),
    verbose=True,
    tools=[arxiv_search_tool],
)

In [None]:
# Agent 2: Frontend Engineer
# Turns the research results into an HTML page; it is given no tools.
# Adjacent string literals are joined with no separator, so the continued
# backstory fragment starts with an explicit space.

frontend_engineer = Agent(
    role="Senior Frontend & AI Engineer",
    goal="Compile the results into a HTML file.",
    backstory=(
        "You are a competent frontend engineer writing HTML and CSS with decades of experience."
        " You have also been working with AI for decades and understand it well"
    ),
    verbose=True,
)

# Creating Tasks

In [None]:
# Task for Arxiv Researcher
# Each bullet of the expected format ends with an explicit newline: adjacent
# string literals are concatenated as-is, so without it the model would
# receive "format:- Title- Authors- Abstract- Link to the paper" on one line.

research_task = Task(
    description=(" Find the top 10 research papers from the search results from arXiv on {date}."),
    expected_output=(
        "A list of top 10 research papers with the following information in the following format:\n"
        "- Title\n"
        "- Authors\n"
        "- Abstract\n"
        "- Link to the paper"
    ),
    agent=researcher,
    human_input=True,
)

In [None]:
# Task for Frontend Engineer
# Receives the research task's output as context and writes the final page to
# ./ai_research_report.html.
# Every line of the expected format ends with an explicit newline: adjacent
# string literals are concatenated as-is, so without it the instructions
# would run together (e.g. "...on {date}Use the tabular format...").

reporting_task = Task(
    description=("Compile the results into a detailed report in a HTML file."),
    expected_output=(
        "An HTML file with the results in the following format:\n"
        "Top 10 AI Research Papers published on {date}\n"
        "Use the tabular format for the following:\n"
        "- Title (which on clicking opens the paper in a new tab)\n"
        "- Authors\n"
        "- Short summary of the abstract (2-4 sentences)\n"
        "Please do not add '''html''' to the top and bottom of the final file."
    ),
    agent=frontend_engineer,
    context=[research_task],
    output_file="./ai_research_report.html",
    human_input=True,
)

# Creating the Crew

In [None]:
# Bundle both agents with their tasks; the research task is listed before the
# reporting task, which consumes its output as context.
arxiv_research_crew = Crew(
    agents=[researcher, frontend_engineer],
    tasks=[research_task, reporting_task],
    verbose=True,
)

# Running the Crew

In [None]:
# Value substituted for the "{date}" placeholder used in the agent goal and task texts.
crew_inputs = {"date": "2025-03-13"}

In [None]:
# Start the crew with the inputs dict and keep what kickoff() returns in `result`.
# NOTE(review): both tasks set human_input = True, so this cell presumably waits
# for interactive feedback — confirm before running it unattended.
result = arxiv_research_crew.kickoff(inputs = crew_inputs)

[1m[94m 
[2025-03-15 00:49:04][🚀 CREW 'CREW' STARTED, 03B5FF71-49EF-43F9-85D3-3B37AFB8712D]: 2025-03-15 00:49:04.230703[00m
[1m[94m 
[2025-03-15 00:49:04][📋 TASK STARTED:  FIND THE TOP 10 RESEARCH PAPERS FROM THE SEARCH RESULTS FROM ARXIV ON 2025-03-13.]: 2025-03-15 00:49:04.248097[00m
[1m[94m 
[2025-03-15 00:49:04][🤖 AGENT 'SENIOR RESEARCHER' STARTED TASK]: 2025-03-15 00:49:04.249293[00m
[1m[95m# Agent:[00m [1m[92mSenior Researcher[00m
[95m## Task:[00m [92m Find the top 10 research papers from the search results from arXiv on 2025-03-13.[00m
[1m[94m 
[2025-03-15 00:49:04][🤖 LLM CALL STARTED]: 2025-03-15 00:49:04.249499[00m
[1m[94m 
[2025-03-15 00:49:05][✅ LLM CALL COMPLETED]: 2025-03-15 00:49:05.901017[00m
[1m[94m 
[2025-03-15 00:49:05][🤖 TOOL USAGE STARTED: 'FETCH_ARXIV_PAPERS']: 2025-03-15 00:49:05.901755[00m
Fetching papers for category: cs.AI
Fetched 81 papers from cs.AI
[1m[94m 
[2025-03-15 00:53:09][✅ TOOL USAGE FINISHED: 'FETCH_ARXIV_PAPERS']: 2025