In [None]:
from crewai_tools import (
    SerperDevTool,
    ScrapeWebsiteTool,
    DirectoryReadTool,
    FileReadTool,
    PDFSearchTool,
)
from crewai import Agent, Task, Crew

from dotenv import load_dotenv
# NOTE: a SERPER_API_KEY must be set up in the .env file before running this notebook.
# Keys are available at https://serper.dev/api-key
# load_dotenv() is called here, ahead of the cells that build the tools
# (presumably so the key is in the environment by then — confirm).
load_dotenv()


In [None]:
# Tools handed to the researcher agent defined in this cell.
search_tool = SerperDevTool()
scrape_tool = ScrapeWebsiteTool()

# Tools created here but not passed to any agent in the cells shown;
# the directory reader is pointed at ./instructions.
directory_read_tool = DirectoryReadTool(directory="./instructions")
file_read_tool = FileReadTool()
pdf_search_tool = PDFSearchTool()

# Agent that looks up one article on arxiv.org and finds the URL of its PDF.
# '{article_description}' is a placeholder left inside the strings; presumably
# crewAI fills it from the `inputs` given to kickoff() — confirm in crewAI docs.
researcher = Agent(
    role="Article researcher",
    goal="Search in arxiv.org for a concrete article who's name or description is '{article_description}' and find the URL of the PDF file that allows to download it.",
    # Adjacent string literals are joined with no separator, so every fragment
    # except the last ends with an explicit space to keep sentences apart.
    backstory="You are a researcher who is responsible for finding and downloading articles from arxiv.org on a specific topic. "
              "The article name or description is: {article_description}. "
              "You search the web arxiv.org to find the information about the article. "
              "Search for a link in to the article in PDF format. "
              "If you find several articles, choose the one that is most relevant to the topic. "
              "If you cannot find the article, say you didn't found any article related",
    allow_delegation=False,
    verbose=True,
    # Only the web search and scraping tools are given to this agent.
    tools=[search_tool, scrape_tool]
)

from pydantic import BaseModel, Field

# Shape of the search result: one required string field, `url`.
class ArticleSearchOutput(BaseModel):
    url: str = Field(
        ...,
        description="URL of the PDF file of the article to download",
    )

# The one task given to the researcher agent; its output is declared as
# JSON following the ArticleSearchOutput model.
researcher_task = Task(
    agent=researcher,
    description=(
        "Find in arxiv.org the URL of the PDF file of a concrete article who's name or the description is provided here '{article_description}'"
    ),
    expected_output=(
        "A dictionary with keys 'url' for a URL of the PDF file of the article to download."
    ),
    output_json=ArticleSearchOutput,
)

In [None]:
# Crew consisting of the single researcher agent and its single task.
article_search_crew = Crew(agents=[researcher], tasks=[researcher_task], verbose=True)

In [None]:
# Inputs for the crew run. The key matches the '{article_description}'
# placeholder used in the agent and task strings.
search_input = {
    'article_description': 'HippoRAG inspired long-term memory language models'
}

In [None]:
# Run the crew with `search_input` as its inputs and keep whatever kickoff() returns.
result = article_search_crew.kickoff(inputs=search_input)

In [None]:
# Show the crew's result in its text form.
print(result)