In [None]:
import logging
from typing import Optional

from crewai import Agent, Task, Crew
from pydantic import BaseModel, Field

from dotenv import load_dotenv

# Root logger at DEBUG: records from this notebook and from imported
# libraries are emitted at every level, which can make cell output verbose.
logging.basicConfig(level=logging.DEBUG)

# Populate the process environment from a .env file (if one is found)
# before any later cell needs those variables.
load_dotenv()


In [None]:
from hackathon.tools import FileDownloadTool, sha256_filename_generator

# Build the download tool, handing it the SHA-256 based filename generator.
download_tool = FileDownloadTool(
    filename_generator=sha256_filename_generator,
)

# Call the tool directly, outside of any agent, as a quick manual check.
# NOTE(review): the positional arguments look like (url, filename, directory)
# -- confirm against FileDownloadTool's signature.
download_tool.run(
    'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf',
    'dummy.pdf',
    './downloads',
)

In [None]:
# Agent that carries out the downloads. `download_tool` is the only tool it
# is given, and delegation to other agents is switched off.
downloader_agent = Agent(
    role='File Downloader',
    goal='Download files from given URLs efficiently and safely to the given directory.',
    # Adjacent string literals are concatenated with no separator, so the
    # space between the two sentences must be written explicitly; without it
    # the prompt reads "...secure.You are provided...".
    backstory='You are an expert in downloading files from the internet, ensuring the process is smooth and secure. '
              'You are provided a URL to download the file from and a directory name to save it.',
    verbose=True,
    allow_delegation=False,
    tools=[download_tool]
)

# Structured result schema for one download; the task below passes it as
# `output_json`.
class FileDownloadDetails(BaseModel):
    # All three fields use `...` as their default, so each must be supplied.
    # `filepath` accepts None, but the key itself is still required.
    url: str = Field(
        default=...,
        description="The original source URL from where the file was downloaded",
    )
    filepath: Optional[str] = Field(
        default=...,
        description="If the download was success, the filepath where the file was saved",
    )
    status: str = Field(
        default=...,
        description="Status of the download operation, either Success or Failure",
    )

# Task assigned to the downloader agent. The description contains the
# `{urls}` and `{directory}` placeholders, which are expected to be filled
# from the inputs supplied when the crew is kicked off.
# NOTE(review): the description refers to several URLs, but `output_json` is
# a single FileDownloadDetails record -- confirm whether one record per URL
# (e.g. a wrapper model holding a list) is what is actually wanted.
download_task = Task(
    description="Download a file from the URLs: '{urls}'. If an output directory is provided {directory}, save the file there.",
    agent=downloader_agent,
    output_json=FileDownloadDetails,
    # Adjacent string literals are joined with no separator, so every
    # sentence except the last carries an explicit trailing space; otherwise
    # the prompt runs together as "...JSON.The status...Failure.If the...".
    expected_output=(
        'The file download operation details including source URL and filepath of the saved file will be provided in JSON. '
        'The status will be either Success or Failure. '
        'If the download fails, the filepath will be None and the status will be Failure. '
        'Use the output details from the tools to complete the task.'
    ),
)

# Assemble the crew: one agent working through one task, with verbose
# output enabled.
file_download_crew = Crew(
    verbose=True,
    tasks=[download_task],
    agents=[downloader_agent],
)


In [None]:
# Kick off the crew. The `inputs` keys match the `{urls}` and `{directory}`
# placeholders used in the task description.
result = file_download_crew.kickoff(
    inputs={
        'urls': [
            'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf',
            'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy1.pdf',
            'https://arxiv.org/pdf/2405.14831',
        ],
        'directory': './downloads',
    },
)

# Show whatever the crew returned.
print(result)
