## Set up

In [None]:
from google.colab import drive
#drive.flush_and_unmount()
drive.mount('/content/drive/')

Mounted at /content/drive/


#Install the required libraries

In [None]:
!pip install -qqq crewai_tools
!pip install -qqq crewai
!pip install -qqq openai

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m226.9 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.2/84.2 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m525.5/525.5 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

#Load the api keys

In [134]:
import os
from google.colab import userdata

os.environ["OPENAI_API_KEY"] = userdata.get('openai-key')
os.environ["PINECONE_API_KEY"] = userdata.get('pinecone-key')
# Agent will automatically use the model defined in the environment variable
os.environ["OPENAI_MODEL_NAME"] = 'gpt-4'

In [None]:
from crewai import Agent, Crew, Process, Task
from crewai_tools import PDFSearchTool
from dotenv import load_dotenv
load_dotenv()

False

# Creating Tools

Used the PDFSearchTool provided by crewAI, which is built on RAG using OpenAI's embeddings and stores the vector databases on ChromaDB.

In [None]:
import os

print(os.listdir('/content'))


['.config', 'drive', 'db', 'fifa_worldcup_final.pdf', 'indian_politics.pdf', 'india_pak_war.pdf', 'sample_data']


In [135]:
pdf_search_tool1 = PDFSearchTool(
pdf='/content/fifa_worldcup_final.pdf',
config=dict(
llm=dict(
provider="openai",
),
embedder=dict(
provider="openai",
config=dict(
model="text-embedding-3-small",
),
),
)
)

pdf_search_tool2 = PDFSearchTool(
pdf='/content/india_pak_war.pdf',
config=dict(
llm=dict(
provider="openai",
config=dict(
model="gpt-3.5-turbo",
),
),
embedder=dict(
provider="openai",
config=dict(
model="text-embedding-3-small",
),
),
)
)
pdf_search_tool3 = PDFSearchTool(
pdf='/content/indian_politics.pdf',
config=dict(
llm=dict(
provider="openai",
config=dict(
model="gpt-3.5-turbo",
),
),
embedder=dict(
provider="openai",
config=dict(
model="text-embedding-3-small",
),
),
)
)

#User Query

In [None]:
# user_query = """
#         When won the last fifa worldcup 2022??
#         """

## Creating three agents for reading their assigned pdf's

In [136]:
agent1 = Agent(
    role="Read and retrive relevant info from pdf1",
    goal="Search through the PDF to find relevant answers to the query",
    verbose=True,
    memory = True,
    backstory=(
        """
        You are expert at searching and extracting data only from the first pdf
        document provided, ensuring accurate and prompt responses.
        """
    ),
    allow_delegation=False
)

agent2 = Agent(
    role="Read and retrive relevant info from pdf2",
    goal="Search through the PDF to find relevant answers to the query",
    verbose=True,
    memory = True,
    backstory=(
        """
        You are expert at searching and extracting data only from the second pdf
        document provided, ensuring accurate and prompt responses.
        """
    ),
    allow_delegation=False
)

agent3 = Agent(
    role="Read and retrive relevant info from pdf3",
    goal="Search through the PDF to find relevant answers to the query",
    verbose=True,
    memory = True,
    backstory=(
        """
        You are expert at searching and extracting data only from the third pdf
        document provided, ensuring accurate and prompt responses.
        """
    ),
    allow_delegation=False
)

#Query Manager Agent

In [137]:
query_manager = Agent(
role="Question Routing Manager",
goal="Direct questions to the appropriate PDF retrieval agent",
allow_delegation=True,
verbose=True,
backstory=(
"""
This agent identifies which PDF is best suited to answer a question
and routes the task to the relevant retrieval agent.
"""
),
tools=[],
)

# Creating Tasks for agent1, agent2 and agent3

> Add blockquote



In [138]:
response_task1 = Task(
description=(
"""
Utilize PDF 1 to provide answers to the question.
Ensure that the answers are clear, accurate, and based on PDF 1.
Question:
{question}
"""
),
expected_output="""
Accurate answers derived from the content of PDF 1.
""",
tools=[pdf_search_tool1],
agent=agent1,
)

response_task2 = Task(
description=(
"""
Utilize PDF 2 to provide answers to the question.
Ensure that the answers are clear, accurate, and based on PDF 2.
Question:
{question}
"""
),
expected_output="""
Accurate answers derived from the content of PDF 2.
""",
tools=[pdf_search_tool2],
agent=agent2,
)

response_task3 = Task(
description=(
"""
Utilize PDF 3 to provide answers to the question.
Ensure that the answers are clear, accurate, and based on PDF 3.
Question:
{question}
"""
),
expected_output="""
Accurate answers derived from the content of PDF 3.
""",
tools=[pdf_search_tool3],
agent=agent3,
)

#Creating task for Manager Agent

In [139]:
task_route = Task(
description=(
"""
Identify the most appropriate PDF for the question and
assign the task to the correct retrieval agent.
Question:
{question}
"""
),
expected_output="""
Determine which PDF is relevant and assign the retrieval task accordingly.
""",
tools=[],
agent=query_manager,
)

## Creating Crew

In [140]:
team = Crew(
    tasks = [task_route, response_task1, response_task2, response_task3],
    agents = [query_manager, agent1, agent2, agent3],
    process = Process.sequential,
)





In [143]:
user_question = input("Please ask your question?\n")
result = team.kickoff(inputs={"question": user_question})
print(result)

Please ask your question?
In which stadium 2022 FIFA World Cup ﬁnal was played?


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mTo answer this question, I need to understand the content of each PDF and determine which one has the information about the 2022 FIFA World Cup final stadium. I don't have this information right now, so I need to ask each of my coworkers who are in charge of each PDF.

Action: 
Ask question to coworker

Action Input: 
{"question": "Does your PDF contain information about the 2022 FIFA World Cup final stadium?", "context": "The user wants to know in which stadium the 2022 FIFA World Cup final was played", "coworker": "Read and retrive relevant info from pdf1"} 
[0m

[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI need to scan through the PDF to find any mention of the 2022 FIFA World Cup final stadium.

Final Answer:
After thoroughly scanning the PDF, I found that the 2022 FIFA World Cup final was played at the Lusail Iconic S

Let the work Beginnnn!!!

Display the final result as Markdown