In [1]:
import os
from crewai import Agent, Task, Crew
from dotenv import load_dotenv # Import load_dotenv to load environment variables

# Load environment variables from .env file
load_dotenv()

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"), # Correctly gets OpenAI API key from .env
    model="gpt-4o",                      # Uses gpt-4o as the model
    temperature=0.7,                     # Adjusted to 0.7 for balance, but you can keep 0 if preferred
    max_tokens=1000,                     # Keep max_tokens
)

In [2]:
from crewai_tools import PDFSearchTool

# Corrected PDF path: assuming 'agentic_rag.ipynb' is in the root directory
# and 'dspy.pdf' is in the 'knowledge' folder directly under the root.
rag_tool = PDFSearchTool(pdf='./knowledge/dspy.pdf',
    config=dict(
        llm=dict(
            provider="openai", # Changed provider to "openai"
            config=dict(
                model="gpt-4o",  # Changed model to "gpt-4o"
                # You can add temperature, top_p, stream if needed for this tool's LLM config
                # temperature=0.7,
            ),
        ),
        embedder=dict(
            provider="huggingface", # Keep huggingface for embeddings
            config=dict(
                model="BAAI/bge-small-en-v1.5",
            ),
        ),
    )
)

  util.warn_deprecated(
  embeddings = HuggingFaceEmbeddings(model_name=self.config.model, model_kwargs=self.config.model_kwargs)
Inserting batches in chromadb:   0%|          | 0/2 [00:00<?, ?it/s]/Users/avinashgohite/Desktop/agentic_RAG/.venv/lib/python3.11/site-packages/chromadb/types.py:144: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  return self.model_fields  # pydantic 2.x
Inserting batches in chromadb: 100%|██████████| 2/2 [00:03<00:00,  1.79s/it]


In [3]:
rag_tool.run("How does exercise price determine for ESOP?")

Using Tool: Search a PDF's content


'Relevant Content:\nPreprint 1 Given the fields ‘question‘, produce the fields ‘answer‘. 2 3 - 4 5 Follow the following format. 6 7 Question: ${question} 8 Answer: ${answer} 9 10 - 11 12 Question: Jimmy and Irene go shopping for clothes on a Tuesday, where senior citizens get a 10% discount on their purchases. Jimmy picks out 3 shorts from the $15 rack. Irene grabs 5 shirts from the $17 rack. How much money do they give to the cashier? 13 Answer: Jimmy picks out 3 shorts at $15 each = $45. Irene grabs 5 shirts at $17 each = $85. Total cost = $45 + $85 = $130. Since senior citizens get a 10% discount, they will pay 10% of $130 = $13. So, they will give the cashier $130 - $13 = $117. - 14 15 - 16 17 Question: Figure 9: Copy of the prompt automatically generated by DSPy for GSM8K Llama2-13b-chat vanilla program compiled with bootstrap×2. 1 Given the fields ‘question‘, produce the fields ‘answer‘. 2 3 - 4 5 Follow the following format. 6 7 Question: ${question} 8 Reasoning: Let’s think ste

In [4]:
import os
from langchain_community.tools.tavily_search import TavilySearchResults
# TAVILY_API_KEY should be loaded via load_dotenv() from your .env file already.
# So, no need for the os.environ assignment here unless it's strictly for this session.
# If you keep it, ensure it's os.getenv and not userdata.get.
# os.environ['TAVILY_API_KEY'] = os.getenv('TAVILY_API_KEY') # If you prefer to set it here
web_search_tool = TavilySearchResults(k=3)

  web_search_tool = TavilySearchResults(k=3)


In [5]:
web_search_tool.run("How does exercise price determine for ESOP?")


[{'title': 'What is Exercise Price in ESOPs and How is it Calculated',
  'url': 'https://www.tricaequity.com/equity/blogs/how-to-calculate-exercise-price-in-esops',
  'content': "To begin with, the **exercise price in ESOP** is used to determine the amount required to exercise the options and the tax ramifications of doing so. In return for the shares, the employee must pay the strike price multiplied by the number of vested options the employee desires to exercise.The difference between the stock's current Fair Market Value (FMV) and the strike price is used to compute taxes. [Taxes](https://www.trica.co/equity/blog/restricted-stock-unit-rsu-tax-strategies/) are [...] The **exercise price in ESOP** is when stock option holders have the right but not the obligation to purchase vested options during the term period. Under ESOP, the company is free to set the exercise price as long as it follows accounting policies and regulations.On the grant date, an employer and employee agree on ESOP

In [6]:
from crewai_tools  import tool
@tool
def router_tool(question):
  """Router Function"""
  if 'ESOP' in question:
    return 'vectorstore'
  else:
    return 'web_search'

ImportError: cannot import name 'tool' from 'crewai_tools' (/Users/avinashgohite/Desktop/agentic_RAG/.venv/lib/python3.11/site-packages/crewai_tools/__init__.py)

Router Agent

In [None]:
Router_Agent = Agent(
  role='Router',
  goal='Route user question to a vectorstore or web search',
  backstory=(
    "You are an expert at routing a user question to a vectorstore or web search."
    "Use the vectorstore for questions on concepta related to Retrieval-Augmented Generation."
    "You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use web-search."
  ),
  verbose=False,
  allow_delegation=False,
  llm=llm,
)

Retriever_Agent

In [None]:
Retriever_Agent = Agent(
role="Retriever",
goal="Use the information retrieved from the vectorstore to answer the question",
backstory=(
    "You are an assistant for question-answering tasks."
    "Use the information present in the retrieved context to answer the question."
    "You have to provide a clear concise answer."
),
verbose=False,
allow_delegation=False,
llm=llm,
)

Grader Agent

In [None]:
Grader_agent =  Agent(
  role='Answer Grader',
  goal='Filter out erroneous retrievals',
  backstory=(
    "You are a grader assessing relevance of a retrieved document to a user question."
    "If the document contains keywords related to the user question, grade it as relevant."
    "It does not need to be a stringent test.You have to make sure that the answer is relevant to the question."
  ),
  verbose=False,
  allow_delegation=False,
  llm=llm,
)

Hallucination Grader Agent

In [None]:
hallucination_grader = Agent(
    role="Hallucination Grader",
    goal="Filter out hallucination",
    backstory=(
        "You are a hallucination grader assessing whether an answer is grounded in / supported by a set of facts."
        "Make sure you meticulously review the answer and check if the response provided is in alignmnet with the question asked"
    ),
    verbose=False,
    allow_delegation=False,
    llm=llm,
)

Answer Grader Agent

In [None]:
answer_grader = Agent(
    role="Answer Grader",
    goal="Filter out hallucination from the answer.",
    backstory=(
        "You are a grader assessing whether an answer is useful to resolve a question."
        "Make sure you meticulously review the answer and check if it makes sense for the question asked"
        "If the answer is relevant generate a clear and concise response."
        "If the answer gnerated is not relevant then perform a websearch using 'web_search_tool'"
    ),
    verbose=False,
    allow_delegation=False,
    llm=llm,
)

Router Task

In [None]:
router_task = Task(
    description=("Analyse the keywords in the question {question}"
    "Based on the keywords decide whether it is eligible for a vectorstore search or a web search."
    "Return a single word 'vectorstore' if it is eligible for vectorstore search."
    "Return a single word 'websearch' if it is eligible for web search." 
    "Do not provide any other premable or explaination."
    ),
    expected_output=("Give a binary choice 'websearch' or 'vectorstore' based on the question"
    "Do not provide any other premable or explaination."),
    agent=Router_Agent,
    tools=[router_tool],
)

Retriever Task

In [None]:
retriever_task = Task(
    description=("Based on the response from the router task extract information for the question {question} with the help of the respective tool."
    "Use the web_serach_tool to retrieve information from the web in case the router task output is 'websearch'."
    "Use the rag_tool to retrieve information from the vectorstore in case the router task output is 'vectorstore'."
    ),
    expected_output=("You should analyse the output of the 'router_task'"
    "If the response is 'websearch' then use the web_search_tool to retrieve information from the web."
    "If the response is 'vectorstore' then use the rag_tool to retrieve information from the vectorstore."
    "Return a claer and consise text as response."),
    agent=Retriever_Agent,
    context=[router_task],
   #tools=[retriever_tool],
)

Grader Task

In [None]:
grader_task = Task(
    description=("Based on the response from the retriever task for the quetion {question} evaluate whether the retrieved content is relevant to the question."
    ),
    expected_output=("Binary score 'yes' or 'no' score to indicate whether the document is relevant to the question"
    "You must answer 'yes' if the response from the 'retriever_task' is in alignment with the question asked."
    "You must answer 'no' if the response from the 'retriever_task' is not in alignment with the question asked."
    "Do not provide any preamble or explanations except for 'yes' or 'no'."),
    agent=Grader_agent,
    context=[retriever_task],
)

Hallucination Grader Task

In [None]:
hallucination_task = Task(
    description=("Based on the response from the grader task for the quetion {question} evaluate whether the answer is grounded in / supported by a set of facts."),
    expected_output=("Binary score 'yes' or 'no' score to indicate whether the answer is sync with the question asked"
    "Respond 'yes' if the answer is in useful and contains fact about the question asked."
    "Respond 'no' if the answer is not useful and does not contains fact about the question asked."
    "Do not provide any preamble or explanations except for 'yes' or 'no'."),
    agent=hallucination_grader,
    context=[grader_task],
)

Answer grader Task

In [None]:
answer_task = Task( 
    description=("Based on the response from the hallucination task for the quetion {question} evaluate whether the answer is useful to resolve the question."
    "If the answer is 'yes' return a clear and concise answer."
    "If the answer is 'no' then perform a 'websearch' and return the response"),
    expected_output=("Return a clear and concise response if the response from 'hallucination_task' is 'yes'."
    "Perform a web search using 'web_search_tool' and return ta clear and concise response only if the response from 'hallucination_task' is 'no'."
    "Otherwise respond as 'Sorry! unable to find a valid response'."),  
    context=[hallucination_task],
    agent=answer_grader,
    #tools=[answer_grader_tool],
)

Setup the Crew

In [None]:
rag_crew = Crew(
    agents=[Router_Agent, Retriever_Agent, Grader_agent, hallucination_grader, answer_grader],
    tasks=[router_task, retriever_task, grader_task, hallucination_task, answer_task],
    verbose=False,
 
)

In [None]:
inputs ={"question":"Does the ESOP supplement the salary of an employee?"}

In [None]:
result = rag_crew.kickoff(inputs=inputs)

In [None]:
result