In [24]:
from langchain_openai import ChatOpenAI
from crewai_tools import PDFSearchTool
from langchain_community.tools.tavily_search import TavilySearchResults
from crewai_tools import tools
from crewai import tools
from crewai import Crew 
from crewai import Task 
from crewai import Agent

In [25]:
import os

os.environ["GROQ_API_KEY "] = "GROQ_API_KEY "

In [26]:
llm = ChatOpenAI(
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama3-8b-8192",
    temperature=0.1,
    max_tokens=1000,
)


# LETS DOWNLOAD THE PAPER FROM THE INTERNET

In [27]:
import requests

pdf_url = "https://arxiv.org/pdf/1706.03762"
response = requests.get(pdf_url)

with open ("attention_is_all_you_need_paper.pdf", "wb") as file:
    file.write(response.content)

# Creating the Retrieval Object (Knowledge Base)

In [28]:
rag_tool = PDFSearchTool(
    pdf="attention_is_all_you_need_paper.pdf",
    config=dict(
        llm=dict(
            provider="groq",  # or google, openai, anthropic, llama2, ...
            config=dict(
                model="llama3-8b-8192",
                # temperature=0.5,
                # top_p=1,
                # stream=True,
            ),
        ),
        embedder=dict(
            provider="huggingface",  # <-- Fixed: added '='
            config=dict(
                model="BAAI/bge-small-en-v1.5",
                # task_type="retrieval_document",
                # title="Embeddings",
            ),
        ),
    )
)


In [29]:
rag_tool.run("What is transformer mechanism in 10 words?")

Using Tool: Search a PDF's content


'Relevant Content:\nFigure 1: The Transformer - model architecture. The Transformer follows this overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1, respectively. 3.1 Encoder and Decoder Stacks Encoder: The encoder is composed of a stack of N= 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- wise fully connected feed-forward network. We employ a residual connection [ 11] around each of the two sub-layers, followed by layer normalization [ 1]. That is, the output of each sub-layer is LayerNorm( x+ Sublayer( x), where Sublayer( x)is the function implemented by the sub-layer itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding layers, produce outputs of dimension dmodel = 512 . Decoder: The decoder is also composed of a stack of N=

# Lets Integrate our Search Tool (TAVILY API)

In [30]:
import os

os.environ["TAVILY_API_KEY"] = "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"

In [31]:
web_search_tool = TavilySearchResults(k=3)

In [32]:
web_search_tool.run("What is transformer mechanism in 10 words?")

[{'url': 'https://en.wikipedia.org/wiki/Transformer_(deep_learning_architecture)',
  'content': 'A transformer is a deep learning architecture that was developed by researchers at Google and is based on the multi-head attention mechanism, which was proposed in the 2017 paper "Attention Is All You Need".[1] Text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel [...] Exact dimension counts within an attention head module\nThe attention mechanism used in the Transformer architecture are scaled dot-product attention units. For each unit, the transformer model learns three weight matrices: the query weights \n𝑊\n𝑄\n, the key weights \n𝑊\n𝐾\n, and the value weights \n𝑊\n𝑉\n.\nThe module takes three sequences, a query sequence, a key sequence, and a value sequence. The qu

# TOOLS TO DETERMINE WHERE TO USE VECTOR STORE OR WEBSEARCGH

In [33]:

from langchain.tools import tool

@tool
def router_tool(question):
    """Router Function"""
    if 'self-attention' in question:
        return 'vectorstore'
    else:
        return 'web_search'

# ROUTER AGENT

In [34]:
Router_Agent = Agent(
    role = "Router",
    goal = 'Route user question to a vector or web search',
    backstory = (
        "You are a expert at routing a user question to a vector store or web search."
        "Use the vectorestore for questions on concept related to Retrieval-Augmented Generation."
        "You do not need to be stringent with the keywords in the question related to these topic. Otherwise, use web-search."   
    ),
    verbose = True,
    allow_delegation= False,
    llm = llm,
)

# RETRIEVAL AGENT 

In [35]:
Retriever_Agent = Agent(
    role = "Retriever",
    goal = 'Use the information retrieved from the vectorstore to answer the question',
    backstory = (
        "You are an assistant for question-answer task."
        "Use the information present in the retrieved context to answer the question."
        "You have to provide a clear concise answer."   
    ),
    verbose = True,
    allow_delegation= False,
    llm = llm,
)

# GRADER AGENT

In [36]:
Grader_agent = Agent(
    role = 'Answer Grader',
    goal = 'Filter out erroneous retrieval',
    backstory = (
        "You are a grader assessing relevance of a retrieved document to a user question."
        "If the document contains keywords related to the user question, grade it as relevant."
        "It does not need to be a strinent test. You have to make sure that the answer is relevant to the question."   
    ),
    verbose = True,
    allow_delegation= False,
    llm = llm,
)

# HALLUCINATION GRADER AGENT

In [37]:
hallucination_grader = Agent(
    role = 'Hallucination Grader',
    goal = 'Filter out hallucination',
    backstory = (
        "You are a hallucination grader assessing whether an answer is grounded in / supported by a set of facts."
        "Make sure you meticulously review the answer and check if the response provided is in alignment with the question asked." 
    ),
    verbose = True,
    allow_delegation= False,
    llm = llm,
)

# ANSWER GRADER AGENT

In [38]:
answer_grader = Agent(
    role = 'Answer Grader',
    goal = 'Filter out hallucination from the answer',
    backstory = (
        "You are a grader assessing whether an answer is useful to resolve a question"
        "Make sure you meticulously review the answer and check if the response provided is in alignment with the question asked." 
        "If the answer is relevant generate a clear and concise response."
        "If the answer generated is not relevant then perform a websearch using 'web_search_tool' "
    ),
    verbose = True,
    allow_delegation= False,
    llm = llm,
)

# ROUTER TASK

In [None]:
router_task = Task(
    description=(
        "Analyze the keywords in the question: {question}. "
        "Based on the keywords, decide whether it is eligible for a vectorstore search or a web search. "
        "Return a single word 'vectorstore' if it is eligible for vectorstore search. "
        "Return a single word 'websearch' if it is eligible for web search. "
        "Do not provide any other preamble or explanation."
    ),
    expected_output=(
        "Give a binary choice: 'websearch' or 'vectorstore'. "
        "Do not provide any other preamble or explanation."
    ),
    agent=Router_Agent,
    tools=[router_tool],
)


# RETRIEVER TASK

In [None]:
retriever_task = Task(
    description=(
        "Your job is to retrieve relevant information for the question: {question}. "
        "If the previous output (from the routing step) was 'websearch', use the web_search_tool. "
        "If it was 'vectorstore', use the rag_tool. "
        "Do not call any task. Only use the tools provided to gather information."
    ),
    expected_output=(
        "Return a clear and concise answer to the question. "
        "Choose the appropriate tool (web_search_tool or rag_tool) based on the previous routing decision."
    ),
    agent=Retriever_Agent,
    context=[router_task],
    tools=[web_search_tool, rag_tool],
)



# GRADER TASK

In [None]:
grader_task = Task(
    description=(
        "Based on the response from the retriever task and the original question: {question}, "
        "evaluate whether the retrieved content is aligned with the question."
    ),
    expected_output=(
        "Return a binary score: 'yes' or 'no'. "
        "Return 'yes' if the response from 'retriever_task' aligns with the question. "
        "Return 'no' if it does not. "
        "Do not provide any other explanation or preamble."
    ),
    agent=Grader_agent,
    context=[retriever_task],
)


# HALLUCINATION TASK

In [None]:
hallucination_task = Task(
    description=(
        "Based on the response from the retriever task and the original question: {question}, "
        "evaluate whether the answer is grounded in fact or hallucinated."
    ),
    expected_output=(
        "Return a binary score: 'yes' or 'no'. "
        "Return 'yes' if the answer is useful and factually aligned with the question. "
        "Return 'no' if the answer is not useful or not factual. "
        "Do not provide any other explanation or preamble."
    ),
    agent=hallucination_grader,
    context=[grader_task],
)


# ANSWER TASK

In [None]:
answer_task = Task(
    description=(
        "Based on the response from the retriever task and the original question: {question}, "
        "determine whether the answer is useful. "
        "If the hallucination_task output is 'yes', return a clear and concise final answer. "
        "If the hallucination_task output is 'no', perform a web search using the web_search_tool and return the response."
    ),
    expected_output=(
        "Return a clear and concise answer if the hallucination_task result is 'yes'. "
        "If it is 'no', perform a web search using the web_search_tool and return the result. "
        "If no relevant result is found, return: 'Sorry! Unable to find a valid response.'"
    ),
    agent=answer_grader,
    context=[hallucination_task],
    tools=[web_search_tool],  # required if fallback is triggered
)


# RAG CREW

In [44]:
rag_crew = Crew(
    agents = [Router_Agent, Router_Agent, Grader_agent, hallucination_grader, answer_grader],
    tasks = [router_task, retriever_task, grader_task, hallucination_task, answer_task],
    verbose = True,
)



# Asking Question 1

In [45]:
inputs = {"question": "How does self-attention mechanism help large language models?"}
result = rag_crew.kickoff(inputs=inputs)

[1m[95m [DEBUG]: == Working Agent: Router[00m
[1m[95m [INFO]: == Starting Task: Analyze the keywords in the question: How does self-attention mechanism help large language models?. Based on the keywords, decide whether it is eligible for a vectorstore search or a web search. Return a single word 'vectorstore' if it is eligible for vectorstore search. Return a single word 'websearch' if it is eligible for web search. Do not provide any other preamble or explanation.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: The question seems to be related to the concept of self-attention mechanism in large language models, which is a topic in the field of Retrieval-Augmented Generation.
Action: router_tool
Action Input: {"question": "How does self-attention mechanism help large language models?"}[0m[95m 

vectorstore
[00m
[32;1m[1;3mFinal Answer: vectorstore[0m

[1m> Finished chain.[0m
[1m[92m [DEBUG]: == [Router] Task output: vectorstore

[00m
[1m

In [47]:
print(result)

The self-attention mechanism helps large language models by enabling them to weigh the importance of different elements in an input sequence and dynamically adjust their influence on the output. This is especially important for language processing tasks, where the meaning of a word can change based on its context within a sentence or document. The self-attention mechanism allows the model to selectively focus on the parts of an input sequence that are most salient, and to build up context and relations over tokens in the natural way as it would for the previous set of tokens (even across long sequences).


# Asking question 2

In [49]:
inputs = {"question": "Explain LLM using the web_search?"}
result = rag_crew.kickoff(inputs=inputs)

[1m[95m [DEBUG]: == Working Agent: Router[00m
[1m[95m [INFO]: == Starting Task: Analyze the keywords in the question: Explain LLM using the web_search?. Based on the keywords, decide whether it is eligible for a vectorstore search or a web search. Return a single word 'vectorstore' if it is eligible for vectorstore search. Return a single word 'websearch' if it is eligible for web search. Do not provide any other preamble or explanation.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: I need to analyze the keywords in the question to determine whether it's eligible for a vectorstore search or a web search.

Action: router_tool

Action Input: {"question": "Explain LLM using the web_search?"}
[0m[95m 

web_search
[00m
[32;1m[1;3mFinal Answer: websearch[0m

[1m> Finished chain.[0m
[1m[92m [DEBUG]: == [Router] Task output: websearch

[00m
[1m[95m [DEBUG]: == Working Agent: Retriever[00m
[1m[95m [INFO]: == Starting Task: Your job is to ret

In [50]:
print(result)

Large language models, also known as LLMs, are very large deep learning models that are pre-trained on vast amounts of data. The underlying transformer is a set of neural networks that consist of an encoder and a decoder with self-attention capabilities. The encoder and decoder extract meanings from a sequence of text and understand the relationships between words and phrases in it.
