In [None]:
!pip install -q langchain_community tiktoken langchain-groq chromadb langchain-core langchain_google_genai faiss-cpu cohere

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m584.3/584.3 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m49.3 MB/s[0m eta [36m0:00:00[

In [None]:
import os

from google.colab import userdata
# Copy each API key from Colab's secret store into the process environment,
# in the same order as before, so the client libraries can read them.
for secret_name in ('GOOGLE_API_KEY', 'GROQ_API_KEY', 'COHERE_API_KEY'):
    os.environ[secret_name] = userdata.get(secret_name)

In [None]:
# required libraries

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS, Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.load import dumps, loads
from operator import itemgetter

In [None]:
# Fetch the blog post that serves as the knowledge base for this notebook
loader = WebBaseLoader(web_path='https://lilianweng.github.io/posts/2023-06-23-agent/')
data = loader.load()

In [None]:
# Split the loaded page into overlapping chunks ready for embedding
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.split_documents(documents=data)

In [None]:
# Embedding model used both to index the chunks and to embed incoming queries
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed every chunk and index the resulting vectors in a Chroma store
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)

In [None]:
# retriever: expose the vector store through the retriever interface (default settings)
retriever = vectorstore.as_retriever()

In [None]:
# RAG Fusion: Related
# Prompt asking the LLM to expand one user question into four related search queries.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):
"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [None]:
# NOTE(review): confirm this model id is still served by Groq before re-running.
llm = ChatGroq(model_name='llama-3.1-70b-versatile')

# question -> prompt -> LLM -> raw text -> list of query strings
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    # Splitting on newlines can yield empty strings (e.g. blank lines between
    # queries); drop them and trim whitespace so no empty query is retrieved on.
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [None]:
# display the composed query-generation chain (its repr); this does not invoke the LLM
generate_queries

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='You are a helpful assistant that generates multiple search queries based on a single input query. \n\nGenerate multiplt search queries realted to: {question} \n\nOutput (4 queries):\n'))])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7d0965b180a0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7d0965b19b40>, model_name='llama-3.1-70b-versatile', groq_api_key=SecretStr('**********'))
| StrOutputParser()
| RunnableLambda(...)

In [None]:
def reciprocal_rank_fusion(results: list[list], k=60):
  """Fuse several ranked document lists into one ranking via Reciprocal Rank Fusion.

  Each document earns ``1 / (rank + k)`` for every list it appears in, where
  ``rank`` is its zero-based position in that list; the contributions are
  summed across all lists.

  Args:
    results: One ranked list of documents per query, best match first.
      Every document must be serializable with ``dumps``.
    k: Constant added to the rank in the RRF formula (default 60).

  Returns:
    A list of ``(document, fused_score)`` tuples sorted by descending score.
    An empty ``results`` yields an empty list.
  """
  fused_scores = {}

  for docs in results:
    for rank, doc in enumerate(docs):
      # Serialize the document to a string so it can be used as a dict key;
      # identical documents returned for different queries share one entry.
      doc_str = dumps(doc)
      fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

  # Highest fused score first; turn the serialized keys back into documents.
  return [
      (loads(doc_str), score)
      for doc_str, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
  ]

In [None]:
# retrieval chain: generate the query variants, run the retriever once per
# query via retriever.map(), then fuse the per-query result lists with RRF
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

In [None]:
# RAG

# Prompt that asks the model to answer from the retrieved context
template = """Answer the following questions based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatGroq(model_name='llama-3.1-70b-versatile')

# The leading dict builds the prompt inputs: `context` comes from the
# RAG-Fusion retrieval chain, `question` is picked out of the input dict.
final_rag_chain = (
    {
        "context": retrieval_chain_rag_fusion,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the full chain on an example question; the cell output is the answer string
final_rag_chain.invoke({'question':'What is task decomposition for LLM agents?'})

'Task decomposition for LLM agents is a process where a complicated task is broken down into smaller, simpler steps that can be executed by the agent. This can be achieved through various methods, including:\n\n1. Using chain of thought (CoT) prompting technique, where the model is instructed to "think step by step" to decompose hard tasks into smaller and simpler steps.\n2. Exploring multiple reasoning possibilities at each step using Tree of Thoughts (Yao et al. 2023).\n3. Decomposing the problem into multiple thought steps and generating multiple thoughts per step, creating a tree structure.\n4. Using simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?".\n5. Using task-specific instructions, such as "Write a story outline." for writing a novel.\n6. Using human inputs.\n\nTask decomposition allows the LLM agent to plan ahead and execute complex tasks by breaking them down into manageable and simpler steps.'

In [None]:
# Second example question, run through the same chain
question = "explain the Maximum Inner Product Search (MIPS)"

final_rag_chain.invoke({'question':question})

"Maximum Inner Product Search (MIPS) is a technique used in large language models (LLMs) to optimize the retrieval speed of information from an external memory. In an LLM-powered autonomous agent system, LLM functions as the agent's brain, complemented by several key components, including memory.\n\nThe external memory can alleviate the restriction of finite attention span by saving the embedding representation of information into a vector store database that can support fast MIPS. To optimize the retrieval speed, the common choice is the approximate nearest neighbors (ANN) algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup.\n\nIn other words, MIPS is a method used to quickly find the most similar vectors in a large database, which is a crucial task in many applications, including natural language processing and computer vision. The idea is to use an ANN algorithm to quickly narrow down the search space and then perform a mo