In [1]:
import os
from dotenv import load_dotenv
# Load settings via python-dotenv, then mirror the OpenAI / LangSmith
# variables into module-level names (each is None when the variable is unset).
load_dotenv()
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
LANGCHAIN_TRACING_V2 = os.environ.get('LANGCHAIN_TRACING_V2')
LANGCHAIN_ENDPOINT = os.environ.get('LANGCHAIN_ENDPOINT')
LANGCHAIN_API_KEY = os.environ.get('LANGCHAIN_API_KEY')

### Indexing

In [3]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

## Load documents
# Only the post's body, title and header are parsed; SoupStrainer discards every
# other element. The class names must match the page's CSS classes exactly,
# and those are hyphenated ("post-title", not "post_title").
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
## Load content
blog_docs = loader.load()


### Splitting the docs and creating a retriever

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Recursive splitter built from a tiktoken encoder: chunks of 300 with an
# overlap of 50 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
# Cut the loaded blog post into chunks that will be embedded and indexed.
splits = text_splitter.split_documents(blog_docs)

##Retriever
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
# Retriever view of the store; this is what the chains below query.
retriever = vectorstore.as_retriever()


## Usage of Multi Query
* Here we learn how the multi-query technique is used to pick the best chunks of data
* A user's question is often worded in a way that does not exactly capture what they mean to ask
* So in multi-query we create a chain that works as follows:
    * Take the question
    * Create 5 different forms of same question
    * Loop each question
        * Get the related chunks of data of question
        * Append all the chunks (unique chunk of data)
        * Create a chain which gives related chunks of data (just as we create from llm output)
    * Get all the chunks of data and append to the context and pass it to LLM

### Create prompt and chain

In [8]:
from langchain.prompts import ChatPromptTemplate
## Multi Query
# Prompt that asks the model to rewrite the user's question in five
# alternative ways, one per line. `{question}` is the only input variable.
# The template text is sent to the model verbatim, so its whitespace matters.
template= """
You are an AI language model assistant. Your task is to generate five different versions of the
given user question to retrieve relevant documents from a vector database. By generating multiple 
perspectives on the user question, your goal is to help the user overcome some of the limitations of 
the distance-based similarity search.
Provide these alternative questions seperated by new line.
Original question: {question}
"""
# Wrap the raw template so it can be used as the first step of a chain.
prompt_perspective= ChatPromptTemplate.from_template(template)

##Create chain
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Pipeline: prompt -> chat model -> plain string -> list of query strings.
# Blank lines in the model output are dropped (and surrounding whitespace is
# trimmed) so that no empty query is handed to the retriever downstream.
generate_queries = (
    prompt_perspective
    | ChatOpenAI(temperature=0.1)
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)



* Loop over the generated questions and, for each one, retrieve the relevant docs from the vector store

In [10]:
from langchain.load import loads, dumps

##Function to get the list of docs
def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list containing each distinct document once, in order of
        first appearance across the input lists.
    """
    # Serialize each document so it can be used as a hashable key.
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # hand the documents back in an arbitrary order that can change per run.
    unique_serialized = dict.fromkeys(serialized)
    return [loads(doc) for doc in unique_serialized]

## Retriever
question = "What is task decomposition for LLM agents?"

# Generate the alternative questions, run the retriever once per question
# (retriever.map()), then merge the per-question results into one list.
retrieval_chain = (
    generate_queries
    | retriever.map()
    | get_unique_union
)

docs = retrieval_chain.invoke({"question": question})
# Number of distinct documents retrieved across all generated questions.
len(docs)
    


  warn_beta(


6

### Now get these docs and check question

In [15]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

## RAG
# Answering prompt with two input variables: `{context}` (the retrieved
# documents) and `{question}` (the user's question). The template text is
# passed to the model as written, including its whitespace.
template= """  
Answer the following question based on this context:
{context}

Question: {question}
"""

# Wrap the raw template so it can be piped into the chat model.
prompt= ChatPromptTemplate.from_template(template)
# Chat model (temperature 0.1) that writes the final answer.
llm = ChatOpenAI(temperature=0.1)

# The leading dict fills the two prompt variables from the same input:
# "context" runs the multi-query retrieval chain, "question" passes the
# original question text through unchanged.
final_rag_chain = (
    {
        "context": retrieval_chain,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# The input must be a dict with a "question" key; itemgetter("question")
# indexes into it, so a bare string would not work here.
final_rag_chain.invoke({"question": question})

'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This enables the agent to efficiently handle complex tasks by dividing them into smaller and simpler steps. Task decomposition can be achieved through techniques such as Chain of Thought (CoT) and Tree of Thoughts, as well as through simple prompting or task-specific instructions.'

# RAG-Fusion
* This is just like multi-query: the LLM is asked to create several different questions from the single question provided.
* But once the docs are retrieved, they are ranked using Reciprocal Rank Fusion (RRF)
* The best docs are then picked and used to answer the original question

In [16]:
## Prompt
from langchain.prompts import ChatPromptTemplate
# RAG-Fusion prompt: asks the model for four search queries related to the
# input `{question}`. The string is not raw, so the `\n` escape below becomes
# a real newline in the text sent to the model.
template=""" 
You are a helpful assistant that generates multiple search queries based on a single query input query.
Generate multiple search queries related to: {question}\n
output (4 queries):
"""
# Wrap the raw template so it can be used as the first step of a chain.
prompt_rag_fusion= ChatPromptTemplate.from_template(template= template)

In [17]:
### Generate queries
from langchain.output_parsers import StructuredOutputParser
from langchain_openai import ChatOpenAI

# RAG-Fusion query generator: prompt -> chat model -> string -> list of queries.
# Blank lines in the model output are dropped (and surrounding whitespace is
# trimmed) so that no empty query is handed to the retriever downstream.
generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(temperature=0.1)
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [18]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one using Reciprocal Rank Fusion.

    Every appearance of a document at zero-based position ``rank`` in one of
    the lists adds ``1 / (rank + k)`` to that document's fused score.

    Args:
        results: One ranked list of documents per query (presumably best
            match first, as returned by the retriever).
        k: Constant added to the rank in the RRF formula (default 60).

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by fused score,
        highest first.
    """
    # Fused score per unique document, keyed by the document's serialized
    # form (serialization makes the document usable as a dictionary key).
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            # Add this list's contribution; unseen documents start from 0.
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # Highest fused score first; turn the serialized keys back into documents.
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    return reranked_results

# Generate the search queries, retrieve once per query (retriever.map()),
# then fuse the per-query rankings into a single scored list.
retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciprocal_rank_fusion
)

docs = retrieval_chain_rag_fusion.invoke({"question": question})
# Number of distinct documents left after fusion.
len(docs)

Failed to batch ingest runs: TypeError('sequence item 0: expected str instance, ReadTimeoutError found')


7

In [20]:
from langchain_core.runnables import RunnablePassthrough

# RAG
# Answering prompt with two input variables: `{context}` (the fused retrieval
# results) and `{question}`. The indentation inside the triple-quoted string
# is part of the text sent to the model.
template= """
    Answer the following question based on this context:
    {context}
    
    Question: {question}
"""

# Wrap the raw template so it can be piped into the chat model.
prompt= ChatPromptTemplate.from_template(template)

# Same answering chain as before, but "context" now comes from the RAG-Fusion
# retrieval chain, i.e. a list of (document, fused_score) tuples.
final_rag_chain = (
    {
        "context": retrieval_chain_rag_fusion,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Input is a dict because itemgetter("question") indexes into it.
final_rag_chain.invoke({"question": question})

'Task decomposition for LLM agents involves breaking down complex tasks into smaller and simpler steps. This can be achieved through techniques like Chain of Thought (CoT) and Tree of Thoughts, which prompt the model to think step by step and explore multiple reasoning possibilities at each step. Task decomposition can be done using simple prompting, task-specific instructions, or with human inputs to help the LLM agent effectively plan and execute tasks.'

## Decomposition

* We will split the question into sub questions and pass it to llm to get docs
    * Questions we get will be decomposed to multiple questions
    * Eg: What is the structure of Amoeba in the sea?
    * The above question is divided into 3 parts (3 is dynamic)
        * In what sea do we find Amoeba
        * What is the life history of Amoeba
        * What is the structure of Amoeba
    * The prompt dynamically changes for each question
    * eg: We have got first question please find the answer for that and over all context
    * For third question: We have got details of first and second question and over all context, please answer for the third question
    * and so on..
    * The above method is called answering recursively
* One more method is answering individually, where the answers to the different sub-questions are not fed recursively into the next question's context. Instead, they are collected one by one

In [21]:
# Decomposition prompt: asks the model to break the input `{question}` into
# three sub-questions that can each be answered in isolation. The string is
# not raw, so each `\n` escape becomes a real newline, and the leading
# indentation on every line is part of the text sent to the model.
template="""
        You are helpful assitant that generates multiple sub-questions related to an input question.
        The goal is to break down the input into a set of Sub-problems / Sub-questions that can be answered in isolation.
        \n Generate multiple search queries related to: {question} \n
        Output (3 queries):
"""
# Wrap the raw template so it can be used as the first step of a chain.
prompt_decomposition= ChatPromptTemplate.from_template(template)

In [22]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

## llm
llm = ChatOpenAI(temperature=0.1)
# Chain: prompt -> chat model -> plain string -> list of sub-questions.
# Blank lines in the model output are dropped (and surrounding whitespace is
# trimmed) so that every entry in the list is an actual sub-question.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)
# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question": question})

In [23]:
# Display the sub-questions produced by the decomposition chain.
questions

['1. What is the role of machine learning in an LLM-powered autonomous agent system?',
 '2. How do the components of an LLM-powered autonomous agent system work together?',
 '3. What are the specific functions of each component in an LLM-powered autonomous agent system?']

## Answering recursively
* We create a template which answers recursively