In [1]:
import add_packages
import config
from pprint import pprint	
import os
import bs4

import ast

from toolkit.langchain import (
	document_loaders, text_splitters, text_embedding_models, stores, 
	models, prompts, utils, output_parsers, agents, output_parsers, documents,
	runnables, chains

)

# Use Cases

## Q&A with RAG 

### Base

In [2]:
import bs4
from langchain import hub
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.messages import AIMessage, HumanMessage

In [3]:
# Indexing, step 1 (load): fetch the blog post and parse only the elements
# whose CSS class is the post title, header or content.
bs4_strainer = bs4.SoupStrainer(
  class_=("post-title", "post-header", "post-content"),
)
loader = document_loaders.web_base_loader(
  web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
  bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

# Indexing, step 2 (split): 1000-character chunks with a 200-character overlap;
# add_start_index=True is passed so each chunk records its offset in the source.
text_splitter = text_splitters.recursive_character_text_splitter(
  chunk_size=1000,
  chunk_overlap=200,
  add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

In [2]:
# Qdrant-backed vector store used by the "Q&A with RAG" cells.
# Host and API key are read from the environment so no credentials live in the notebook.
qdrant_instance = stores.QdrantWrapper(
  collection_name="my-rag",
  qdrant_host=os.getenv("QDRANT_HOST"),
  qdrant_api_key=os.getenv("QDRANT_API_KEY"),
  default_search_type="similarity",
  default_search_kwargs={"k": 6},
  # The retriever tool is handed to the agent further down, so its name and
  # description must describe what is actually indexed in `my-rag`: the
  # "LLM Powered Autonomous Agents" blog post, not the State of the Union.
  retriever_tool_name="search_llm_agents_post",
  retriever_tool_description="Searches and returns excerpts from the blog post 'LLM Powered Autonomous Agents' by Lilian Weng.",
)

[32m2024-03-07 10:45:10.146[0m | [1mINFO    [0m | [36mtoolkit.langchain.stores[0m:[36m__init__[0m:[36m99[0m - [1mFound collection: `my-rag`.[0m


In [6]:
# Indexing: Store
# Embed and upload the chunks produced by the split step.
# NOTE(review): `my-rag` already exists (see the wrapper's startup log), so
# re-running this cell presumably inserts the same chunks again; confirm
# whether the wrapper de-duplicates before relying on Restart & Run All.
qdrant_instance.vector_store.add_documents(documents=all_splits)

In [7]:
# Retrieval and Generation: Retrieve
# Stand-alone retrieval call for inspecting what the retriever returns;
# `retrieved_docs` is not consumed by the later cells visible here.
query = "What are the approaches to Task Decomposition?"
retrieved_docs = qdrant_instance.invoke_retriever(query)

In [8]:
# Retrieval and Generation: Generate
# `chat` and `prompt` are bound at module level because later cells reuse them.
chat = models.chat_openai
prompt = prompts.rag_prompt

# The input question is fanned out: one branch retrieves and formats context,
# the other passes the question through unchanged.
rag_chain = (
  RunnableParallel(
    context=qdrant_instance.retriever | utils.format_docs,
    question=RunnablePassthrough(),
  )
  | prompt
  | chat
  | output_parsers.StrOutputParser()
)

# Render the prompt with placeholder values to inspect the messages it produces.
example_messages = prompt.invoke(
  dict(context="filter context", question="filter question")
).to_messages()

In [9]:
# Stream the answer: each chunk is printed as it arrives, without a trailing
# newline, so the pieces read as one continuous paragraph.
for chunk in rag_chain.stream("What is Task Decomposition?"):
  print(chunk, end="", flush=True)

Task decomposition involves breaking down complex tasks into smaller and simpler steps to enhance model performance. Techniques like Chain of Thought and Tree of Thoughts help in transforming big tasks into manageable ones by exploring multiple reasoning possibilities at each step. Task decomposition can be done using simple prompting, task-specific instructions, or human inputs.

### Add Source

In [10]:
# Adding sources
# Generation half: expects a dict that already holds the retrieved documents
# under "context", formats them to text, then runs prompt -> model -> parser.
rag_chain_from_docs = (
  RunnablePassthrough.assign(context=(lambda x: utils.format_docs(x["context"])))
  | prompt
  | chat
  | output_parsers.str_output_parser()
)
# Previous (misspelled) name, kept as an alias so existing references keep working.
rag_chain_from_dos = rag_chain_from_docs

# Retrieval half runs first and keeps the raw documents in the output, so the
# final result carries "context", "question" and the generated "answer".
rag_chain_with_source = RunnableParallel(
  {
    "context": qdrant_instance.retriever,
    "question": RunnablePassthrough()
  }
).assign(answer=rag_chain_from_docs)

In [17]:
# The result is a dict holding the retrieved `context` documents, the
# `question`, and the generated `answer`.
rag_chain_with_source.invoke("What is Task Decomposition?")

{'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 1585, '_id': '3e36d9d5-a5f2-48cd-9bf9-0e1d127bf068', '_collection_name': 'my-rag'}),
  Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multip

### Add chat history

In [11]:
# Prompt -> chat model -> string parser. Presumably rewrites the latest
# question using the chat history (going by the prompt's name); TODO confirm
# against `prompts.contextualize_q_prompt`.
contextualize_q_chain = prompts.contextualize_q_prompt | chat | output_parsers.str_output_parser()

def contextualized_question(input: dict):
  """Choose what should be sent to the retriever.

  Returns `contextualize_q_chain` when `input` has a non-empty
  "chat_history" entry; otherwise returns the raw `input["question"]`.
  """
  has_history = bool(input.get("chat_history"))
  return contextualize_q_chain if has_history else input["question"]

# History-aware RAG chain. The input dict (with "question" and "chat_history")
# is passed through and gains a "context" key built by:
#   contextualized_question -> retriever -> utils.format_docs
# NOTE(review): this relies on LCEL invoking a runnable returned from a
# function step (the `contextualize_q_chain` branch); confirm against the
# installed langchain-core version.
# There is no output parser at the end, so the chain yields chat messages.
rag_chain = (
  RunnablePassthrough.assign(
    context=contextualized_question | qdrant_instance.retriever | utils.format_docs
  )
  | prompts.qa_prompt
  | chat
)


In [12]:
# Running conversation: every turn appends the human question and the model's
# reply, so the next question is answered with the earlier turns as history.
chat_history = []

questions = ["What is Task Decomposition?", "What are common ways of doing it?"]

for question in questions:
  ai_msg = rag_chain.invoke(dict(question=question, chat_history=chat_history))
  chat_history.append(HumanMessage(content=question))
  chat_history.append(ai_msg)

### Streaming

In [19]:
# This chain ends at the chat model (no string parser), so each streamed
# chunk is a message object and the text is read from `.content`.
for chunk in rag_chain.stream({
  "question": "What is Task Decomposition", "chat_history": []
}):
  print(chunk.content, flush=True, end='')

Task decomposition involves breaking down a complex task into smaller and simpler steps to make it more manageable for an agent or model. Techniques like Chain of Thought (CoT) and Tree of Thoughts help in decomposing hard tasks into multiple manageable tasks by guiding the model to think step by step or explore multiple reasoning possibilities at each step. Task decomposition can be done using simple prompting, task-specific instructions, or human inputs to guide the agent or model in achieving the overall task goal.

### Per-User Retrieval

### Citations

### Use Agents

In [17]:
# The agent has a single tool: the retriever exposed by the Qdrant wrapper.
tools = [qdrant_instance.retriever_tool]

agent_prompt = prompts.prompt_agent_openai_tools

# Build the OpenAI-tools agent, then wrap it in an executor; verbose=True is
# what produces the step-by-step trace when the agent is invoked.
agent = agents.create_openai_tools_agent(
  llm=models.chat_openai, tools=tools, prompt=agent_prompt,
)
agent_executor = agents.AgentExecutor(
  agent=agent,
  tools=tools,
  verbose=True,
)

In [25]:
# Small-talk input: a quick check that the agent replies to a plain greeting.
agent_executor.invoke({"input": "hi, i am Bob"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHello Bob! How can I assist you today?[0m

[1m> Finished chain.[0m


{'input': 'hi, i am Bob', 'output': 'Hello Bob! How can I assist you today?'}

### Use Local Models

# Tutorials

## [Build a RAG App](https://python.langchain.com/v0.2/docs/tutorials/rag/)

In [7]:
# Chat model used by the tutorial chain built in the cells below.
llm = models.chat_openai

In [4]:
# Load the blog post, parsing only the title, header and content elements.
loader = document_loaders.WebBaseLoader(
	web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
	bs_kwargs={
		"parse_only": bs4.SoupStrainer(
			class_=("post-content", "post-title", "post-header"),
		),
	},
)
docs = loader.load()

# Split into 1000-character chunks with a 200-character overlap.
splitter = text_splitters.RecursiveCharacterTextSplitter(
	chunk_size=1000,
	chunk_overlap=200,
)
splits = splitter.split_documents(docs)


In [49]:
# Embed the chunks with OpenAI embeddings and index them in Chroma.
vectorstore = stores.chroma.Chroma.from_documents(
	documents=splits,
	embedding=text_embedding_models.OpenAIEmbeddings(),
)

# Similarity search returning the 6 closest chunks per query.
retriever = vectorstore.as_retriever(
	search_type="similarity", search_kwargs=dict(k=6),
)

In [98]:
# Prompt for the filtering step: given {question} and the retrieved {context}
# chunks, the model is asked to reply with a Python list literal holding only
# the relevant chunks. The chain below parses that reply as a Python literal.
prompt_tpl_filter_context = """\
You are an AI assistant tasked with filtering a list of retrieved text chunks to only the most relevant ones for answering a given question.

Here is the question:
<question>
{question}
</question>

And here are the retrieved text chunks:
<retrieved_chunks>
{context}
</retrieved_chunks>

Carefully analyze each chunk for how relevant it is to answering the question. Consider the following:
- Does the chunk contain information that helps answer the question?
- Does the chunk provide important context or background for the question?
- Is the chunk focused on the key concepts and entities mentioned in the question?

Create a Python list containing only the most relevant chunks. The list should be a subset of the original retrieved_chunks list. Omit any chunks that are not directly helpful for answering the question. 

Output this filtered list of the most relevant chunks. The format should be a valid Python list, like:
['chunk 1 text', 
'chunk 2 text',
'chunk 3 text']

Remember, the goal is to create a focused list of only the most relevant chunks for answering the original question. Do not include any irrelevant or tangential chunks in your final result list.
"""
prompt_filter_context = prompts.ChatPromptTemplate.from_template(prompt_tpl_filter_context)

# Prompt for the answering step: takes {question} and the already filtered
# chunks in {context_filtered}.
# The earlier wording told the model to reason "in the <scratchpad> area
# below", but the template has no such area; the instruction now asks for the
# reasoning without pointing at a section that does not exist.
prompt_tpl_rag = """\
Here is the question to answer:
<question>
{question}
</question>

And here are the relevant pieces of context that may help answer the question:
<context>
{context_filtered}
</context>

Carefully read the question and context. Think through how the context can be used to answer the question. If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know".

Provide your final answer to the question. If the question cannot be answered based on the provided context, simply say "I don't know." Keep your answer to 3 sentences maximum and prioritize conciseness.

Helpful Answer:\
"""
prompt_rag = prompts.ChatPromptTemplate.from_template(prompt_tpl_rag)

def format_docs_to_str(docs: list[document_loaders.Document]):
	"""Join the `page_content` of every document, separated by a blank line."""
	contents = [document.page_content for document in docs]
	return "\n\n".join(contents)

def format_docs_to_list(docs: list[document_loaders.Document]):
	"""Return the `page_content` of each document as a list, in input order."""
	contents = []
	for document in docs:
		contents.append(document.page_content)
	return contents

def _parse_chunk_list(llm_output: str) -> list:
	"""Extract the Python list literal from the filter model's reply.

	The prompt asks for a bare list, but the reply may be wrapped in a Markdown
	code fence or surrounded by a sentence of prose, which makes a direct
	`ast.literal_eval` fail. Only the span from the first "[" to the last "]"
	is evaluated.

	Args:
		llm_output: Raw text returned by the model.

	Returns:
		The parsed list of chunk strings.

	Raises:
		ValueError: If no bracketed span is found, or it is not a valid literal.
		SyntaxError: If the bracketed span is not valid Python syntax.
	"""
	start = llm_output.find("[")
	end = llm_output.rfind("]")
	if start == -1 or end < start:
		raise ValueError(f"No list literal found in model output: {llm_output!r}")
	return ast.literal_eval(llm_output[start:end + 1])


# Three-stage chain:
#   1. fan the input question out to {"question", "context"} (retrieved chunk texts);
#   2. add "context_filtered": the chunks the model judges relevant;
#   3. add "output": the final answer built from the filtered chunks.
# A parsing failure in stage 2 still raises, so `.with_retry()` can re-run it.
chain_rag = (
	runnables.RunnableParallel(
		{
			"question": runnables.RunnablePassthrough(),
			"context": retriever | format_docs_to_list,
		}
	)
	.assign(context_filtered=(
		prompt_filter_context | llm | output_parsers.StrOutputParser() | _parse_chunk_list
	))
	.with_retry()
	.assign(output=(
		prompt_rag | llm | output_parsers.StrOutputParser()
	))
	.with_retry()
)

In [99]:
# `result` is a dict with the keys "question", "context", "context_filtered"
# and "output" (see how `chain_rag` is assembled above).
result = chain_rag.invoke(
  "What is Task Decomposition?"
)


# [RAG From Scratch](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&si=dE6TOhGs5KMC1zc7)

[Git](https://github.com/langchain-ai/rag-from-scratch/tree/main)

In [2]:
# Embedding model for indexing and chat model for generation, shared by the
# "RAG From Scratch" cells below.
embeddings = text_embedding_models.OpenAIEmbeddings()
llm = models.chat_openai

## Basic Flow

- [Indexing](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&si=dE6TOhGs5KMC1zc7), [Slide](https://docs.google.com/presentation/d/1MhsCqZs7wTX6P19TFnA9qRSlxH3u-1-0gWkhBiDG9lQ/edit#slide=id.p)
- [Retrieval](https://youtu.be/LxNVgdIz9sU?si=rmu8kYV1BH_hwEvo), [Slide](https://docs.google.com/presentation/d/124I8jlBRCbb0LAUhdmDwbn4nREqxSxZU1RF_eTGXUGc/edit#slide=id.g267060cc54f_0_0)
- [Generation](https://youtu.be/Vw52xyyFsB8?si=pQqUluFZUrxTnwZP), [Slide](https://docs.google.com/presentation/d/1eRJwzbdSv71e9Ou9yeqziZrz1UagwX8B1kL4TbL5_Gc/edit#slide=id.g2b46f2cb556_0_0)


In [4]:
# Load: fetch the post, parsing only the title, header and content elements.
loader = document_loaders.WebBaseLoader(
	web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
	bs_kwargs={
		"parse_only": bs4.SoupStrainer(
			class_=("post-content", "post-title", "post-header"),
		),
	},
)
doc = loader.load()

# Split: smaller chunks than the tutorial section (300 characters, 50 overlap).
# Note the naming: `doc` is the loaded page(s), `docs` is the list of chunks.
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
	chunk_size=300,
	chunk_overlap=50,
)
docs = text_splitter.split_documents(doc)

# Index: embed the chunks into Chroma and expose a default retriever.
vectorstore = stores.chroma.Chroma.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()

In [5]:
# Minimal RAG prompt: the answer must be based only on {context};
# {question} is the user's question.
template = """\
Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = prompts.ChatPromptTemplate.from_template(template)

# Basic RAG chain: retrieve -> prompt -> model -> string.
# The retrieved documents are piped through `utils.format_docs` (as in the
# Q&A chain earlier in this notebook) so the prompt receives formatted text
# instead of the raw list of Document objects, metadata included.
chain_rag: runnables.Runnable = (
	{
		"context": retriever | utils.format_docs,
		"question": runnables.RunnablePassthrough()
	}
	| prompt
	| llm
	| output_parsers.StrOutputParser()
)

In [15]:
# The chain ends in a string output parser, so this returns the answer as plain text.
chain_rag.invoke("What is Task Decomposition?")

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach helps agents to plan and execute tasks more efficiently by transforming big tasks into manageable subtasks. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'

## Query Translation

Query transformations are a set of approaches that rewrite and/or modify the user's question to improve retrieval.



### [Multi Query](https://youtu.be/JChPi0CRnDY?si=wEgjcc0NHINTvQVh), [Slide](https://docs.google.com/presentation/d/15pWydIszbQG3Ipur9COfTduutTZm6ULdkkyX-MNry8I/edit#slide=id.g268cd4ba153_0_0), [LangChain](https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever/)



### [RAG Fusion](https://youtu.be/77qELPbNgxA?si=uyfzuemn02ktS2xe), [Slide](https://docs.google.com/presentation/d/1EwykmdVSQqlh6XpGt8APOMYp4q1CZqqeclAx61pUcjI/edit#slide=id.g268cfa48f45_0_0), [LangChain](https://python.langchain.com/docs/integrations/retrievers/cohere-reranker/)



### [Decomposition](https://youtu.be/h0OPWlEOank?si=jU3DecxsmxDWi9az), [Slide](https://docs.google.com/presentation/d/1O97KYrsmYEmhpQ6nkvOVAqQYMJvIaZulGFGmz4cuuVE/edit#slide=id.g268fdc1fda2_0_0)



### [Step Back](https://youtu.be/xn1jEjRyJ2U?si=63WfDLTQwBKmUsdW), [Slide](https://docs.google.com/presentation/d/1L0MRGVDxYA1eLOR0L_6Ze1l2YV8AhN1QKUtmNA-fJlU/edit#slide=id.g268cfa65240_0_0)



### [HyDE](https://youtu.be/SaDzIVkYqyY?si=7tFx5bpTiBpy5KkV), [Slide](https://docs.google.com/presentation/d/10MmB_QEiS4m00xdyu-92muY-8jC3CdaMpMXbXjzQXsM/edit#slide=id.g2b872e9a17e_0_0)

### [Routing](https://youtu.be/pfpIndq7Fi8?si=m6SerpLuJdKzIV6A), [Slide](https://docs.google.com/presentation/d/1kC6jFj8C_1ZXDYcFaJ8vhJvCYEwxwsVqk2VVeKKuyx4/edit#slide=id.g26bc3116f45_0_0)



### [Query Structuring](https://youtu.be/kl6NwWYxvbM?si=Vm0MiQL13kI0nr-Q), [Blog](https://blog.langchain.dev/query-construction/)

## Indexing

### [Multi-Representation Indexing](https://youtu.be/gTCU9I6QqCE?si=jQ3Aj9ko3DYVQ1vU), [Slide](https://blog.langchain.dev/semi-structured-multi-modal-rag/)

### [RAPTOR](https://youtu.be/z_6EeA2LDSw?si=E09-N68W93TgBNBC), [Code](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)

### [ColBERT](https://youtu.be/cN6S0Ehm7_8?si=LGBLo-VUonJMXnmR)

# Test

In [2]:
# Chat model used by the retriever factory and the chain in this section.
llm = models.chat_openai

In [3]:
# Load the blog post, restricted to its title, header and content elements.
post_strainer_kwargs = {
	"parse_only": bs4.SoupStrainer(
		class_=("post-content", "post-title", "post-header"),
	),
}
loader = document_loaders.WebBaseLoader(
	web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
	bs_kwargs=post_strainer_kwargs,
)
docs = loader.load()

# Split into 1000-character chunks overlapping by 200 characters.
splitter = text_splitters.RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = splitter.split_documents(docs)


In [4]:

# Index the chunks in Chroma using OpenAI embeddings.
vectorstore = stores.chroma.Chroma.from_documents(
	documents=splits,
	embedding=text_embedding_models.OpenAIEmbeddings(),
)

# Plain similarity retriever (top 6 chunks).
retriever = vectorstore.as_retriever(
	search_type="similarity", search_kwargs=dict(k=6),
)

# Retriever built by the toolkit factory from the listed retriever types;
# this is the one the chain below uses.
my_retriever = stores.create_retriever(
	vectorstore=vectorstore,
	llm=llm,
	retriever_types=[
		"base",
		"MultiQueryRetriever",
		"RePhraseQueryRetriever",
	],
)

[32m2024-06-11 11:39:50.722[0m | [1mINFO    [0m | [36mtoolkit.langchain.stores[0m:[36mcreate_retriever[0m:[36m267[0m - [1mRetrievers: ['base', 'MultiQueryRetriever', 'RePhraseQueryRetriever'][0m


In [9]:
# Prompt for the filtering step: given {question} and the retrieved {context}
# chunks, the model is asked to reply with a Python list literal holding only
# the relevant chunks. The chain below parses that reply as a Python literal.
prompt_tpl_filter_context = """\
You are an AI assistant tasked with filtering a list of retrieved text chunks to only the most relevant ones for answering a given question.

Here is the question:
<question>
{question}
</question>

And here are the retrieved text chunks:
<retrieved_chunks>
{context}
</retrieved_chunks>

Carefully analyze each chunk for how relevant it is to answering the question. Consider the following:
- Does the chunk contain information that helps answer the question?
- Does the chunk provide important context or background for the question?
- Is the chunk focused on the key concepts and entities mentioned in the question?

Create a Python list containing only the most relevant chunks. The list should be a subset of the original retrieved_chunks list. Omit any chunks that are not directly helpful for answering the question. 

Output this filtered list of the most relevant chunks. The format should be a valid Python list, like:
['chunk 1 text', 
'chunk 2 text',
'chunk 3 text']

Remember, the goal is to create a focused list of only the most relevant chunks for answering the original question. Do not include any irrelevant or tangential chunks in your final result list.
"""
prompt_filter_context = prompts.ChatPromptTemplate.from_template(prompt_tpl_filter_context)

# Prompt for the answering step: takes {question} and the filtered chunks in
# {context_filtered}, and tells the model to answer "I don't know" when the
# context is not sufficient. The trailing backslash after "Helpful Answer:"
# keeps the template from ending with a newline.
prompt_tpl_rag = """\
Here is the question to answer:
<question>
{question}
</question>

And here are the relevant pieces of context that may help answer the question:
<context>
{context_filtered}
</context>

Carefully read the question and context. Think through how the context can be used to answer the question. If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know".

Provide your final answer to the question. If the question cannot be answered based on the provided context, simply say "I don't know." Keep your answer to 3 sentences maximum and prioritize conciseness.

Helpful Answer:\
"""
prompt_rag = prompts.ChatPromptTemplate.from_template(prompt_tpl_rag)

def format_docs_to_str(docs: list[document_loaders.Document]):
	"""Concatenate the documents' `page_content`, separated by blank lines."""
	separator = "\n\n"
	return separator.join([item.page_content for item in docs])

def format_docs_to_list(docs: list[document_loaders.Document]):
	"""Collect each document's `page_content` into a list, preserving order."""
	return list(map(lambda item: item.page_content, docs))

def _parse_chunk_list(llm_output: str) -> list:
	"""Extract the Python list literal from the filter model's reply.

	The prompt asks for a bare list, but the reply may be wrapped in a Markdown
	code fence or surrounded by a sentence of prose, which makes a direct
	`ast.literal_eval` fail. Only the span from the first "[" to the last "]"
	is evaluated.

	Args:
		llm_output: Raw text returned by the model.

	Returns:
		The parsed list of chunk strings.

	Raises:
		ValueError: If no bracketed span is found, or it is not a valid literal.
		SyntaxError: If the bracketed span is not valid Python syntax.
	"""
	start = llm_output.find("[")
	end = llm_output.rfind("]")
	if start == -1 or end < start:
		raise ValueError(f"No list literal found in model output: {llm_output!r}")
	return ast.literal_eval(llm_output[start:end + 1])


# Three-stage chain:
#   1. fan the input question out to {"question", "context"} (retrieved chunk texts);
#   2. add "context_filtered": the chunks the model judges relevant;
#   3. add "output": the final answer built from the filtered chunks.
# A parsing failure in stage 2 still raises, so `.with_retry()` can re-run it.
chain_rag = (
	runnables.RunnableParallel(
		{
			"question": runnables.RunnablePassthrough(),
			"context": my_retriever | format_docs_to_list,
		}
	)
	.assign(context_filtered=(
		prompt_filter_context | llm | output_parsers.StrOutputParser() | _parse_chunk_list
	))
	.with_retry()
	.assign(output=(
		prompt_rag | llm | output_parsers.StrOutputParser()
	))
	.with_retry()
)

In [10]:
# `result` is a dict with the keys "question", "context", "context_filtered"
# and "output" (see how `chain_rag` is assembled above).
result = chain_rag.invoke(
  "What is Task Decomposition?"
)


In [11]:
# Display the full result dict to compare the retrieved and filtered context.
result

{'question': 'What is Task Decomposition?',
 'context': ['The AI assistant can parse user input to several tasks: [{"task": task, "id", task_id, "dep": dependency_task_ids, "args": {"text": text, "image": URL, "audio": URL, "video": URL}}]. The "dep" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag "-task_id" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.',
  'Finite context length: The restricted context capacity limits the inclusion of h