In [None]:
import add_packages
import config
from pprint import pprint	
import os
import bs4

import ast
from langchain_core.retrievers import BaseRetriever

from toolkit.langchain import (
	document_loaders, text_splitters, text_embedding_models, stores, 
	models, prompts, utils, output_parsers, agents, output_parsers, documents,
	runnables, chains

)

In [None]:
from operator import itemgetter
from toolkit import sql 
from typing import Any, Awaitable, Callable, Optional, Type, Union, Dict, List

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.conversation.base import ConversationChain
from langchain.chains.llm import LLMChain
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.chains.sql_database.query import create_sql_query_chain
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, tool
from langchain.tools import StructuredTool
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_community.utilities import SQLDatabase
from langchain_core.callbacks import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun
from langchain_core.embeddings import Embeddings
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.language_models.chat_models import  BaseChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, Runnable
from langchain_core.tools import ToolException, ValidationError
from langchain_core.vectorstores import VectorStore

# Use Cases

## Q&A with RAG 

### Base

In [None]:
import bs4
from langchain import hub
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.messages import AIMessage, HumanMessage

In [None]:
# Indexing: Load
# Parse only the elements carrying the post title, header and body classes.
bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
loader = document_loaders.web_base_loader(
  web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
  bs_kwargs={"parse_only": bs4_strainer}
)
docs = loader.load()

# Indexing: Split
# chunk_size/chunk_overlap are presumably measured in characters (recursive character splitter) — confirm in the toolkit wrapper.
text_splitter = text_splitters.recursive_character_text_splitter(
  chunk_size=1000, chunk_overlap=200, add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

In [None]:
# Vector store wrapper used by the retriever and the agent tool below.
# Host and API key are read from the environment so no credentials live in the notebook.
qdrant_instance = stores.QdrantWrapper(
  collection_name="my-rag",
  qdrant_host=os.getenv("QDRANT_HOST"),
  qdrant_api_key=os.getenv("QDRANT_API_KEY"),
  default_search_type="similarity",
  default_search_kwargs={"k": 6},
  # The tool name/description are what an agent reads when deciding whether to call
  # the retriever, so they must describe the content actually indexed in this notebook
  # (the "LLM Powered Autonomous Agents" blog post), not an unrelated corpus.
  retriever_tool_name="search_llm_agents_blog_post",
  retriever_tool_description="Searches and returns excerpts from Lilian Weng's blog post 'LLM Powered Autonomous Agents'.",
)

In [None]:
# Indexing: Store
# NOTE(review): re-running this cell presumably inserts the same chunks again — confirm whether the wrapper de-duplicates.
qdrant_instance.vector_store.add_documents(documents=all_splits)

In [None]:
# Retrieval and Generation: Retrieve
# Run the retriever on its own first so retrieval can be inspected before it is wired into a chain.
query = "What are the approaches to Task Decomposition?"
retrieved_docs = qdrant_instance.invoke_retriever(query)

In [None]:
# Retrieval and Generation: Generate

chat = models.chat_openai

prompt = prompts.rag_prompt

# The input question is fanned out: it goes to the retriever (whose documents are
# formatted by utils.format_docs) and is also passed through unchanged as "question".
rag_chain = (
  {
    "context": qdrant_instance.retriever | utils.format_docs, 
    "question": RunnablePassthrough()
  }
  | prompt
  | chat
  | output_parsers.StrOutputParser()
)

# Render the prompt with placeholder values to inspect the messages it produces.
example_messages = prompt.invoke(
  {
    "context": "filter context",
    "question": "filter question"
  }
).to_messages()

In [None]:
# Stream the answer; the chain ends in a string parser, so each chunk is printed directly.
for chunk in rag_chain.stream("What is Task Decomposition?"):
  print(chunk, end="", flush=True)

### Add Source

In [None]:
# Adding sources
# Answer-generation half: reads x["context"] (the retrieved documents), formats it
# with utils.format_docs, then runs prompt -> chat model -> string parser.
rag_chain_from_docs = (
  RunnablePassthrough.assign(context=(lambda x: utils.format_docs(x["context"])))
  | prompt
  | chat
  | output_parsers.str_output_parser()
)
# Backward-compatible alias for the original, misspelled name.
rag_chain_from_dos = rag_chain_from_docs

# Retrieve first and keep the raw documents under "context", then attach the
# generated text under "answer" so the output carries both answer and sources.
rag_chain_with_source = RunnableParallel(
  {
    "context": qdrant_instance.retriever,
    "question": RunnablePassthrough()
  }
).assign(answer=rag_chain_from_docs)

In [None]:
# The output dict has the keys "context", "question" and "answer".
rag_chain_with_source.invoke("What is Task Decomposition?")

### Add chat history

In [None]:
# Chain that runs the contextualize-question prompt through the chat model and
# returns plain text (presumably a standalone rewrite of the question — see prompts.contextualize_q_prompt).
contextualize_q_chain = (
  prompts.contextualize_q_prompt 
  | chat
  | output_parsers.str_output_parser()
)

def contextualized_question(input: dict):
  """Choose what is fed to the retriever for this turn.

  Args:
    input: Chain input; must contain "question" and may contain "chat_history".

  Returns:
    ``contextualize_q_chain`` when ``input["chat_history"]`` is present and
    non-empty, otherwise the value of ``input["question"]``.
  """
  has_history = bool(input.get("chat_history"))
  return contextualize_q_chain if has_history else input["question"]

# NOTE: this rebinds `rag_chain` from the earlier cell.
# There is no string parser at the end, so the chain yields the chat model's message
# object; the cells below rely on that (`ai_msg` is appended to the history, `chunk.content` is printed).
rag_chain = (
  RunnablePassthrough.assign(
    context=contextualized_question | qdrant_instance.retriever | utils.format_docs
  )
  | prompts.qa_prompt
  | chat
)


In [None]:
# Start from an empty history; it is mutated in place as the conversation proceeds.
chat_history = []

questions = [
  "What is Task Decomposition?",
  "What are common ways of doing it?"
]

for question in questions:
  ai_msg = rag_chain.invoke({
    "question": question, "chat_history": chat_history
  })
  # Record both sides of the turn so the next question is answered with this context.
  chat_history.extend([HumanMessage(content=question), ai_msg])

### Streaming

In [None]:
# Chunks here are message chunks (no string parser in this chain), hence `.content`.
for chunk in rag_chain.stream({
  "question": "What is Task Decomposition", "chat_history": []
}):
  print(chunk.content, flush=True, end='')

### Per-User Retrieval

### Citations

### Use Agents

In [None]:
# The only tool offered to the agent is the retriever tool built by the Qdrant wrapper.
tools = [
  qdrant_instance.retriever_tool
]

agent_prompt = prompts.prompt_agent_openai_tools

agent = agents.create_openai_tools_agent(
  llm=models.chat_openai,
  tools=tools,
  prompt=agent_prompt,
)
# The same tool list is passed to both the agent and its executor.
agent_executor = agents.AgentExecutor(agent=agent, tools=tools, verbose=True)

In [None]:
# Smoke test of the agent executor with a plain greeting.
agent_executor.invoke({"input": "hi, i am Bob"})

### Use Local Models

# Tutorials

## [Build a RAG App](https://python.langchain.com/v0.2/docs/tutorials/rag/)

In [None]:
# Chat model used by the chains in this tutorial section.
llm = models.chat_openai

In [None]:
# Load the blog post, parsing only the elements with the post content/title/header classes.
loader = document_loaders.WebBaseLoader(
	web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
	bs_kwargs=dict(
		parse_only=bs4.SoupStrainer(
			class_=("post-content", "post-title", "post-header")
		)
	),
)
# NOTE: rebinds `docs` from the earlier section.
docs = loader.load()

# Split the loaded page into overlapping chunks for embedding.
splitter = text_splitters.RecursiveCharacterTextSplitter(
	chunk_size=1000, chunk_overlap=200,
)
splits = splitter.split_documents(docs)


In [None]:
# Embed the chunks and index them in a Chroma vector store.
vectorstore = stores.chroma.Chroma.from_documents(
	documents=splits, embedding=text_embedding_models.OpenAIEmbeddings(),
)
# Similarity search returning the 6 closest chunks per query.
retriever = vectorstore.as_retriever(
	search_type="similarity",
	search_kwargs={
		"k": 6,
	}
)

In [None]:
# Prompt for the relevance-filtering step. Template variables: {question}, {context}.
# The model is asked to answer with a Python list literal; the chain defined later
# in this section parses that text with ast.literal_eval.
prompt_tpl_filter_context = """\
You are an AI assistant tasked with filtering a list of retrieved text chunks to only the most relevant ones for answering a given question.

Here is the question:
<question>
{question}
</question>

And here are the retrieved text chunks:
<retrieved_chunks>
{context}
</retrieved_chunks>

Carefully analyze each chunk for how relevant it is to answering the question. Consider the following:
- Does the chunk contain information that helps answer the question?
- Does the chunk provide important context or background for the question?
- Is the chunk focused on the key concepts and entities mentioned in the question?

Create a Python list containing only the most relevant chunks. The list should be a subset of the original retrieved_chunks list. Omit any chunks that are not directly helpful for answering the question. 

Output this filtered list of the most relevant chunks. The format should be a valid Python list, like:
['chunk 1 text', 
'chunk 2 text',
'chunk 3 text']

Remember, the goal is to create a focused list of only the most relevant chunks for answering the original question. Do not include any irrelevant or tangential chunks in your final result list.
"""
prompt_filter_context = prompts.ChatPromptTemplate.from_template(prompt_tpl_filter_context)

# Prompt for the answer-generation step. Template variables: {question}, {context_filtered}.
# The template contains no scratchpad section, so the instructions ask the model to
# reason and then answer directly instead of pointing at a "<scratchpad> area below".
prompt_tpl_rag = """\
Here is the question to answer:
<question>
{question}
</question>

And here are the relevant pieces of context that may help answer the question:
<context>
{context_filtered}
</context>

Carefully read the question and context. Think through how the context can be used to answer the question. If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know".

Provide your final answer to the question. If the question cannot be answered based on the provided context, simply say "I don't know." Keep your answer to 3 sentences maximum and prioritize conciseness.

Helpful Answer:\
"""
prompt_rag = prompts.ChatPromptTemplate.from_template(prompt_tpl_rag)

def format_docs_to_str(docs: list[document_loaders.Document]):
	"""Concatenate the ``page_content`` of every document, separated by a blank line."""
	texts = []
	for document in docs:
		texts.append(document.page_content)
	return "\n\n".join(texts)

def format_docs_to_list(docs: list[document_loaders.Document]):
	"""Return the ``page_content`` of every document as a list, in input order."""
	contents = []
	for document in docs:
		contents.append(document.page_content)
	return contents

# Three stages, each adding a key to the running dict:
#   1. "question" (input passed through) and "context" (retrieved chunk texts as a list)
#   2. "context_filtered": the LLM's list literal, parsed with ast.literal_eval
#   3. "output": the final answer text
# ast.literal_eval raises when the model's reply is not a valid literal; with_retry()
# is applied after stages 2 and 3 so such failures are retried.
# NOTE(review): each with_retry() wraps everything built before it, so a retry presumably
# re-runs the earlier stages too — confirm against the Runnable.with_retry documentation.
chain_rag = runnables.RunnableParallel(
	{
		"question": runnables.RunnablePassthrough(),
		"context": retriever | format_docs_to_list,
	}
)\
  .assign(context_filtered=(
		prompt_filter_context | llm | output_parsers.StrOutputParser() | ast.literal_eval
  )).with_retry()\
  .assign(output=(
		prompt_rag	| llm	| output_parsers.StrOutputParser()
	)).with_retry()

In [None]:
# `result` is the accumulated dict: question, context, context_filtered and output.
result = chain_rag.invoke(
  "What is Task Decomposition?"
)


# [RAG From Scratch](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&si=dE6TOhGs5KMC1zc7)

[Git](https://github.com/langchain-ai/rag-from-scratch/tree/main)

In [None]:
# Embedding model and chat model shared by the "RAG From Scratch" cells below.
embeddings = text_embedding_models.OpenAIEmbeddings()
llm = models.chat_openai

## Basic Flow

- [Indexing](https://youtube.com/playlist?list=PLfaIDFEXuae2LXbO1_PKyVJiQ23ZztA0x&si=dE6TOhGs5KMC1zc7), [Slide](https://docs.google.com/presentation/d/1MhsCqZs7wTX6P19TFnA9qRSlxH3u-1-0gWkhBiDG9lQ/edit#slide=id.p)
- [Retrieval](https://youtu.be/LxNVgdIz9sU?si=rmu8kYV1BH_hwEvo), [Slide](https://docs.google.com/presentation/d/124I8jlBRCbb0LAUhdmDwbn4nREqxSxZU1RF_eTGXUGc/edit#slide=id.g267060cc54f_0_0)
- [Generation](https://youtu.be/Vw52xyyFsB8?si=pQqUluFZUrxTnwZP), [Slide](https://docs.google.com/presentation/d/1eRJwzbdSv71e9Ou9yeqziZrz1UagwX8B1kL4TbL5_Gc/edit#slide=id.g2b46f2cb556_0_0)


In [None]:
loader = document_loaders.WebBaseLoader(
	web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
	bs_kwargs=dict(
		parse_only=bs4.SoupStrainer(
			class_=("post-content", "post-title", "post-header")
		)
	),
)
doc = loader.load()

text_splitter = text_splitters.RecursiveCharacterTextSplitter(
	chunk_size=300, chunk_overlap=50,
)
docs = text_splitter.split_documents(doc)

vectorstore = stores.chroma.Chroma.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()

In [None]:
# Minimal RAG prompt: answer strictly from the supplied context.
template = """\
Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = prompts.ChatPromptTemplate.from_template(template)

# NOTE(review): the retriever's output is passed to the prompt as-is (no formatting
# step), so {context} presumably receives the string form of the document list.
chain_rag: runnables.Runnable = (
	{
		"context": retriever,
		"question": runnables.RunnablePassthrough()
	}
	| prompt
	| llm
	| output_parsers.StrOutputParser()
)

In [None]:
# The chain ends in a string parser, so this displays the answer text.
chain_rag.invoke("What is Task Decomposition?")

## Query Translation

Query transformations are a set of approaches focused on re-writing and / or modifying questions for retrieval.



### [Multi Query](https://youtu.be/JChPi0CRnDY?si=wEgjcc0NHINTvQVh), [Slide](https://docs.google.com/presentation/d/15pWydIszbQG3Ipur9COfTduutTZm6ULdkkyX-MNry8I/edit#slide=id.g268cd4ba153_0_0), [LangChain](https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever/)



### [RAG Fusion](https://youtu.be/77qELPbNgxA?si=uyfzuemn02ktS2xe), [Slide](https://docs.google.com/presentation/d/1EwykmdVSQqlh6XpGt8APOMYp4q1CZqqeclAx61pUcjI/edit#slide=id.g268cfa48f45_0_0), [LangChain](https://python.langchain.com/docs/integrations/retrievers/cohere-reranker/)



### [Decomposition](https://youtu.be/h0OPWlEOank?si=jU3DecxsmxDWi9az), [Slide](https://docs.google.com/presentation/d/1O97KYrsmYEmhpQ6nkvOVAqQYMJvIaZulGFGmz4cuuVE/edit#slide=id.g268fdc1fda2_0_0)



### [Step Back](https://youtu.be/xn1jEjRyJ2U?si=63WfDLTQwBKmUsdW), [Slide](https://docs.google.com/presentation/d/1L0MRGVDxYA1eLOR0L_6Ze1l2YV8AhN1QKUtmNA-fJlU/edit#slide=id.g268cfa65240_0_0)



### [HyDE](https://youtu.be/SaDzIVkYqyY?si=7tFx5bpTiBpy5KkV), [Slide](https://docs.google.com/presentation/d/10MmB_QEiS4m00xdyu-92muY-8jC3CdaMpMXbXjzQXsM/edit#slide=id.g2b872e9a17e_0_0)

### [Routing](https://youtu.be/pfpIndq7Fi8?si=m6SerpLuJdKzIV6A), [Slide](https://docs.google.com/presentation/d/1kC6jFj8C_1ZXDYcFaJ8vhJvCYEwxwsVqk2VVeKKuyx4/edit#slide=id.g26bc3116f45_0_0)



### [Query Structuring](https://youtu.be/kl6NwWYxvbM?si=Vm0MiQL13kI0nr-Q), [Blog](https://blog.langchain.dev/query-construction/)

## Indexing

### [Multi-Representation Indexing](https://youtu.be/gTCU9I6QqCE?si=jQ3Aj9ko3DYVQ1vU), [Blog](https://blog.langchain.dev/semi-structured-multi-modal-rag/)

### [RAPTOR](https://youtu.be/z_6EeA2LDSw?si=E09-N68W93TgBNBC), [Code](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)

### [ColBERT](https://youtu.be/cN6S0Ehm7_8?si=LGBLo-VUonJMXnmR)

# Test

In [None]:
# Chat model for the "Test" section; MyRagChain below reads this notebook-level `llm`.
llm = models.chat_openai

In [None]:
# Load the blog post, parsing only the post content/title/header elements.
loader = document_loaders.WebBaseLoader(
	web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
	bs_kwargs=dict(
		parse_only=bs4.SoupStrainer(
			class_=("post-content", "post-title", "post-header")
		)
	),
)
docs = loader.load()

# Split the loaded page into overlapping chunks for embedding.
splitter = text_splitters.RecursiveCharacterTextSplitter(
	chunk_size=1000, chunk_overlap=200,
)
splits = splitter.split_documents(docs)


In [None]:

# Embed the chunks and index them in a Chroma vector store.
vectorstore = stores.chroma.Chroma.from_documents(
	documents=splits, embedding=text_embedding_models.OpenAIEmbeddings(),
)

# Plain similarity retriever returning the 6 closest chunks.
retriever = vectorstore.as_retriever(
	search_type="similarity",
	search_kwargs={
		"k": 6,
	}
)

# Retriever built by the toolkit helper from the listed retriever types.
# NOTE(review): how the three types are combined is decided inside stores.create_retriever — not visible here.
my_retriever = stores.create_retriever(
	vectorstore=vectorstore,
	llm=llm,
	retriever_types=['base', 'MultiQueryRetriever', 'RePhraseQueryRetriever'],
)

In [None]:
# Prompt for the relevance-filtering step. Template variables: {question}, {context}.
# The model is asked to answer with a Python list literal, which the chain below
# parses with ast.literal_eval.
prompt_tpl_filter_context = """\
You are an AI assistant tasked with filtering a list of retrieved text chunks to only the most relevant ones for answering a given question.

Here is the question:
<question>
{question}
</question>

And here are the retrieved text chunks:
<retrieved_chunks>
{context}
</retrieved_chunks>

Carefully analyze each chunk for how relevant it is to answering the question. Consider the following:
- Does the chunk contain information that helps answer the question?
- Does the chunk provide important context or background for the question?
- Is the chunk focused on the key concepts and entities mentioned in the question?

Create a Python list containing only the most relevant chunks. The list should be a subset of the original retrieved_chunks list. Omit any chunks that are not directly helpful for answering the question. 

Output this filtered list of the most relevant chunks. The format should be a valid Python list, like:
['chunk 1 text', 
'chunk 2 text',
'chunk 3 text']

Remember, the goal is to create a focused list of only the most relevant chunks for answering the original question. Do not include any irrelevant or tangential chunks in your final result list.
"""
prompt_filter_context = prompts.ChatPromptTemplate.from_template(prompt_tpl_filter_context)

# Prompt for the answer-generation step. Template variables: {question}, {context_filtered}.
# The trailing backslash after "Helpful Answer:" suppresses the final newline of the template.
prompt_tpl_rag = """\
Here is the question to answer:
<question>
{question}
</question>

And here are the relevant pieces of context that may help answer the question:
<context>
{context_filtered}
</context>

Carefully read the question and context. Think through how the context can be used to answer the question. If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know".

Provide your final answer to the question. If the question cannot be answered based on the provided context, simply say "I don't know." Keep your answer to 3 sentences maximum and prioritize conciseness.

Helpful Answer:\
"""
prompt_rag = prompts.ChatPromptTemplate.from_template(prompt_tpl_rag)

def format_docs_to_str(docs: list[document_loaders.Document]):
	"""Join the ``page_content`` of all documents into one string, blank-line separated."""
	pieces = list(map(lambda document: document.page_content, docs))
	separator = "\n\n"
	return separator.join(pieces)

def format_docs_to_list(docs: list[document_loaders.Document]):
	"""Extract ``page_content`` from each document, preserving input order."""
	return list(map(lambda document: document.page_content, docs))

# Same three-stage chain as in the tutorial section, but fed by `my_retriever`:
#   1. "question" (input passed through) and "context" (retrieved chunk texts as a list)
#   2. "context_filtered": the LLM's list literal, parsed with ast.literal_eval
#   3. "output": the final answer text
# with_retry() is applied after stages 2 and 3 so failures (e.g. an unparsable list literal) are retried.
chain_rag = runnables.RunnableParallel(
	{
		"question": runnables.RunnablePassthrough(),
		"context": my_retriever | format_docs_to_list,
	}
)\
  .assign(context_filtered=(
		prompt_filter_context | llm | output_parsers.StrOutputParser() | ast.literal_eval
  )).with_retry()\
  .assign(output=(
		prompt_rag	| llm	| output_parsers.StrOutputParser()
	)).with_retry()

In [14]:
class InputChainRag(BaseModel):
	# Argument schema for exposing the RAG chain as a tool: one free-text question.
	# (Kept as a comment rather than a docstring so the generated schema is unchanged.)
	question: str = Field(
		description="user question, natural language, NOT sql query",
	)

class MyRagChain:
	"""Retrieve chunks, let the LLM filter them for relevance, and optionally answer from them.

	The chain maps a question string to a dict with the keys ``question``,
	``context`` (retrieved chunk texts) and ``context_filtered`` (the subset the
	LLM kept). Unless ``just_return_ctx`` is set, a final step adds ``result``,
	the generated answer. The chain can also be exposed to an agent as a
	``StructuredTool`` via ``create_tool_chain_rag``.

	Args:
		retriever: Retriever that supplies the candidate documents.
		is_debug: When True, ``invoke_chain``/``ainvoke_chain`` always return the
			full chain output dict.
		just_return_ctx: When True, the answer-generation step is not added to
			the chain, and outside debug mode only the filtered chunks are returned.
		tool_name: Default tool name used by ``create_tool_chain_rag``.
		tool_description: Default tool description used by ``create_tool_chain_rag``.
		tool_metadata: Default tool metadata used by ``create_tool_chain_rag``.
		tool_tags: Default tool tags used by ``create_tool_chain_rag``.
		chat_model: Chat model used by both LLM steps. When omitted, the
			notebook-level ``llm`` is used, as before.
	"""

	def __init__(
		self,
		retriever: BaseRetriever,
		is_debug: bool = False,
		just_return_ctx: bool = False,

		tool_name: str = None,
		tool_description: str = None,
		tool_metadata: Optional[Dict[str, Any]] = None,
		tool_tags: Optional[List[str]] = None,
		chat_model: Optional[BaseChatModel] = None,
	) -> None:

		self.is_debug = is_debug
		self.just_return_ctx = just_return_ctx

		self.tool_name = tool_name
		self.tool_description = tool_description
		self.tool_metadata = tool_metadata
		self.tool_tags = tool_tags

		# Fall back to the notebook-level `llm` only when no model is injected.
		self.llm = llm if chat_model is None else chat_model

		# Relevance-filter prompt; the model must reply with a Python list literal.
		self.prompt_tpl_filter_context = """\
		You are an AI assistant tasked with filtering a list of retrieved text chunks to only the most relevant ones for answering a given question.

		Here is the question:
		<question>
		{question}
		</question>

		And here are the retrieved text chunks:
		<retrieved_chunks>
		{context}
		</retrieved_chunks>

		Carefully analyze each chunk for how relevant it is to answering the question. Consider the following:
		- Does the chunk contain information that helps answer the question?
		- Does the chunk provide important context or background for the question?
		- Is the chunk focused on the key concepts and entities mentioned in the question?

		Create a Python list containing only the most relevant chunks. The list should be a subset of the original retrieved_chunks list. Omit any chunks that are not directly helpful for answering the question. 

		Output this filtered list of the most relevant chunks. The format should be a valid Python list, like:
		['chunk 1 text', 
		'chunk 2 text',
		'chunk 3 text']

		Remember, the goal is to create a focused list of only the most relevant chunks for answering the original question. Do not include any irrelevant or tangential chunks in your final result list.
		"""
		self.prompt_filter_context = prompts.ChatPromptTemplate.from_template(self.prompt_tpl_filter_context)

		# Answer-generation prompt; consumes the filtered chunks.
		self.prompt_tpl_rag = """\
		Here is the question to answer:
		<question>
		{question}
		</question>

		And here are the relevant pieces of context that may help answer the question:
		<context>
		{context_filtered}
		</context>

		Carefully read the question and context. Think through how the context can be used to answer the question. If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know".

		Provide your final answer to the question. If the question cannot be answered based on the provided context, simply say "I don't know." Keep your answer to 3 sentences maximum and prioritize conciseness.

		Helpful Answer:\
		"""
		self.prompt_rag = prompts.ChatPromptTemplate.from_template(self.prompt_tpl_rag)

		self.retriever = retriever

		# Stage 1 + 2: retrieve chunk texts, then let the LLM keep the relevant ones.
		# ast.literal_eval raises on a malformed list literal; with_retry() retries it.
		retrieve_and_filter = runnables.RunnableParallel(
			{
				"question": runnables.RunnablePassthrough(),
				"context": self.retriever | self.format_docs_to_list,
			}
		).assign(context_filtered=(
			self.prompt_filter_context | self.llm | output_parsers.StrOutputParser() | ast.literal_eval
		)).with_retry()

		if self.just_return_ctx:
			# Context-only mode: no answer is generated.
			self.chain_rag = retrieve_and_filter
		else:
			# Stage 3: generate the answer. `.assign()` returns a new runnable, so the
			# result has to be stored — calling it without rebinding has no effect.
			self.chain_rag = retrieve_and_filter.assign(result=(
				self.prompt_rag | self.llm | output_parsers.StrOutputParser()
			)).with_retry()

		# Attached to every run so traces of this chain can be identified.
		self.metadata_chain_rag = {"is_my_rag_chain_run": True}

	def format_docs_to_str(self, docs: list[document_loaders.Document]):
		"""Join the ``page_content`` of all documents, separated by blank lines."""
		return "\n\n".join(doc.page_content for doc in docs)

	def format_docs_to_list(self, docs: list[document_loaders.Document]):
		"""Return the ``page_content`` of every document as a list."""
		return [doc.page_content for doc in docs]

	def prepare_chain_input(self, question: str):
		"""Hook for pre-processing the question; currently returns it unchanged."""
		result = question
		return result

	def _select_output(self, result):
		"""Reduce the chain output to the filtered chunks in non-debug, context-only mode."""
		if not self.is_debug and self.just_return_ctx:
			return result["context_filtered"]
		return result

	def invoke_chain(self, question: str) -> Union[str, list, dict]:
		"""Run the RAG chain on a natural-language question.

		Returns the LLM-filtered context chunks when ``just_return_ctx`` is set and
		``is_debug`` is off; otherwise the full chain output dict.
		"""
		result = self.chain_rag.invoke(
			question,
			config={"metadata": self.metadata_chain_rag},
		)
		return self._select_output(result)

	async def ainvoke_chain(self, question: str) -> Union[str, list, dict]:
		"""Async variant of ``invoke_chain``: run the RAG chain on a natural-language question.

		Returns the LLM-filtered context chunks when ``just_return_ctx`` is set and
		``is_debug`` is off; otherwise the full chain output dict.
		"""
		result = await self.chain_rag.ainvoke(
			question,
			config={"metadata": self.metadata_chain_rag},
		)
		return self._select_output(result)

	def create_tool_chain_rag(
		self,
		func: Callable = None,
		args_schema: Type[BaseModel] = None,
		coroutine: Optional[Callable[..., Awaitable[Any]]] = None,
		name: str = None,
		description: str = None,
		return_direct: bool = False, # True: Agent will stop after tool completed
		handle_tool_error: Optional[Union[bool, str, Callable[[ToolException], str]]] = True,
		handle_validation_error: Optional[Union[bool, str, Callable[[ValidationError], str]]] = True,
		verbose: bool = False,
		metadata: Optional[Dict[str, Any]] = None,
		tags: Optional[List[str]] = None,
	):
		"""Wrap the chain in a ``StructuredTool``.

		Arguments left as ``None`` fall back to instance defaults: ``func`` to
		``invoke_chain``, ``coroutine`` to ``ainvoke_chain``, ``args_schema`` to
		``InputChainRag``, and ``name``/``description``/``metadata``/``tags`` to the
		``tool_*`` values given at construction. All other arguments are forwarded
		unchanged to ``StructuredTool.from_function``.

		Returns:
			The tool created by ``StructuredTool.from_function``.
		"""
		func = self.invoke_chain if func is None else func
		args_schema = InputChainRag if args_schema is None else args_schema
		coroutine = self.ainvoke_chain if coroutine is None else coroutine

		name = self.tool_name if name is None else name
		description = self.tool_description if description is None else description
		metadata = self.tool_metadata if metadata is None else metadata
		tags = self.tool_tags if tags is None else tags

		tool_chain_rag = StructuredTool.from_function(
			func=func,
			args_schema=args_schema,
			coroutine=coroutine,
			name=name,
			description=description,
			return_direct=return_direct,
			handle_tool_error=handle_tool_error,
			handle_validation_error=handle_validation_error,
			verbose=verbose,
			metadata=metadata,
			tags=tags,
		)

		return tool_chain_rag

...

Ellipsis

In [15]:
# Context-only, non-debug configuration: invoke_chain() returns just the filtered chunks.
# The tool_* arguments are left as None here.
my_rag_chain = MyRagChain(
  retriever=my_retriever,
  is_debug=False,
	just_return_ctx=True,
	tool_name=None,
	tool_description=None,
	tool_metadata=None,
	tool_tags=None,
)

In [None]:
# Call the underlying runnable directly; this bypasses invoke_chain(), so the full output dict is returned.
result = my_rag_chain.chain_rag.invoke(
  "What is Task Decomposition?"
)

In [16]:
# Go through the wrapper: with just_return_ctx=True and is_debug=False this yields result["context_filtered"].
result = my_rag_chain.invoke_chain(
  question="What is Task Decomposition?"
)

In [18]:
# Size of the value returned by the previous cell (the filtered-chunk list in this configuration).
pprint(len(result))

2
