# Imports

In [None]:
import packages

from context.utils import typer as t
from context.infra.clients import logger

from toolkit.utils import utils
from toolkit.utils.utils import rp_print
from toolkit.utils.llm import main as utils_llm

import context.instances as inst
import context.consts as const
import context.settings.main as settings_main

from toolkit.llm.langchain.core import integration, utils as utils_lc
from toolkit.llm.langchain.data.indexing import (
    documents, document_loaders, text_splitters,
)
from toolkit.llm.langchain.data.persistence import retrievers
from toolkit.llm.langchain.execution import (
    runnables, graphs, tools, agents, tools as tools_lc
)
from toolkit.llm.langchain.models import (
    prompts as prompts_lc, llms, messages,
)

In [2]:
# Shared handles taken from the project `inst` module.
# NOTE: `vector_store` is rebound further down in this notebook to the
# "vehicle" entry of `inst.vector_stores_qdrant`.
vector_store = inst.vector_store_in_memory
vector_stores = inst.vector_stores_qdrant

# Read from settings_main.VEC_STR_COLLS; presumably the vector-store
# collection names — TODO confirm. Not referenced elsewhere in the visible cells.
COLLS = settings_main.VEC_STR_COLLS

# Short alias; the node functions below reference inst.llm_main directly.
llm = inst.llm_main

# Dev

## Load Data

In [6]:
# Fetch the post, parsing only elements whose class is one of the three
# listed below (content, title, header).
loader = document_loaders.WebBaseLoader(
	web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
	bs_kwargs={
		"parse_only": document_loaders.bs4.SoupStrainer(
			class_=("post-content", "post-title", "post-header"),
		),
	},
)
docs = loader.load()

# Chunk the loaded documents; add_start_index=True asks the splitter to
# record each chunk's start offset.
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
	chunk_size=1000,
	chunk_overlap=200,
	add_start_index=True,
)
splits = text_splitter.split_documents(docs)

# Label every chunk with the third of the chunk list it falls in, so a
# search can later be restricted to "beginning", "middle" or "end".
len_splits = len(splits)
for i, doc in enumerate(splits):
	doc.metadata["section"] = (
		"beginning" if i < len_splits // 3
		else "middle" if i < (len_splits * 2) // 3
		else "end"
	)

# Index the labelled chunks into whichever store `vector_store` is bound
# to when this cell runs.
document_ids = vector_store.add_documents(documents=splits)


## Algo

### Basic

In [2]:
# Earlier prompt-loading experiments, kept for reference only.
# NOTE(review): the second line uses the name `prompts`, while the import
# cell binds that module as `prompts_lc` — adjust before re-enabling.
# prompt: runnables.Runnable = integration.hub.pull("rlm/rag-prompt")
# prompt = prompts.PromptTemplate.from_template(prompts.prompts["RAG"])

# Rebind the module-level store: from here on node_retrieve searches the
# "vehicle" entry of the Qdrant stores instead of the in-memory store.
vector_store = inst.vector_stores_qdrant["vehicle"]

# Structured-output schema passed to llms.create_structured_llm in
# node_analyze_query.
# NOTE(review): the docstring and the Annotated description strings are
# presumably forwarded to the model as schema metadata — treat them as
# runtime text and confirm before rewording.
class Search(t.TypedDict):
	"""Search Query"""

	# Free-text query; `...` marks the field as having no default.
	query: t.Annotated[str, ..., "Search query to run."]
	# One of the three section labels written into document metadata by the
	# data-loading cell.
	section: t.Annotated[
		t.Literal["beginning", "middle", "end"],
		...,
		"Section to query.",
	]

# Custom graph state for a question -> query -> context -> answer pipeline.
# NOTE: the graph assembled later in this notebook is built on
# graphs.MessagesState; the State-based builder line is commented out.
class State(t.TypedDict):
	# Raw user question (read by node_analyze_query).
	question: str
	# Structured search request (written by node_analyze_query).
	query: Search
	# Documents gathered for answering — presumably retrieval results; no
	# visible node writes this key.
	context: t.List[documents.Document]
	# Final answer text; no visible node writes this key.
	answer: str

def node_analyze_query(state: State):
	"""Turn the user's question into a structured ``Search`` request.

	Wraps ``inst.llm_main`` with the ``Search`` schema and invokes it on
	``state["question"]``.

	Returns:
		A partial state update of the form ``{"query": <model output>}``.
	"""
	structured_model = llms.create_structured_llm(inst.llm_main, Search)
	return {"query": structured_model.invoke(state["question"])}

@tools.tool(response_format="content_and_artifact")
def node_retrieve(query: str):
	"""Retrieve information related to a query."""
	# NOTE(review): the docstring above is presumably exposed to the model as
	# the tool description (as LangChain's `tool` decorator does) — keep its
	# wording stable.
	#
	# Returns a (content, artifact) pair to match response_format: the text
	# rendering of the hits, and the hit documents themselves.
	#
	# Section filtering is currently disabled:
	# filter=lambda doc: doc.metadata.get("section") == query["section"],
	hits = vector_store.similarity_search(query=query, k=2)
	rendered_hits = [
		f"Source: {hit.metadata}\nContent: {hit.page_content}"
		for hit in hits
	]
	return "\n\n".join(rendered_hits), hits

# Graph node wrapping the retrieval tool; the graph wiring below refers to
# it through `node_tools.name`.
node_tools = graphs.ToolNode([node_retrieve])

def node_query_or_respond(state: graphs.MessagesState):
	"""Ask the tool-enabled model for the next message in the conversation.

	The model is given ``node_retrieve`` as its only tool, so its reply may
	be either a direct answer or a request to call that tool.

	Returns:
		``{"messages": [reply]}`` — a one-item list, because MessagesState
		appends returned messages to the history instead of replacing it.
	"""
	model_with_tools = llms.create_tooled_llm(inst.llm_main, [node_retrieve])
	reply = model_with_tools.invoke(state["messages"])
	return {"messages": [reply]}

def node_generate(state: graphs.MessagesState):
	"""Generate a response using the retrieved content.

	Steps:
	1. Collect the trailing run of tool messages from the history (the
	   output of the most recent tool step).
	2. Extract their content and substitute it for the ``{context}``
	   placeholder of the RAG system prompt.
	3. Invoke ``inst.llm_main`` on that system message followed by the
	   conversation messages (human, system, and AI messages that carry no
	   tool calls).

	Args:
		state: Graph state; only ``state["messages"]`` is read.

	Returns:
		``{"messages": [response]}`` with the model's reply as the only item.
	"""
	# Walk backwards and stop at the first non-tool message, so only the
	# tool results produced by the latest tool step are used.
	messages_tool = []
	for message in reversed(state["messages"]):
		if message.type == messages.TypeMsg.TOOL:
			messages_tool.append(message)
		else:
			break

	# Restore chronological order after the reverse walk.
	messages_tool.reverse()

	docs_content = utils_lc.extract_content(messages_tool)

	# The prompts module is imported as `prompts_lc`; the bare name `prompts`
	# is not bound by the import cell and raised NameError here.
	prompt_system_rag: str = prompts_lc.prompts["RAG"]["system"]
	# Plain replace rather than str.format, so only the "{context}"
	# placeholder is touched and other braces in the prompt are left alone.
	prompt_system_rag = prompt_system_rag.replace("{context}", docs_content)

	message_system = messages.SystemMessage(prompt_system_rag)

	# Drop tool messages and the AI messages that only requested tool calls;
	# the retrieved content already reaches the model via the system prompt.
	messages_conversation = [
		message
		for message in state["messages"]
		if message.type in (messages.TypeMsg.HUMAN, messages.TypeMsg.SYSTEM)
		or (message.type == messages.TypeMsg.AI and not message.tool_calls)
	]

	prompt = [message_system] + messages_conversation

	response = inst.llm_main.invoke(prompt)

	return {
		"messages": [response]
	}

# Alternative builder on the custom State schema, kept for reference:
# graph_builder = graphs.StateGraph(state_schema=State)
graph_builder = graphs.StateGraph(graphs.MessagesState)

# Nodes are added without explicit names; the edges below refer to them
# through `__name__` (plain functions) or `.name` (the tool node).
graph_builder.add_node(node_query_or_respond)
graph_builder.add_node(node_tools)
graph_builder.add_node(node_generate)

# Flow: query_or_respond -> (END | tools) -> generate -> END
graph_builder.set_entry_point(node_query_or_respond.__name__)
graph_builder.add_conditional_edges(
	node_query_or_respond.__name__,
	graphs.tools_condition,
	# Each value the condition may return maps to the node of the same name.
	{target: target for target in (graphs.END, node_tools.name)},
)
graph_builder.add_edge(node_tools.name, node_generate.__name__)
graph_builder.add_edge(node_generate.__name__, graphs.END)

memory = graphs.MemorySaver()
# The checkpointer is currently disabled for this graph (checkpointer=memory);
# `memory` is still handed to the agent created later in the notebook.
graph = graph_builder.compile()

# Run configuration passed to graph.astream in the streaming cell.
config = {"configurable": {"thread_id": "abc123"}}

# utils.display(utils.Image(graph.get_graph().draw_mermaid_png()))

In [3]:
# Sample prompts: a greeting, two single-step questions, and one two-step
# question. They refer to "the post" — presumably the web page indexed in
# the data-loading cell.
questions = [
	"Hello",
	"What is Task Decomposition?",
	"What does the end of the post say about Task Decomposition?",
	(
		"What is the standard method for Task Decomposition?\n\n"
		"Once you get the answer, look up common extensions of that method."
	),
]

# Prompts for the "vehicle" vector store.
questions_vehicle = ["What are the notes to consider before driving?"]

# The prompt sent by the streaming cell.
question = questions_vehicle[0]


In [None]:
inputs = {
	"messages": [
		{
			"role": "user",
			"content": question,
		}
	]
}
async for step in graph.astream(inputs, stream_mode="updates", config=config):
  rp_print(step)

### Agents

#### Basic

In [4]:
# Agent alternative to the hand-wired graph: same model and retrieval tool,
# with `memory` attached as its checkpointer.
# NOTE(review): runs presumably need a config carrying a thread_id (such as
# the `config` dict defined with the graph) — confirm against the wrapper.
agent_executor = agents.create_react_agent(
	model=inst.llm_main, tools=[node_retrieve], checkpointer=memory,
)

In [None]:
# Render the agent's graph via the project helper.
graphs.display_graph(agent_executor)

# Ref

- https://python.langchain.com/docs/tutorials/rag/ ✅
- https://python.langchain.com/docs/tutorials/qa_chat_history/ ✅
- https://python.langchain.com/docs/how_to/qa_sources/
- https://python.langchain.com/docs/concepts/retrieval/
- https://python.langchain.com/docs/integrations/vector_stores/qdrant/
- Graph
  - https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_agentic_rag/
  - https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_adaptive_rag/
  - https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_crag/
  - https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_self_rag/
  - https://langchain-ai.github.io/langgraph/tutorials/sql-agent/

**Components**

https://python.langchain.com/docs/integrations/components/

- Document Loaders
  - https://python.langchain.com/docs/how_to/#document-loaders
  - https://python.langchain.com/docs/integrations/document_loaders/
  - https://python.langchain.com/api_reference/core/document_loaders/langchain_core.document_loaders.base.BaseLoader.html
- Text Splitters
  - https://python.langchain.com/docs/concepts/text_splitters/
  - https://python.langchain.com/docs/how_to/#text-splitters
  - https://python.langchain.com/api_reference/text_splitters/base/langchain_text_splitters.base.TextSplitter.html
- Embeddings
  - https://python.langchain.com/docs/concepts/embedding_models/
  - https://python.langchain.com/docs/how_to/embed_text/
  - https://python.langchain.com/docs/integrations/text_embedding/
  - https://python.langchain.com/api_reference/core/embeddings/langchain_core.embeddings.Embeddings.html
- Vector Stores
  - https://python.langchain.com/docs/concepts/vector_stores/
  - https://python.langchain.com/docs/how_to/vector_stores/
  - https://python.langchain.com/docs/integrations/vector_stores/
  - https://python.langchain.com/api_reference/core/vector_stores/langchain_core.vector_stores.base.vector_store.html
- Retrievers
  - https://python.langchain.com/docs/concepts/retrievers/
  - https://python.langchain.com/docs/how_to/#retrievers
- LLMs
  - https://python.langchain.com/docs/concepts/chat_models/
  - https://python.langchain.com/docs/how_to/#chat-models
  - https://python.langchain.com/docs/integrations/chat/
  - https://python.langchain.com/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html
- Tools
  - https://python.langchain.com/docs/concepts/tools/
  - https://python.langchain.com/api_reference/core/tools/langchain_core.tools.convert.tool.html
  - https://python.langchain.com/docs/how_to/custom_tools/

**Concepts**

- https://python.langchain.com/docs/concepts/structured_outputs/
- https://python.langchain.com/docs/concepts/chat_history/

**Techniques**

- https://python.langchain.com/docs/how_to/#query-analysis
- https://python.langchain.com/docs/how_to/message_history/