# Imports

In [1]:
import packages

from context.utils import typer as t
from context.infra.clients import logger

from toolkit.utils import utils
from toolkit.utils.utils import rp_print
from toolkit.utils.llm import main as utils_llm

import context.instances as inst
import context.consts as const
import context.settings.main as settings_main

from toolkit.llm.langchain.core import integration, utils as utils_lc
from toolkit.llm.langchain.data.indexing import (
    documents, document_loaders, text_splitters,
)
from toolkit.llm.langchain.data.persistence import retrievers
from toolkit.llm.langchain.execution import (
    runnables, graphs, tools, agents, tools as tools_lc
)
from toolkit.llm.langchain.models import (
    prompts as prompts_lc, llms, messages,
)

[32m2025-02-04 14:47:57.049[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m68[0m - [1mapps directory: /Users/thung/Documents/Me/Coding/Embedded-AI/apps[0m
[32m2025-02-04 14:47:57.049[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m69[0m - [1mToolkit path: /Users/thung/Documents/Me/Coding/Embedded-AI/apps/toolkit[0m
[32m2025-02-04 14:47:57.049[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m70[0m - [1mEnvironment files loaded:[0m
[32m2025-02-04 14:47:57.050[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m72[0m - [1m  - /Users/thung/Documents/Me/Coding/Embedded-AI/apps/ports.env[0m
[32m2025-02-04 14:47:57.050[0m | [1mINFO    [0m | [36mpackages[0m:[36m<module>[0m:[36m72[0m - [1m  - /Users/thung/Documents/Me/Coding/Embedded-AI/apps/.env[0m
  from .autonotebook import tqdm as notebook_tqdm
[32m2025-02-04 14:47:58.195[0m | [1mINFO    [0m | [36mtoolkit.db.mongodb[0m:[36mconnect[0m:[36

In [None]:
# Shared handles for the cells below, all taken from the project's
# pre-built instances and settings modules.

# Qdrant-backed stores are used here; the in-memory alternative is kept
# for reference:
# vector_store = inst.vector_store_in_memory
vector_stores, COLLS = inst.vector_stores_qdrant, settings_main.VEC_STR_COLLS

# Main chat model and embedding model.
llm, embedding = inst.llm_main, inst.embedding_main

# Prompt collection exposed by the project's prompts module.
prompts = prompts_lc.prompts
# Leftover experiment (references names not defined in this notebook):
# prompt_system_rag = prompt_system_rag.replace("{context}", docs_content)


# Tutorials


## Get started


### Chat models and prompts


### Semantic search


### Classification


### Extraction


In [None]:
class Person(t.BaseModel):
	"""Information about a person."""

	# The docstring above doubles as the description of the Person schema
	# that is sent to the LLM, so a clear one can improve extraction results.
	#
	# Field conventions:
	# - every field is optional, which allows the model to decline to
	#   extract it;
	# - every field carries a `description`, which the LLM also uses -- a
	#   good description can help improve extraction results.

	name: t.Optional[str] = t.Field(
		description="The name of the person",
		default=None,
	)
	hair_color: t.Optional[str] = t.Field(
		description="The color of the person's hair if known",
		default=None,
	)
	# NOTE(review): typed as a string rather than a number -- presumably
	# intentional; confirm before changing.
	height_in_meters: t.Optional[str] = t.Field(
		description="Height measured in meters",
		default=None,
	)

class Group(t.BaseModel):
	# Wrapper schema holding a list of Person entries, so that several
	# entities can be extracted from a single piece of text.
	# Kept as a `#` comment (not a docstring): Person's docstring is used as
	# its schema description, so adding one here could change what is sent
	# to the model.
	people: t.List[Person]

# Chat prompt for the extraction demo: a fixed system instruction plus the
# raw user text as the human turn.
#
# Named `extraction_prompt_tpl` rather than `prompt_tpl` on purpose: the
# few-shot example-selector cell further down binds `prompt_tpl` to a
# different template type and a later cell calls
# `prompt_tpl.format(input=...)`. Sharing the name meant that re-running this
# cell out of order silently broke that later cell.
extraction_prompt_tpl = prompts_lc.ChatPromptTemplate.from_messages(
	[
		(
			"system",
			"You are an expert extraction algorithm. "
			"Only extract relevant information from the text. "
			"If you do not know the value of an attribute asked to extract, "
			"return null for the attribute's value.",
		),
		("human", "{user_input}"),
	]
)

# Bind the Group schema to the main model so its reply is returned as
# structured output following that schema.
llm_structured = inst.llm_main.with_structured_output(schema=Group)

# Sample text mentioning two people; the height is given in feet while the
# schema field is named `height_in_meters`.
user_input = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."

# Fill the template, run the structured model, and pretty-print the result.
prompt = extraction_prompt_tpl.invoke({"user_input": user_input})
response = llm_structured.invoke(prompt)

rp_print(response)

## Orchestration


### Chatbots


### Agents


### Retrieval Augmented Generation (RAG)


### Question-Answering with SQL


### Summarization


### Question-Answering with Graph Databases


## LangSmith


# How-to guides


## Installation


### install LangChain packages


### use LangChain with different Pydantic versions


## Key features


### return structured data from a model


### use a model to call tools


### stream runnables


### debug your LLM apps


## LangChain Expression Language (LCEL)


### chain runnables


### stream runnables


### invoke runnables in parallel


### add default invocation args to runnables


### turn any function into a runnable


### pass through inputs from one chain step to the next


### configure runnable behavior at runtime


### add message history (memory) to a chain


### route between sub-chains


### create a dynamic (self-constructing) chain


### inspect runnables


### add fallbacks to a runnable


### pass runtime secrets to a runnable


## Components


### Prompt templates


#### use few shot examples


#### use few shot examples in chat models


#### partially format prompt templates


#### compose prompts together


### Example selectors


#### use example selectors


#### select examples by length


#### select examples by semantic similarity


In [17]:
# Template used to render each selected example inside the few-shot prompt.
example_prompt_tpl = prompts_lc.PromptTemplate(
	template="Input: {input}\nOutput: {output}",
	input_variables=["input", "output"],
)

# Candidate word -> antonym examples the selector can choose from.
examples = [
	{"input": word, "output": antonym}
	for word, antonym in (
		("happy", "sad"),
		("tall", "short"),
		("energetic", "lethargic"),
		("sunny", "gloomy"),
		("windy", "calm"),
	)
]

# Selector that embeds the examples and picks the k=2 most similar ones
# for each incoming input.
# NOTE(review): `vectorstore_cls` is given `inst.vector_store_in_memory`,
# whose name suggests an instance rather than a class -- the saved output
# shows this works, but confirm it is intended.
example_selector = prompts_lc.SemanticSimilarityExampleSelector.from_examples(
	k=2,
	examples=examples,
	embeddings=inst.embedding_main,
	vectorstore_cls=inst.vector_store_in_memory,
)

# Few-shot prompt: prefix, the selected examples, then the new input to complete.
prompt_tpl = prompts_lc.FewShotPromptTemplate(
	prefix="Give the antonym of every input",
	example_selector=example_selector,
	example_prompt=example_prompt_tpl,
	suffix="Input: {input}\nOutput:",
	input_variables=["input"],
)


In [14]:

# Render the few-shot prompt for a new word and show the final text, including
# whichever examples the selector picked.
# NOTE: relies on `prompt_tpl` being the FewShotPromptTemplate built in the
# example-selector cell; run that cell first.
print(prompt_tpl.format(input="worried"))
# Optional follow-up, currently disabled: send the rendered prompt to the model.
# result = inst.llm_main.invoke(prompt_tpl.invoke({"input": "enthusiastic"}))
# rp_print(result.content)

Give the antonym of every input

Input: happy
Output: sad

Input: energetic
Output: lethargic

Input: worried
Output:


#### select examples by semantic ngram overlap


#### select examples by maximal marginal relevance


#### select examples from LangSmith few-shot datasets


### Chat models


#### do function/tool calling


#### get models to return structured output


#### cache model responses


#### get log probabilities


#### create a custom chat model class


#### stream a response back


#### track token usage


#### track response metadata across providers


#### use chat model to call tools


#### stream tool calls


#### handle rate limits


#### few shot prompt tool behavior


#### bind model-specific formatted tools


#### force a specific tool call


#### work with local models


#### init any model in one line


### Messages


#### trim messages


#### filter messages


#### merge consecutive messages of the same type


### LLMs


#### cache model responses


#### create a custom LLM class


#### stream a response back


#### track token usage


#### work with local models


### Output parsers


#### parse text from message objects


#### use output parsers to parse an LLM response into structured format


#### parse JSON output


#### parse XML output


#### parse YAML output


#### retry when output parsing errors occur


#### try to fix errors in output parsing


#### write a custom output parser class


### Document loaders


#### load PDF files


#### load web pages


#### load CSV data


#### load data from a directory


#### load HTML data


#### load JSON data


#### load Markdown data


#### load Microsoft Office data


#### write a custom document loader


### Text splitters


#### recursively split text


#### split HTML


#### split by character


#### split code


#### split Markdown by headers


#### recursively split JSON


#### split text into semantic chunks


#### split by tokens


### Embedding models


#### embed text data


#### cache embedding results


#### create a custom embeddings class


### Vector stores


#### use a vector store to retrieve data


### Retrievers


#### use a vector store to retrieve data


#### generate multiple queries to retrieve data for


#### use contextual compression to compress the data retrieved


#### write a custom retriever class


#### add similarity scores to retriever results


#### combine the results from multiple retrievers


#### reorder retrieved results to mitigate the "lost in the middle" effect


#### generate multiple embeddings per document


#### retrieve the whole document for a chunk


#### generate metadata filters


#### create a time-weighted retriever


#### use hybrid vector and keyword retrieval


### Indexing


#### reindex data to keep your vectorstore in-sync with the underlying data source


### Tools


#### create tools


#### use built-in tools and toolkits


#### use chat models to call tools


#### pass tool outputs to chat models


#### pass run time values to tools


#### add a human-in-the-loop for tools


#### handle tool errors


#### force models to call a tool


#### disable parallel tool calling


#### access the RunnableConfig from a tool


#### stream events from a tool


#### return artifacts from a tool


#### convert Runnables to tools


#### add ad-hoc tool calling capability to models


#### pass in runtime secrets


### Multimodal


#### pass multimodal data directly to models


#### use multimodal prompts


### Agents


#### use legacy LangChain Agents (AgentExecutor)


#### migrate from legacy LangChain agents to LangGraph


### Callbacks


#### pass in callbacks at runtime


#### attach callbacks to a module


#### pass callbacks into a module constructor


#### create custom callback handlers


#### use callbacks in async environments


#### dispatch custom callback events


### Custom


#### create a custom chat model class


#### create a custom LLM class


#### create a custom embeddings class


#### write a custom retriever class


#### write a custom document loader


#### write a custom output parser class


#### create custom callback handlers


#### define a custom tool


#### dispatch custom callback events


### Serialization


#### save and load LangChain objects


## Use cases


### Q&A with RAG


#### add chat history


#### stream


#### return sources


#### return citations


#### do per-user retrieval


### Extraction


#### use reference examples


#### handle long text


#### do extraction without using function calling


### Chatbots


#### manage memory


#### do retrieval


#### use tools


#### manage large chat history


### Query analysis


#### add examples to the prompt


#### handle cases where no queries are generated


#### handle multiple queries


#### handle multiple retrievers


#### construct filters


#### deal with high cardinality categorical variables


### Q&A over SQL + CSV


#### use prompting to improve results


#### do query validation


#### deal with large databases


#### deal with CSV files


### Q&A over graph databases


#### add a semantic layer over the database


#### construct knowledge graphs


### Summarization


#### summarize text in a single LLM call


#### summarize text through parallelization


#### summarize text through iterative refinement


## LangSmith


# Conceptual guide


## High level


### Why LangChain?


### Architecture


## Concepts


### Chat models


### Messages


### Chat history


### Tools


### Tool calling


### Structured output


### Memory


### Multimodality


### Runnable interface


### Streaming


### LangChain Expression Language (LCEL)


### Document loaders


### Retrieval


### Text splitters


### Embedding models


### Vector stores


### Retriever


### Retrieval Augmented Generation (RAG)


### Agents


### Prompt templates


### Output parsers


### Few-shot prompting


### Example selectors


### Async programming


### Callbacks


### Tracing


### Evaluation


### Testing


## Glossary

# Ref

- Tutorials
	- Get started
		- Chat models and prompts
		- Semantic search
		- Classification
		- [Extraction](https://python.langchain.com/docs/tutorials/extraction/)
			- [How to use reference examples when doing extraction](https://python.langchain.com/docs/how_to/extraction_examples/) 🚧
	- Orchestration
		- Chatbots
		- Agents
		- Retrieval Augmented Generation (RAG)
		- Question-Answering with SQL
		- Summarization
		- Question-Answering with Graph Databases
	- LangSmith
- How-to guides
	- Installation
		- install LangChain packages
		- use LangChain with different Pydantic versions
	- Key features
		- [return structured data from a model](https://python.langchain.com/docs/how_to/structured_output/) 🚧
		- use a model to call tools
		- stream runnables
		- debug your LLM apps
	- LangChain Expression Language (LCEL)
		- chain runnables
		- stream runnables
		- invoke runnables in parallel
		- add default invocation args to runnables
		- turn any function into a runnable
		- pass through inputs from one chain step to the next
		- configure runnable behavior at runtime
		- add message history (memory) to a chain
		- route between sub-chains
		- create a dynamic (self-constructing) chain
		- inspect runnables
		- add fallbacks to a runnable
		- pass runtime secrets to a runnable
	- Components
		- Prompt templates
			- use few shot examples
			- use few shot examples in chat models
			- partially format prompt templates
			- compose prompts together
		- Example selectors
			- [use example selectors](https://python.langchain.com/docs/how_to/example_selectors/)
			- select examples by length
			- [select examples by semantic similarity](https://python.langchain.com/docs/how_to/example_selectors_similarity/)
			- select examples by semantic ngram overlap
			- select examples by maximal marginal relevance
			- select examples from LangSmith few-shot datasets
		- Chat models
			- do function/tool calling
			- [get models to return structured output](https://python.langchain.com/docs/how_to/structured_output/) 🚧
			- cache model responses
			- get log probabilities
			- create a custom chat model class
			- stream a response back
			- track token usage
			- track response metadata across providers
			- use chat model to call tools
			- stream tool calls
			- handle rate limits
			- few shot prompt tool behavior
			- bind model-specific formatted tools
			- force a specific tool call
			- work with local models
			- init any model in one line
		- Messages
			- trim messages
			- filter messages
			- merge consecutive messages of the same type
		- LLMs
			- cache model responses
			- create a custom LLM class
			- stream a response back
			- track token usage
			- work with local models
		- Output parsers
			- parse text from message objects
			- [use output parsers to parse an LLM response into structured format](https://python.langchain.com/docs/how_to/output_parser_structured/) 🚧
			- parse JSON output
			- parse XML output
			- parse YAML output
			- retry when output parsing errors occur
			- try to fix errors in output parsing
			- write a custom output parser class
		- Document loaders
			- load PDF files
			- load web pages
			- load CSV data
			- load data from a directory
			- load HTML data
			- load JSON data
			- load Markdown data
			- load Microsoft Office data
			- write a custom document loader
		- Text splitters
			- recursively split text
			- split HTML
			- split by character
			- split code
			- split Markdown by headers
			- recursively split JSON
			- split text into semantic chunks
			- split by tokens
		- Embedding models
			- embed text data
			- cache embedding results
			- create a custom embeddings class
		- Vector stores
			- use a vector store to retrieve data
		- Retrievers
			- use a vector store to retrieve data
			- generate multiple queries to retrieve data for
			- use contextual compression to compress the data retrieved
			- write a custom retriever class
			- add similarity scores to retriever results
			- combine the results from multiple retrievers
			- reorder retrieved results to mitigate the "lost in the middle" effect
			- generate multiple embeddings per document
			- retrieve the whole document for a chunk
			- generate metadata filters
			- create a time-weighted retriever
			- use hybrid vector and keyword retrieval
		- Indexing
			- reindex data to keep your vectorstore in-sync with the underlying data source
		- Tools
			- create tools
			- use built-in tools and toolkits
			- use chat models to call tools
			- pass tool outputs to chat models
			- pass run time values to tools
			- add a human-in-the-loop for tools
			- handle tool errors
			- force models to call a tool
			- disable parallel tool calling
			- access the RunnableConfig from a tool
			- stream events from a tool
			- return artifacts from a tool
			- convert Runnables to tools
			- add ad-hoc tool calling capability to models
			- pass in runtime secrets
		- Multimodal
			- pass multimodal data directly to models
			- use multimodal prompts
		- Agents
			- use legacy LangChain Agents (AgentExecutor)
			- migrate from legacy LangChain agents to LangGraph
		- Callbacks
			- pass in callbacks at runtime
			- attach callbacks to a module
			- pass callbacks into a module constructor
			- create custom callback handlers
			- use callbacks in async environments
			- dispatch custom callback events
		- Custom
			- create a custom chat model class
			- create a custom LLM class
			- create a custom embeddings class
			- write a custom retriever class
			- write a custom document loader
			- write a custom output parser class
			- create custom callback handlers
			- define a custom tool
			- dispatch custom callback events
		- Serialization
			- save and load LangChain objects
	- Use cases
		- Q&A with RAG
			- add chat history
			- stream
			- return sources
			- return citations
			- do per-user retrieval
		- Extraction
			- [use reference examples](https://python.langchain.com/docs/how_to/extraction_examples/) 🚧
			- [handle long text](https://python.langchain.com/docs/how_to/extraction_long_text/) 🚧
			- [do extraction without using function calling](https://python.langchain.com/docs/how_to/extraction_parse/) 🚧
		- Chatbots
			- manage memory
			- do retrieval
			- use tools
			- manage large chat history
		- Query analysis
			- add examples to the prompt
			- handle cases where no queries are generated
			- handle multiple queries
			- handle multiple retrievers
			- construct filters
			- deal with high cardinality categorical variables
		- Q&A over SQL + CSV
			- use prompting to improve results
			- do query validation
			- deal with large databases
			- deal with CSV files
		- Q&A over graph databases
			- add a semantic layer over the database
			- construct knowledge graphs
		- Summarization
			- summarize text in a single LLM call
			- summarize text through parallelization
			- summarize text through iterative refinement
	- LangSmith
- Conceptual guide
	- High level
		- Why LangChain?
		- Architecture
	- Concepts
		- Chat models
		- Messages
		- Chat history
		- Tools
		- Tool calling
		- Structured output
		- Memory
		- Multimodality
		- Runnable interface
		- Streaming
		- LangChain Expression Language (LCEL)
		- Document loaders
		- Retrieval
		- Text splitters
		- Embedding models
		- Vector stores
		- Retriever
		- Retrieval Augmented Generation (RAG)
		- Agents
		- Prompt templates
		- Output parsers
		- Few-shot prompting
		- Example selectors
		- Async programming
		- Callbacks
		- Tracing
		- Evaluation
		- Testing
	- Glossary