In [1]:
from config import set_environment
set_environment()


def pretty_print_docs(docs):
  """Print every document's text under a numbered heading.

  Each entry is rendered as "Document <n>:" (1-based), a blank line and the
  document's ``page_content``; consecutive entries are separated by a line
  of 100 dashes.
  """
  divider = "\n" + "-" * 100 + "\n"
  sections = []
  for position, doc in enumerate(docs, start=1):
    sections.append(f"Document {position}:\n\n" + doc.page_content)
  print(divider.join(sections))

# Quickstart


## LangSmith


In [None]:
# export LANGCHAIN_TRACING_V2="true"
# export LANGCHAIN_API_KEY="..."

## Building with LangChain


## LLM Chain


### OpenAI

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model client created with no explicit arguments.
llm = ChatOpenAI()

# Two-message chat prompt: a fixed system instruction plus the user's text,
# which is substituted for the {input} placeholder.
prompt = ChatPromptTemplate.from_messages([
  ('system', 'You are world class technical documentation writer.'),
  ('user', '{input}')
])

# Last step of the chain: turns the chat model's message into a string.
output_parser = StrOutputParser()

# Compose prompt -> model -> parser with the pipe operator.
chain = prompt | llm | output_parser

# The input dictionary supplies the value for the prompt's {input} variable.
result = chain.invoke({'input': 'how can langsmith help with testing?'})
print(result)

### Local

## Retrieval Chain


### OpenAI

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Chat model used later to answer from the retrieved context.
llm = ChatOpenAI()

# Load the page to index with WebBaseLoader.
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()

# A vectorstore needs an embedding model: documents are ingested into the
# store through it.
embeddings = OpenAIEmbeddings()

# Build the index: split the loaded documents, then embed the pieces into a
# simple local FAISS vectorstore.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
# The data is now indexed in the vectorstore.

# Next: a retrieval chain that takes an incoming question, looks up relevant
# documents, and passes them along with the original question into an LLM to
# answer the question.
# No conversation history is involved in this section; the prompt that follows
# only has {context} and {input} placeholders.
template = """
Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}
"""
# document_chain pairs the LLM with the prompt whose {context} placeholder is
# meant to receive the documents.
prompt = ChatPromptTemplate.from_template(template)
document_chain = create_stuff_documents_chain(llm, prompt)
retriever = vector.as_retriever()

# Combine the retriever with the document chain: the retriever looks up the
# documents for the incoming question and document_chain answers from them.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Invoke the chain, which returns a dictionary with the response from the LLM
# in the answer key.
response = retrieval_chain.invoke({"input": "how can langsmith help with testing?"})
print(response["answer"])

## Conversation Retrieval Chain


### OpenAI

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import (create_history_aware_retriever,
                              create_retrieval_chain)

llm = ChatOpenAI()

# Index data using WebBaseLoader.
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()

# Ingest documents into the vectorstore using the embedding model.
embeddings = OpenAIEmbeddings()

# Build the index in a simple local FAISS vectorstore.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
retriever = vector.as_retriever()
# Data is indexed in the vectorstore

# Step 1 - history-aware retrieval.
# This prompt is only used to turn the conversation so far (chat_history plus
# the newest input) into a search query. It must NOT contain {context}: the
# retriever chain runs before any documents exist, so only "input" and
# "chat_history" are available when it is formatted.
retriever_prompt = ChatPromptTemplate.from_messages([
  MessagesPlaceholder(variable_name="chat_history"),
  ("user", "{input}"),
  ("user", "Given the above conversation, generate a search query to look up "
           "in order to get information relevant to the conversation"),
])
# Chain: recent input + conversation history -> LLM -> search query -> documents
retriever_chain = create_history_aware_retriever(llm, retriever, retriever_prompt)

# Step 2 - answering.
# The answer prompt receives the retrieved documents as {context} together
# with the whole conversation, so the final LLM call also sees the history.
prompt = ChatPromptTemplate.from_messages([
  ("system", "Answer the user's questions based on the below context:\n\n{context}"),
  MessagesPlaceholder(variable_name="chat_history"),
  ("user", "{input}"),
])
document_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: history-aware retrieval feeding the document chain.
retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)

# Test by passing a follow-up question from the user.
chat_history = [
  HumanMessage(content="Can LangSmith help test my LLM applications?"),
  AIMessage(content="Yes!")
]
retrieval_chain.invoke({
  "chat_history": chat_history,
  "input": "Tell me how"
})

## Agent


In [None]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import (create_openai_functions_agent,
                              AgentExecutor)
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WebBaseLoader

# Load the page to index with WebBaseLoader.
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()

# Embedding model used to ingest the documents into the vectorstore.
embeddings = OpenAIEmbeddings()  # Ingest documents into the vectorstore

# Build the index: split the documents and embed them into a local FAISS store,
# then expose the store as a retriever.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)  # Local vectorstore
retriever = vector.as_retriever()
# Data is indexed in the vectorstore

"""
Create an agent using OpenAI models to determine its steps.

Determine necessary tools for agent. For example, it will have access to:
1. The retriever created for answering questions about LangSmith
2. A search tool (Tavily) for providing up-to-date information.
"""

# Tool 1: wraps the retriever so the agent can query the indexed page.
# The description tells the model when it has to pick this tool.
retriever_tool = create_retriever_tool(
  retriever=retriever,
  name="langsmith_search",
  description=("Search for information about LangSmith. For any questions "
               "about LangSmith, you must use this tool!"),
)
# Tool 2: Tavily search tool.
# https://python.langchain.com/docs/integrations/retrievers/tavily
search_tool = TavilySearchResults()
tools = [retriever_tool, search_tool]

# Pull a predefined agent prompt from the hub by its name.
prompt = hub.pull("hwchase17/openai-functions-agent")
llm = ChatOpenAI(temperature=0)
# The agent is built from the model, the tools and the prompt; the executor
# is given the agent and the same tools. verbose=True is set on the executor.
agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

agent_executor.invoke({"input": "how can langsmith help with testing?"})

## Serving with LangServe


In [None]:
# LangServe allows developers to deploy LangChain chains as a REST API.

### Server


In [None]:
"""
serve.py contains server logic for serving the application. 
It includes the definition of the chain, FastAPI app, and a route for serving 
the chain (langserve.add_routes). 
"""



### Playground


### Client

# LangChain Expression Language


## Get started


### prompt + model + output parser

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Building blocks of a chain that generates a joke from a topic:
# a prompt with a single {topic} variable, a chat model and a string parser.
prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")
model = ChatOpenAI()
output_parser = StrOutputParser()

"""
Utilize LCEL to combine multiple components into one chain.

The | symbol works like a unix pipe operator, connecting components to feed 
output as input for the next.

The user input flows through the prompt template, model, and output parser in 
this chain.
"""
# prompt -> model -> output parser, joined with the pipe operator.
chain = prompt | model | output_parser

# The dictionary supplies the prompt's {topic} variable.
chain.invoke({"topic": "ice cream"})

In [None]:
# The prompt is a BasePromptTemplate that takes in a dictionary of template
# variables to produce a PromptValue, a wrapper around a completed prompt that
# can be used by LLM/ChatModel. It can work with both language model types, as
# it has the ability to produce both BaseMessages and strings.

prompt_value = prompt.invoke({"topic": "ice cream"})
# Show the PromptValue itself. Printing `prompt_value.__str__` without calling
# it would only show a bound-method object, so print the repr explicitly.
print(repr(prompt_value))
# The same prompt rendered as chat messages and as a plain string.
print(prompt_value.to_messages())
print(prompt_value.to_string())

In [None]:
from langchain_openai.llms import OpenAI

# The PromptValue is passed to the ChatModel, which outputs a BaseMessage.
message = model.invoke(prompt_value)
# Print the message's repr; `message.__str__` without a call would only show
# a bound-method object.
print(repr(message))

# Model outputs a string if it were an LLM.
llm = OpenAI(model="gpt-3.5-turbo-instruct")
llm.invoke(prompt_value)

In [None]:
# We pass the model output to the output_parser, a BaseOutputParser that takes
# a string/BaseMessage as input. The StrOutputParser converts any input to a
# string. `message` is the chat model output produced in the previous cell.
output_parser.invoke(message)

### RAG Search Example

In [None]:
# Retrieval-augmented generation chain for added context in question responses.
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# query -> in-memory store -> retrieved documents
# The retriever is a runnable component: it can be used alone or in
# conjunction with other components.
# Two short texts are embedded into an in-memory store.
vectorstore = DocArrayInMemorySearch.from_texts(
  ["harrison worked at kensho", "bears like to eat honey"],
  embedding=OpenAIEmbeddings()
)
# Retrieve documents and include them in the context
retriever = vectorstore.as_retriever()

# The prompt template takes in context and question as values to be 
# substituted in the prompt. 
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI()
output_parser = StrOutputParser()

# Prepare the inputs the prompt expects: "context" comes from the retriever's
# document search and "question" is the user's question, passed through
# unchanged by RunnablePassthrough.
setup_and_retrieval = RunnableParallel(
  {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser

# The chain is invoked with the bare question string.
chain.invoke("where did harrison work?")

In [None]:
# Call the retriever on its own with the same question.
retriever.invoke("where did harrison work?")

## Why use LCEL


In [None]:
from langchain_community.chat_models import ChatAnthropic
from langchain_openai import ChatOpenAI, OpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, ConfigurableField

# Shared prompt (single {topic} variable) and string output parser.
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
output_parser = StrOutputParser()

# One chat model and one completion-style LLM; the Anthropic model is disabled.
chat_openai = ChatOpenAI(model="gpt-3.5-turbo")
openai = OpenAI(model="gpt-3.5-turbo-instruct")
# anthropic = ChatAnthropic(model="claude-2")

# Configurable model: chat_openai is the default (key "chat_openai") and
# `openai` is registered as the alternative "openai" under the configurable
# field id "model". `openai` is also attached as a fallback.
model = (
  chat_openai
  .configurable_alternatives(
    ConfigurableField(id="model"),
    default_key="chat_openai",
    openai=openai,
    # anthropic=anthropic,
  )
  .with_fallbacks([openai])
)

# Plain chain that always uses the chat model. The leading dict maps the raw
# input to the prompt's "topic" variable via RunnablePassthrough.
chain = (
  {"topic": RunnablePassthrough()}
  | prompt
  | chat_openai
  | output_parser
)

# Same pipeline, but using the configurable model defined above.
configurable_chain = (
  {"topic": RunnablePassthrough()}
  | prompt
  | model
  | output_parser
)

# Same pipeline, but using the completion-style LLM.
llm_chain = (
    {"topic": RunnablePassthrough()}
    | prompt
    | openai
    | output_parser
)

# anthropic_chain = (
#     {"topic": RunnablePassthrough()}
#     | prompt
#     | anthropic
#     | output_parser
# )

# Chat-model chain with llm_chain registered as its fallback.
fallback_chain = chain.with_fallbacks([llm_chain])

In [None]:
# INVOKE
# Input a topic (a plain string) and receive a joke.
print(chain.invoke("ice cream"))

In [None]:
# STREAM
# Print each chunk as it arrives, without newlines, flushing immediately.
for chunk in chain.stream("ice cream"):
  print(chunk, end="", flush=True)

In [None]:
# BATCH
# Run the chain on a list of topics in one call.
print(chain.batch(["ice cream", "spaghetti", "dumplings"]))


In [None]:
# ASYNC
print(chain.ainvoke("ice cream"))

In [None]:
# Select the "openai" alternative of the configurable field with id "model".
# Runtime selections belong under the "configurable" key of the config.
configurable_chain.invoke(
  input="ice cream",
  config={"configurable": {"model": "openai"}}
)

In [None]:
# Stream from the "openai" alternative of the configurable field "model".
# Runtime selections belong under the "configurable" key of the config.
stream = configurable_chain.stream(
    input="ice cream",
    config={"configurable": {"model": "openai"}}
)
for chunk in stream:
  print(chunk, end="", flush=True)

In [None]:
# Batch over several topics; no config is passed in this call.
print(configurable_chain.batch(["ice cream", "spaghetti", "dumplings"]))

In [None]:
# LLM instead of chat model: llm_chain pipes the prompt into the OpenAI LLM.
llm_chain.invoke("ice cream")

In [None]:
# Different model provider
# anthropic_chain.invoke("ice cream")

## Interface


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Minimal chain for the interface examples: prompt piped into a chat model,
# with no output parser.
model = ChatOpenAI()
prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
chain = prompt | model

#### Input/Output Schema


In [None]:
"""
Description of inputs/outputs accepted by a Runnable Pydantic model dynamically 
generated from any Runnable's structure. 
Call .schema() to obtain a JSONSchema representation.
"""

# The input schema of the chain is the input schema of its first part, the
# prompt. The prompt's and the model's input schemas are printed for comparison.
print(chain.input_schema.schema())
print(prompt.input_schema.schema())
print(model.input_schema.schema())
print()

# The output schema of the chain is the output schema of its last part,
# in this case a ChatModel, which outputs a ChatMessage.
# The prompt's and the model's output schemas are printed for comparison.
print(chain.output_schema.schema())
print(prompt.output_schema.schema())
print(model.output_schema.schema())
print()

#### Stream, Invoke, Batch


In [None]:
# Stream: print the content of each streamed chunk as it arrives.
for s in chain.stream({"topic": "bears"}):
  print(s.content, end="", flush=True)

# Invoke: a single input, a single result.
print(chain.invoke({"topic": "bears"}))

# Batch: a list of inputs in one call.
print(chain.batch([{"topic": "bears"}, {"topic": "cats"}]))

#### Async Stream, Invoke, Batch


In [None]:
# Async counterparts of stream / invoke / batch.
# These statements use top-level `async for` / `await`.
async for s in chain.astream({"topic": "bears"}):
    print(s.content, end="", flush=True)

# NOTE(review): this awaited result is neither stored nor printed; only the
# last expression of a notebook cell is displayed.
await chain.ainvoke({"topic": "bears"})

await chain.abatch([{"topic": "bears"}])

#### Async Stream Intermediate Steps


##### Streaming JSONPatch chunks


In [None]:
from typing import TypedDict, List, Dict, Any, Optional

class LogEntry(TypedDict):
    """Typed dictionary describing a single sub-run in a run log.

    The first six keys carry identification and start information; the last
    three carry streamed tokens, the final output and the end time.
    """
    id: str
    """ID of the sub-run."""
    name: str
    """Name of the object being run."""
    type: str
    """Type of the object being run, eg. prompt, chain, llm, etc."""
    tags: List[str]
    """List of tags for the run."""
    metadata: Dict[str, Any]
    """Key-value pairs of metadata for the run."""
    start_time: str
    """ISO-8601 timestamp of when the run started."""

    streamed_output_str: List[str]
    """List of LLM tokens streamed by this run, if applicable."""
    final_output: Optional[Any]
    """Final output of this run.
    Only available after the run has finished successfully."""
    end_time: Optional[str]
    """ISO-8601 timestamp of when the run ended.
    Only available after the run has finished."""


class RunState(TypedDict):
    """Typed dictionary describing the state of a whole run.

    Holds the run id, the streamed output chunks, the final output and a
    mapping from run names to LogEntry records.
    """
    id: str
    """ID of the run."""
    streamed_output: List[Any]
    """List of output chunks streamed by Runnable.stream()"""
    final_output: Optional[Any]
    """Final output of the run, usually the result of aggregating (`+`) streamed_output.
    Only available after the run has finished successfully."""

    logs: Dict[str, LogEntry]
    """Map of run names to sub-runs. If filters were supplied, this list will
    contain only the runs that matched the filters."""


# This is useful for streaming JSONPatch through an HTTP server and applying
# the ops on the client to rebuild the run state.
async for chunk in retrieval_chain.astream_log(
    "where did harrison work?", include_names=["Docs"]
):
    print("-" * 40)
    print(chunk)

##### Streaming the incremental RunState


In [None]:
# Passing diff=False will give incremental values of RunState. More
# verbose output is produced when there are repetitive parts.
# NOTE(review): the call passes a plain question string and filters on the
# run name "Docs", so `retrieval_chain` must accept string input and contain
# a step with that name - confirm which chain is bound to this name here.
async for chunk in retrieval_chain.astream_log(
    "where did harrison work?", include_names=["Docs"], diff=False
):
    print("-" * 70)
    print(chunk)

#### Parallelism


In [None]:
# LCEL supports parallel requests through the use of RunnableParallel.
# This executes every element in parallel.
import time
from langchain_core.runnables import RunnableParallel
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

model = ChatOpenAI()

# Two independent prompt -> model chains that share the same {topic} input.
template1 = "tell me a joke about {topic}"
chain1 = ChatPromptTemplate.from_template(template1) | model
template2 = "write a short (2 line) poem about {topic}"
chain2 = ChatPromptTemplate.from_template(template2) | model
# Run both chains on one input; the keyword names are "joke" and "poem".
combined = RunnableParallel(joke=chain1, poem=chain2)

In [None]:
%%time
chain1.invoke({"topic": "bears"})

In [None]:
%%time
chain2.invoke({"topic": "bears"})

In [None]:
%%time
combined.invoke({"topic": "bears"})

##### Parallelism on batches

In [None]:
%%time
combined.batch([{"topic": "bears"}, {"topic": "cats"}])

## Streaming


## How to


## Cookbook


# Modules


## Model I/O


### Quickstart


#### Models


In [None]:
from langchain_openai import ChatOpenAI, OpenAI
from langchain.schema import HumanMessage

# One completion-style LLM and one chat model, both created with no arguments.
llm = OpenAI()
chat_model = ChatOpenAI()
# Configurations for specific models, initialized with parameters like temperature.
# LLM objects take strings as input and output strings,
# ChatModel objects take a list of messages as input and output a message.
  
# The same single-message list is sent to both models so their outputs can be
# compared.
text = "What would be a good company name for a company that makes colorful socks?"
messages = [HumanMessage(content=text)]
print(f"LLM: {llm.invoke(messages)}")
print(f"Chat Model: {chat_model.invoke(messages)}")

#### Prompt Templates


In [None]:
"""
LLM applications don't input user data directly. 
They incorporate it into a prompt template, which gives more context for the task. 

Example, our application would simply require the user provide a description of
the company/product, without having to give specific instructions to the model.

PromptTemplates simplify the process of converting user input into a properly
formatted prompt. 

They allow to selectively format variables, combine templates, and create a 
single prompt.
"""
from langchain.prompts import PromptTemplate

# String template with a single {product} variable.
template = "What would be a good company name for a company that makes {product}?"
prompt = PromptTemplate.from_template(template)
# Fill in the variable; the formatted prompt is the cell's displayed result.
prompt.format(product="colorful socks")

In [None]:
"""
Generate a list of messages, including information about their content, role, 
and position in the list.

ChatPromptTemplate is a list of ChatMessageTemplates, each specifying the 
formatting instructions for a ChatMessage. 

The ChatMessageTemplate includes the message's role and content. 
"""
from langchain.prompts.chat import ChatPromptTemplate

# System message template with two variables, and a human message template
# that is just the {text} variable.
system_template = ("You are helpful assistant that translate {input_language} "
                   "to {output_language}")
human_template = "{text}"
# Each tuple is (role, template).
chat_prompt = ChatPromptTemplate.from_messages([
  ("system", system_template),
  ("human", human_template)
])

# Fill in all three variables to obtain the formatted messages.
chat_prompt.format_messages(input_language="English",
                            output_language="French",
                            text="I love programming")


#### Output parsers


In [None]:
# Transform language model raw output into usable formats. 
# Types include: converting LLM text into JSON, turning a ChatMessage into a 
# string, and converting additional information from a call into a string.

# Parser for comma separated values.
from langchain.output_parsers import CommaSeparatedListOutputParser

# Parse a comma separated string with the list parser.
output_parser = CommaSeparatedListOutputParser()
output_parser.parse("hi, bye")

#### Composing with LCEL


In [None]:
# Combine steps into one chain. 
# The chain will take input variables, pass them to a prompt template, create 
# a prompt, run it through a language model, the output will be passed through
# an output parser. 
# This cell relies on names from the preceding cells: ChatPromptTemplate,
# chat_model and output_parser (the comma separated list parser).
template = "Generate a list of 5 {text}.\n\n{format_instructions}"

chat_prompt = ChatPromptTemplate.from_template(template)
# Pre-fill {format_instructions} from the parser so that only {text} has to
# be supplied when the chain is invoked.
chat_prompt = chat_prompt.partial(format_instructions=output_parser.get_format_instructions())
# | syntax to join components.
chain = chat_prompt | chat_model | output_parser
chain.invoke({"text": "colors"})

### Prompts


#### Quick Start



##### PromptTemplate


In [None]:
# Use PromptTemplate to create a string prompt template. 
# PromptTemplate uses Python's str.format syntax by default.
# The template allows for unlimited variables, even without any.
from langchain.prompts import PromptTemplate

# Template with two variables, {adjective} and {content}.
prompt_template = PromptTemplate.from_template(
  "Tell me a {adjective} joke about {content}."
)
prompt_template.format(adjective="funny", content="chickens")

##### ChatPromptTemplate


In [None]:
# The chat prompt consists of a list of chat messages
# each: [content, role] parameter.
# In the OpenAI Chat Completions API, a message can be linked to 
# an AI assistant, a human, or a system role.
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage
from langchain.prompts import HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI


# ChatPromptTemplate.from_messages accepts various message representations.
# These include the 2-tuple format of (type, content),
chat_template1 = ChatPromptTemplate.from_messages([
  ("system", "You are helpful AI bot. Your name is {name}"),
  ("human", "Hello, how are you doing?"),
  ("ai", "I'm doing well, thanks!"),
  ("human", "{user_input}"),
])

# Fill in the two variables ({name}, {user_input}) used by the template.
messages1 = chat_template1.format_messages(name="Bob", 
                                         user_input="What is your name?")
print(messages1)

# or instances of MessagePromptTemplate or BaseMessage.
# Here: a fixed SystemMessage plus a human message template with {text}.
chat_template2 = ChatPromptTemplate.from_messages([
  SystemMessage(content=(
    "You are a helpful assistant that re-writes the user's text to sound more "
    "upbeat."
  )),
  HumanMessagePromptTemplate.from_template("{text}"),
])
messages2 = chat_template2.format_messages(text="I don't like eating tasting things.")
print(messages2)

##### LCEL

In [None]:
# PromptTemplate and ChatPromptTemplate implement the Runnable interface,
# LCEL's core: they support (a)invoke, (a)stream, (a)batch and astream_log.
# dictionary of prompt variables -> PromptTemplate -> StringPromptValue
# dictionary -> ChatPromptTemplate -> ChatPromptValue.
prompt_val = prompt_template.invoke({"adjective":"funny", "content": "chickens"})
# Print the value's repr; `prompt_val.__str__` without a call would only show
# a bound-method object.
print(repr(prompt_val))
print(prompt_val.to_string())
print(prompt_val.to_messages(), "\n")

chat_val = chat_template2.invoke({"text": "i dont like eating tasty things."})
print(chat_val.to_string())
print(chat_val.to_messages())

#### Composition



##### String prompt composition


In [None]:
# Each template is joined together
# Can work with prompts or strings (with the first element being a prompt)
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI

# A prompt template followed by plain strings: `+` joins them into a single
# template that has both the {topic} and {language} variables.
prompt = (
  PromptTemplate.from_template("Tell me a joke about {topic}")
  + ", make it funny"
  + "\n\n and in {language}"
)
# Print the composed template's repr; `prompt.__str__` without a call would
# only show a bound-method object.
print(repr(prompt), "\n")
print(prompt.format(topic="sports", language="spanish"), "\n")

# Use the composed prompt in an LLMChain.
model = ChatOpenAI()
chain = LLMChain(llm=model,  prompt=prompt)
chain.run(topic="sports", language="spanish")

##### Chat prompt composition

In [None]:
# A chat prompt is a list of messages. 
# Each element represents a new message within the completed prompt.
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI

# Start the pipeline from a plain system message.
prompt = SystemMessage(content="You are a nice pirate")

# Create a pipeline by combining the message with other messages (templates).
# Use a Message when there are no variables to format
# Use a MessageTemplate when variables are present.
# A string can be used (inferred as a HumanMessagePromptTemplate).
new_prompt = (
  prompt + HumanMessage(content="hi") + AIMessage(content="what?") + "{input}"
)
# NOTE(review): the formatted messages are neither stored nor printed here;
# only the last expression of a notebook cell is displayed.
new_prompt.format_messages(input="i said hi")

# Use the composed chat prompt in an LLMChain; the single positional argument
# is the value for {input}.
model = ChatOpenAI()
chain = LLMChain(llm=model, prompt=new_prompt)
chain.run("i said hi")

#### Example Selector Types



##### Select by length


In [None]:
"""
Manages examples based on length, ensuring prompt construction within the 
context window limit. It adjusts the number of selected examples: fewer for 
longer inputs and more for shorter ones.
"""

from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector

# Examples of a pretend task of creating antonyms.
# Input/output pairs for a pretend antonym task.
examples = [
  {"input": "happy", "output": "sad"},
  {"input": "tall", "output": "short"},
  {"input": "energetic", "output": "lethargic"},
  {"input": "sunny", "output": "gloomy"},
  {"input": "windy", "output": "calm"},
]
# How a single example is rendered inside the prompt.
example_prompt = PromptTemplate(
  input_variables=["input", "output"],
  template="Input: {input}\nOutput: {output}"
)
example_selector = LengthBasedExampleSelector(
  # The examples it has available to choose from.
  examples=examples,
  # The PromptTemplate being used to format the examples.
  example_prompt=example_prompt,
  # The maximum length that the formatted examples should be.
  # Length is measured by the get_text_length function below.
  max_length=25,
  # The function used to get the length of a string, which is used
  # to determine which examples to include. It is commented out because
  # it is provided as a default value if none is specified.
  # get_text_length: Callable[[str], int] = lambda x: len(re.split("\n| ", x))
)
# Few-shot prompt: prefix, the selected examples, then the suffix holding the
# new {adjective} to complete.
dynamic_prompt = FewShotPromptTemplate(
  example_selector=example_selector,
  example_prompt=example_prompt,
  prefix="Give me the antonym of every input",
  suffix="Input: {adjective}\nOutput:",
  input_variables=["adjective"]
)

# An example with small input, so it selects all examples.
print(dynamic_prompt.format(adjective="big"))

In [None]:
# An example with long input, so it selects only one example.
# A much longer input for the same length-based few-shot prompt.
long_string = ("big and huge and massive and large and gigantic and tall and "
               "much much much much much bigger than everything else")
print(dynamic_prompt.format(adjective=long_string))

In [None]:
# You can add an example to an example selector as well.
# The new pair is added through the prompt's own example_selector attribute,
# then the prompt is formatted again.
new_example = {"input": "big", "output": "small"}
dynamic_prompt.example_selector.add_example(new_example)
print(dynamic_prompt.format(adjective="enthusiastic"))


##### Select by maximal marginal relevance (MMR)


In [None]:
"""
Combines similarity to inputs and diversity. It selects examples with embeddings
having the highest cosine similarity to inputs, adding them iteratively while 
penalizing closeness to already selected examples.
"""

from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector import (
  MaxMarginalRelevanceExampleSelector, SemanticSimilarityExampleSelector
)
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Examples of a pretend task of creating antonyms.
# Input/output pairs for a pretend antonym task.
examples = [
  {"input": "happy", "output": "sad"},
  {"input": "tall", "output": "short"},
  {"input": "energetic", "output": "lethargic"},
  {"input": "sunny", "output": "gloomy"},
  {"input": "windy", "output": "calm"},
]
example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
  # The list of examples available to select from.
  examples=examples,
  # Embedding class for semantic similarity measurement.
  embeddings=OpenAIEmbeddings(),
  # VectorStore class for embedding storage and similarity search.
  vectorstore_cls=FAISS,
  # The number of examples to produce.
  k=2
)
# How a single example is rendered inside the prompt.
example_prompt = PromptTemplate(
  input_variables=["input", "output"],
  template="Input: {input}\nOutput: {output}"
)
# Few-shot prompt: prefix, the selected examples, then the suffix holding the
# new {adjective}. The prefix is part of the text sent to the model, so its
# spelling matters ("antonym", previously misspelled).
mnr_prompt = FewShotPromptTemplate(
  example_selector=example_selector,
  example_prompt=example_prompt,
  prefix="Give the antonym of every input",
  suffix="Input: {adjective}\nOutput:",
  input_variables=["adjective"]
)
print(mnr_prompt.format(adjective="worried"))


##### Select by n-gram overlap


In [None]:
"""
The NGramOverlapExampleSelector orders examples by their similarity to the input,
measured by an ngram overlap score (float between 0.0 and 1.0). 
A threshold score can be set, excluding examples with a score below or equal to 
the threshold. The default threshold is -1.0, reordering without exclusion. 
A threshold of 0.0 excludes examples with no ngram overlaps.
"""

from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector.ngram_overlap import NGramOverlapExampleSelector

# How a single example is rendered inside the prompt.
example_prompt = PromptTemplate(
  input_variables=["input", "output"],
  template="Input: {input}\nOutput: {output}"
)
# Examples of a fictional translation task.
examples = [
    {"input": "See Spot run.", "output": "Ver correr a Spot."},
    {"input": "My dog barks.", "output": "Mi perro ladra."},
    {"input": "Spot can run.", "output": "Spot puede correr."},
]
example_selector = NGramOverlapExampleSelector(
  # The examples it has available to choose from.
  examples=examples,
  # The PromptTemplate being used to format the examples.
  example_prompt=example_prompt,
  # The threshold, at which selector stops. Default -1.0
  threshold=-1.0,
  # Negative Threshold: sorts examples by ngram overlap score, including none.
  # Threshold > 1.0: excludes all examples, returning an empty list.
  # Threshold = 0.0: sorts examples by ngram overlap score,
  # excluding those with no ngram overlap with the input.
)
# Few-shot prompt: prefix, the selected examples, then the suffix holding the
# new {sentence} to translate.
dynamic_prompt = FewShotPromptTemplate(
  example_selector=example_selector,
  example_prompt=example_prompt,
  prefix="Give the Spanish translation of every input",
  suffix="Input: {sentence}\nOutput:",
  input_variables=["sentence"],
)

# Example input with significant ngram overlap: "Spot can run."
# No overlap with: "My dog barks."
print(dynamic_prompt.format(sentence="Spot can run fast."))

In [None]:
# Examples can be added to NGramOverlapExampleSelector.
# Add one more pair to the selector, then format the same sentence again.
new_example = {"input": "Spot plays fetch.", "output": "Spot juega a buscar."}
example_selector.add_example(new_example)
print(dynamic_prompt.format(sentence="Spot can run fast."))

In [None]:
# Set a threshold to exclude examples. 
# 0.0 excludes examples with no ngram overlaps with the input. 
# The example "My dog barks." is excluded because it has no ngram overlaps with
# "Spot can run fast."
# The threshold is changed on the existing selector object, so it also
# affects dynamic_prompt, which holds that same selector.
example_selector.threshold = 0.0
print(dynamic_prompt.format(sentence="Spot can run fast."))


##### Select by similarity

In [None]:
# Selects examples based on cosine similarity of embeddings to the inputs.
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# How a single example is rendered inside the prompt.
example_prompt = PromptTemplate(
  input_variables=["input", "output"],
  template="Input: {input}\nOutput: {output}"
)

# Examples of a pretend task of creating antonyms.
examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "energetic", "output": "lethargic"},
    {"input": "sunny", "output": "gloomy"},
    {"input": "windy", "output": "calm"},
]

example_selector = SemanticSimilarityExampleSelector.from_examples(
  # The list of examples available to select from.
  examples=examples,
  # Embedding class for generating semantic similarity measurements.
  embeddings=OpenAIEmbeddings(),
  # The VectorStore class stores embeddings and performs similarity searches.
  vectorstore_cls=Chroma,
  # Number of examples to select.
  k=1,
)

# Few-shot prompt: prefix, the selected example, then the suffix holding the
# new {adjective}.
similar_prompt = FewShotPromptTemplate(
  example_selector=example_selector,
  example_prompt=example_prompt,
  prefix="Give the antonym of every input",
  suffix="Input: {adjective}\nOutput:",
  input_variables=["adjective"],
)

# Input denotes emotion; choose the happy/sad example.
print(similar_prompt.format(adjective="worried"))

#### Example selectors


In [None]:
from abc import ABC, abstractmethod
from typing import Any, Dict, List

# The Example Selector is responsible for choosing which examples to include in
# the prompt, especially when there are a lot of examples.
# Its base interface is defined below. The typing names used in the annotations
# are imported in this cell so the class definition does not depend on imports
# made by earlier cells.
class BaseExampleSelector(ABC):
  """Interface for selecting examples to include in prompts."""

  @abstractmethod
  def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
    """Select which examples to use based on the inputs."""

  @abstractmethod
  def add_example(self, example: Dict[str, str]) -> Any:
    """Add new example to store."""

# LangChain requires a select_examples method, which returns a list of examples 
# based on input variables. There are different types of example selectors 
# available.

In [None]:
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.prompts.few_shot import FewShotPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate

# To use an example selector, create a list of examples (input-output pairs). 
# Selecting examples for translation task.
# English -> Italian word pairs used as the few-shot examples.
examples = [
  {"input": "hi", "output": "ciao"},
  {"input": "bye", "output": "arrivederci"},
  {"input": "soccer", "output": "calcio"},
]

# Example selector based on word length.
class CustomExampleSelector(BaseExampleSelector):
  """Select the one stored example whose input is closest in length to the query.

  Each example is a dict with at least an "input" key. Ties are resolved in
  favour of the example that was stored first.
  """

  def __init__(self, examples):
    # Keep a private copy so add_example() does not mutate the caller's list.
    self.examples: list = list(examples)

  def add_example(self, example):
    """Append `example` to the pool of selectable examples."""
    self.examples.append(example)

  def select_examples(self, input_variables):
    """Return a one-element list holding the closest-length example.

    Args:
      input_variables: mapping that must contain an "input" key.

    Returns:
      A list with the best match, or an empty list when no examples are stored
      (so a prompt template never receives a `None` example).
    """
    if not self.examples:
      return []

    # Assumes the query includes an "input" key.
    target_length = len(input_variables["input"])

    # min() keeps the first of several equally close examples, i.e. the same
    # pick as a manual scan that only replaces the best match on a strictly
    # smaller length difference.
    closest = min(
      self.examples,
      key=lambda example: abs(len(example["input"]) - target_length),
    )
    return [closest]

example_selector = CustomExampleSelector(examples)

# "okay" has 4 letters; among hi/bye/soccer the closest input length is "bye".
print(example_selector.select_examples({"input": "okay"}))

# After adding a 4-letter example, that exact-length match is selected instead.
example_selector.add_example({"input": "hand", "output": "mano"})
print(example_selector.select_examples({"input": "okay"}))

# Example selector can now be used in a prompt.
example_prompt = PromptTemplate.from_template(
  "Input: {input} -> Output: {output}"
)
prompt = FewShotPromptTemplate(
  example_selector=example_selector,
  example_prompt=example_prompt,
  suffix="Input: {input} -> Output:",
  prefix="Translate the following words from English to Italian:",
  input_variables=["input"],
)

print("-"*50)
print(prompt.format(input="word"))

#### Few-shot prompt templates


##### USING AN EXAMPLE SET

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.few_shot import FewShotPromptTemplate

# Create a few-shot prompt template from either a set of examples or an 
# Example Selector object.
# Setting up self-ask with search examples in few-shot learning.

## CREATE THE EXAMPLE SET
# Create a list of few-shot examples, each example is a dictionary with input
# variables as keys and their corresponding values.
# Self-ask-with-search demonstrations. Every answer follows the same line
# prefixes ("Follow up:", "Intermediate answer:", "So the final answer is:") so
# the model sees one consistent pattern to imitate.
examples = [
  {
    "question": "Who lived longer, Muhammad Ali or Alan Turing?",
    "answer": """
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali
""",
  },
  {
    "question": "When was the founder of craigslist born?",
    "answer": """
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952
""",
  },
  {
    "question": "Who was the maternal grandfather of George Washington?",
    "answer": """
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball
""",
  },
  {
    "question": "Are both the directors of Jaws and Casino Royale from the same country?",
    "answer": """
Are follow up questions needed here: Yes.
Follow up: Who is the director of Jaws?
Intermediate answer: The director of Jaws is Steven Spielberg.
Follow up: Where is Steven Spielberg from?
Intermediate answer: The United States.
Follow up: Who is the director of Casino Royale?
Intermediate answer: The director of Casino Royale is Martin Campbell.
Follow up: Where is Martin Campbell from?
Intermediate answer: New Zealand.
So the final answer is: No
""",
  },
]

## CREATE A FORMATTER FOR THE FEW-SHOT EXAMPLES
# PromptTemplate object to format few-shot examples into a string.
# Renders one example as "Question: ..." followed by its worked answer.
example_prompt = PromptTemplate(
  input_variables=["question", "answer"],
  template="Question: {question}\n{answer}"
)
# Uncomment to preview how a single example is rendered:
# print(example_prompt.format(**examples[0]))

## FEED EXAMPLES AND FORMATTER TO FEWSHOTPROMPTTEMPLATE
# FewShotPromptTemplate takes the few-shot examples and their formatter; the
# suffix appends the new question, supplied through the `input` variable.
prompt = FewShotPromptTemplate(
  examples=examples,
  example_prompt=example_prompt,
  suffix="Question: {input}",
  input_variables=["input"],
)
# Uncomment to preview the full prompt:
# print(prompt.format(input="Who was the father of Mary Ball Washington?"))


##### USING AN EXAMPLE SELECTOR

In [None]:
## FEED EXAMPLES INTO EXAMPLESELECTOR
# `SemanticSimilarityExampleSelector` returns a subset of the most similar 
# few-shot examples for `FewShotPromptTemplate` by leveraging an embedding model 
# and a vector store to determine nearest neighbors.
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# NOTE: `examples` and `example_prompt` come from the previous cell
# ("USING AN EXAMPLE SET"); run it first.
example_selector = SemanticSimilarityExampleSelector.from_examples(
  # List of available examples to choose from.
  examples=examples,
  # Embedding class for semantic similarity measurement.
  embeddings=OpenAIEmbeddings(),
  # VectorStore class for storing embeddings and performing similarity searches.
  vectorstore_cls=Chroma,
  # Number of examples to select per query.
  k=1,
)

# Choose the most similar input example.
# `selected_examples` is only consumed by the commented-out preview loop below;
# the prompt further down queries the selector on its own.
question = "Who was the father of Mary Ball Washington?"
selected_examples = example_selector.select_examples({"question": question})
# for example in selected_examples:
#   print("\n")
#   for k, v in example.items():
#     print(f"{k}: {v}")

## FEED EXAMPLE SELECTOR INTO FEWSHOTPROMPTTEMPLATE
# Same prompt as the fixed-example version, except that the examples are now
# supplied by `example_selector` instead of a static list.
prompt = FewShotPromptTemplate(
  example_selector=example_selector,
  example_prompt=example_prompt,
  suffix="Question: {input}",
  input_variables=["input"],
)

print(prompt.format(input="Who was the father of Mary Ball Washington?"))

#### Few-shot examples for chat models



##### Fixed Examples


In [None]:
from langchain.prompts import (ChatPromptTemplate, 
                               FewShotChatMessagePromptTemplate)
from langchain_openai import ChatOpenAI

"""
A common few-shot prompting technique is to use a fixed set of prompt examples.
This lets you select and evaluate a chain without worrying about additional moving parts in production.

Template's components include a list of dictionary examples and an example
prompt, which converts each example into 1 or more messages using its 
format_messages method. Example is to create a human message and an
AI message response, or a human message followed by a function call message.
"""

# Fixed example set: both pairs are included in every prompt built below.
examples = [
  {"input": "2+2", "output": "4"},
  {"input": "2+3", "output": "5"},
]

# Assemble the examples into the few-shot prompt template.
# Each example is rendered as one human message followed by one AI message.
example_prompt = ChatPromptTemplate.from_messages([
  ("human", "{input}"),
  ("ai", "{output}"),
])
few_shot_prompt = FewShotChatMessagePromptTemplate(
  example_prompt=example_prompt,
  examples=examples,
)
# Uncomment to preview the rendered examples:
# print(few_shot_prompt.format())

# Assemble the final prompt: system message, the few-shot examples, then the
# user's actual question.
final_prompt = ChatPromptTemplate.from_messages([
  ("system", "You are a wondrous wizard of math."),
  few_shot_prompt,
  ("human", "{input}"),
])

# Pipe the prompt into the chat model; the invoke() result is the cell output.
chain = final_prompt | ChatOpenAI(temperature=0.0)
chain.invoke({"input": "What's the square of a triangle?"})

##### Dynamic few-shot prompting

In [None]:
from langchain.prompts import (SemanticSimilarityExampleSelector,
        ChatPromptTemplate, FewShotChatMessagePromptTemplate)
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

"""
Conditionally choose examples based on input by utilizing an example_selector
- example_selector: selects few-shot examples and their return order. 
Implements BaseExampleSelector interface. 
Common example: SemanticSimilarityExampleSelector with vectorstore.
- example_prompt: converts each example into 1 or more messages via 
format_messages method. Common example: one human message and one AI message 
response, or a human message followed by a function call message. 

These can be combined with messages and chat templates to create the final prompt.
"""

# Using a vectorstore to select examples based on semantic similarity requires 
# initial population.
# Mixed pool: three arithmetic examples and two moon-related ones, so the
# selector has semantically distinct groups to choose from.
examples = [
  {"input": "2+2", "output": "4"},
  {"input": "2+3", "output": "5"},
  {"input": "2+4", "output": "6"},
  {"input": "What did the cow say to the moon?", "output": "nothing at all"},
  {
    "input": "Write me a poem about the moon",
    "output": "One for the moon, and one for me, who are we to talk about the moon?",
  },
]
# Each example is embedded as "<input> <output>"; the original dict is attached
# as metadata so it can be handed back when the text is retrieved.
to_vectorize = [" ".join(example.values()) for example in examples]
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_texts(to_vectorize, embeddings, metadatas=examples)

# Create the example_selector with a vectorstore.
# Instruct it to fetch only the top 2 examples.
example_selector = SemanticSimilarityExampleSelector(
  vectorstore=vectorstore,
  k=2
)
# Prompt template loads examples using the `select_examples` method.
# Uncomment to query the selector directly:
# example_selector.select_examples({"input": "horse"})

# Create the prompt template using the example_selector.
# Specify formatting for each example: 1 human message and 1 AI message.
example_prompt = ChatPromptTemplate.from_messages([
  ("human", "{input}"),
  ("ai", "{output}"),
])
few_shot_prompt = FewShotChatMessagePromptTemplate(
  # Input variables determine values for the example_selector.
  input_variables=["input"],
  example_selector=example_selector,
  example_prompt=example_prompt,
)
# Uncomment to preview the examples chosen for a given input:
# print(few_shot_prompt.format(input="What's 3+3?"))

# Final prompt template assembly
# System message, dynamically selected few-shot examples, then the user's
# question. The system text matches the one used in the fixed-examples cell.
final_prompt = ChatPromptTemplate.from_messages([
  ("system", "You are a wondrous wizard of math."),
  few_shot_prompt,
  ("human", "{input}"),
])
# Uncomment to preview the fully rendered prompt:
# print(final_prompt.format(input="What's 3+3?"))

# Connect model to the few-shot prompt.
chain = final_prompt | ChatOpenAI(temperature=0.0)
print(chain.invoke({"input": "What's 3+3?"}))

#### Types of `MessagePromptTemplate`


In [None]:
from langchain.prompts import ChatMessagePromptTemplate

"""
LangChain offers various MessagePromptTemplate options, such as 
AIMessagePromptTemplate, SystemMessagePromptTemplate, and HumanMessagePromptTemplate,
for generating AI, system, and human messages.

In cases where the chat model supports arbitrary role chat messages, use 
ChatMessagePromptTemplate to specify the role name.
"""

template = "May the {subject} be with you"
# Build a message template with the arbitrary role name "Jedi".
chat_message_prompt = ChatMessagePromptTemplate.from_template(
  role="Jedi", template=template
)
# Last expression of the cell: the formatted message is shown as cell output.
chat_message_prompt.format(subject="force")

In [None]:
from langchain.prompts import (ChatPromptTemplate, HumanMessagePromptTemplate,
                               MessagesPlaceholder)
from langchain_core.messages import AIMessage, HumanMessage

"""
LangChain's MessagesPlaceholder allows full control over message rendering. 
This is helpful when unsure of the correct role for message prompts or when 
inserting a message list during formatting.
"""

# Closing human message; `word_count` is filled in at format time.
human_prompt = "Summarize our conversation so far in {word_count} words."
human_message_template = HumanMessagePromptTemplate.from_template(human_prompt)

# The placeholder is filled with the message list passed as `conversation`,
# followed by the summarization request.
chat_prompt = ChatPromptTemplate.from_messages([
  MessagesPlaceholder(variable_name="conversation"),
  human_message_template
])

# A short two-message conversation to summarize.
human_message = HumanMessage(content="What is the best way to learn programming?")
ai_message = AIMessage(
  content="""\
1. Choose a programming language: Decide on a programming language that you want to learn.

2. Start with the basics: Familiarize yourself with the basic programming concepts such as variables, data types and control structures.

3. Practice, practice, practice: The best way to learn programming is through hands-on experience\
"""
)
# Last expression of the cell: the resulting message list is the cell output.
chat_prompt.format_prompt(
  conversation=[human_message, ai_message], word_count="10"
).to_messages()

#### Partial prompt templates


##### Partial with strings


In [None]:
from langchain.prompts import PromptTemplate

"""
Partial a prompt template when variables are received at different times. 
For instance, if a prompt template requires variables foo and bar, and the foo
value is obtained early on but the bar value is obtained later, partialing the
template with the foo value and passing the partial template along is more
convenient than carrying foo around until bar is available.
"""

# Variant 1: create the full template, then bind `foo` with .partial(); only
# `bar` remains to be supplied when formatting.
prompt = PromptTemplate.from_template("{foo}{bar}")
partial_prompt = prompt.partial(foo="foo")
print("1: ", partial_prompt.format(bar="baz"))

# Variant 2: initialize the prompt with the partial variables up front.
prompt = PromptTemplate(
  template="{foo}{bar}", input_variables=["bar"], partial_variables={"foo": "foo"}
)
print("2: ", prompt.format(bar="baz"))

##### Partial with functions

In [None]:
from datetime import datetime
from langchain.prompts import PromptTemplate

"""
Partialing with a function is useful when a variable must always be retrieved 
in the same way, such as the current date or time. Instead of hard-coding the 
current date in the prompt or passing it along with the other input variables, 
it is convenient to partial the prompt with a function that always returns the 
current date.
"""

def _get_datetime():
  now = datetime.now()
  return now.strftime("%m/%d/%Y, %H:%M:%S")

# `date` is registered as a partial variable backed by the `_get_datetime`
# function (not by a fixed string), so callers only pass `adjective`.
prompt = PromptTemplate(
  template="Tell me a {adjective} joke about the day {date}",
  input_variables=["adjective"],
  partial_variables={"date": _get_datetime}
)
print(prompt.format(adjective="funny"))

#### Pipeline

In [None]:
"""
Composing prompts using a PipelinePrompt. 
It allows reusing prompt parts and comprises:
- Final prompt: The returned prompt.
- Pipeline prompts: A list of tuples with a string name and a prompt template. 
Each formatted template is passed as a variable with the corresponding name to 
future templates.
"""

from langchain.prompts.pipeline import PipelinePromptTemplate
from langchain.prompts.prompt import PromptTemplate

# Part 1: who the model should impersonate.
introduction_template = """You are impersonating {person}."""
introduction_prompt = PromptTemplate.from_template(introduction_template)

# Part 2: a single worked question/answer pair.
example_template = """Here's an example of an interaction:

Q: {example_q}
A: {example_a}"""
example_prompt = PromptTemplate.from_template(example_template)

# Part 3: the real question to answer.
start_template = """Now, do this for real!

Q: {input}
A:"""
start_prompt = PromptTemplate.from_template(start_template)

# Final prompt: its variables are the names given to the three parts below.
full_template = """{introduction}

{example}

{start}"""
full_prompt = PromptTemplate.from_template(full_template)


# (name, template) pairs; each name matches a variable of `full_template`.
input_prompts = [
  ("introduction", introduction_prompt),
  ("example", example_prompt),
  ("start", start_prompt),
]
pipeline_prompt = PipelinePromptTemplate(
  final_prompt=full_prompt, pipeline_prompts=input_prompts
)
# Show which variables the composed prompt expects, then format it by
# supplying the variables of the individual parts.
print(pipeline_prompt.input_variables, "\n")
print(pipeline_prompt.format(
    person="Elon Musk",
    example_q="What's your favorite car?",
    example_a="Tesla",
    input="What's your favorite social media site?",
))

### ChatModels



#### Quick Start


##### LCEL


In [None]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

"""
Chat models implement the Runnable interface. 
They support (a)invoke/stream/batch, and astream_log calls. 
They accept List[BaseMessage] as inputs, including str (converted to 
HumanMessage) and PromptValue.
"""

chat = ChatOpenAI()

# One system instruction followed by the user's question.
messages = [
  SystemMessage(content="You're a helpful assistant"),
  HumanMessage(content="What is the purpose of model regularization?"),
]

# Uncomment one of the calls below to try the corresponding interface.
# Synchronous: single call, batch of inputs, chunk-by-chunk streaming.
# chat.invoke(messages)
# chat.batch([messages])
# for chunk in chat.stream(messages):
#   print(chunk.content, end="", flush=True)

# Asynchronous counterparts.
# await chat.ainvoke(messages)
# async for chunk in chat.astream(messages):
#   print(chunk.content, end="", flush=True)


#### Caching


In [None]:
from langchain.globals import set_llm_cache
from langchain_openai import ChatOpenAI
from langchain.cache import InMemoryCache, SQLiteCache

"""
LangChain offers an optional caching layer for chat models, serving two purposes: 
1. Cost-saving by minimizing API calls for recurrent completions, 
2. Improved application speed through a reduction in API calls to the LLM provider.
"""

llm = ChatOpenAI()


##### In Memory Cache

In [None]:
%%time
set_llm_cache(InMemoryCache())
# First occurrence, not in cache; hence, it takes longer.
llm.predict("Tell me a joke")

In [None]:
%%time
llm.predict("Tell me a joke")

##### SQLite Cache

In [None]:
!rm .langchain.db

In [None]:
# Switch the global LLM cache to a SQLite database stored at .langchain.db.
set_llm_cache(SQLiteCache(database_path=".langchain.db"))


#### Function calling



##### Defining functions


###### Python function


In [None]:
import json

from langchain_core.utils.function_calling import convert_to_openai_tool


# NOTE(review): this function is passed to convert_to_openai_tool() below;
# presumably its name, type hints and docstring become the tool schema, so
# keep the docstring accurate — confirm against the langchain_core docs.
def multiply(a: int, b: int) -> int:
    """Multiply two integers together.

    Args:
        a: First integer
        b: Second integer
    """
    return a * b


print(json.dumps(convert_to_openai_tool(multiply), indent=2))

###### Pydantic class


In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field


# NOTE: this class deliberately reuses the name `multiply`, so running this
# cell rebinds the name away from the plain function defined in the previous
# cell. The "Binding functions" cell passes `multiply` together with
# tool_choice="multiply", so the name is left lowercase.
class multiply(BaseModel):
    """Multiply two integers together."""

    # Operands, each with the description attached to its field.
    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


print(json.dumps(convert_to_openai_tool(multiply), indent=2))

###### LangChain Tool


In [None]:
from typing import Any, Type
import json
from langchain_core.tools import BaseTool
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.utils.function_calling import convert_to_openai_tool

# Argument schema for the Multiply tool: two required integer fields.
class MultiplySchema(BaseModel):
  """Multiply tool schema."""
  
  a: int = Field(..., description="First integer")
  b: int = Field(..., description="Second integer")
  
# Tool defined by subclassing BaseTool: name, description and argument schema
# are declared as class attributes, and the work happens in _run.
class Multiply(BaseTool):
  args_schema: Type[BaseModel] = MultiplySchema
  name: str = "multiply"
  description: str = "Multiply two integers together."
  
  def _run(self, a: int, b: int, **kwargs: Any) -> Any:
    # Return the product of the two operands; extra keyword arguments are
    # accepted and ignored.
    return a * b

print(json.dumps(convert_to_openai_tool(Multiply()), indent=2))

##### Binding functions

In [None]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")
# Alternative: pass the converted tool on a single call.
# llm.invoke("What's 5 times three", tools=[convert_to_openai_tool(multiply)])

# NOTE: `multiply` is whichever definition ran last. When the cells are run in
# order that is the Pydantic class, which reuses the function's name.
llm_with_tool = llm.bind_tools([multiply], tool_choice="multiply")
# llm_with_tool.invoke("what's 5 times three")



#### Streaming


In [None]:
from langchain_openai import ChatOpenAI

chat = ChatOpenAI()
# Print each chunk's content as soon as it arrives, without newlines, so the
# output reads as one continuous text.
for chunk in chat.stream("Write me a song about goldfish on the moon"):
  print(chunk.content, end="", flush=True)


#### Tracking token usage

In [None]:
from langchain.callbacks import get_openai_callback
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()

# All operations within the context manager are monitored.
# Only one call is made here; to track several sequential calls, place them
# all inside the same `with` block.
with get_openai_callback() as cb:
  result = llm.invoke("Tell me a joke")
  # Print the usage figures collected by the callback.
  print(cb)

In [None]:
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.callbacks import get_openai_callback
from langchain_openai import ChatOpenAI, OpenAI

# If a multi-step chain or agent is used, it will track all steps.

# AgentType.OPENAI_FUNCTIONS relies on OpenAI function calling, which is a
# chat-model feature, so the agent is driven by ChatOpenAI rather than the
# completion-style OpenAI LLM.
llm = ChatOpenAI()
# The calculator tool is registered under the hyphenated name "llm-math".
tools = load_tools(["serpapi", "llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)

# Every LLM call made by the agent inside this block is counted by `cb`.
with get_openai_callback() as cb:
  response = agent.run(
    "Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?"
  )
  print(f"Total Tokens: {cb.total_tokens}")
  print(f"Prompt Tokens: {cb.prompt_tokens}")
  print(f"Completion Tokens: {cb.completion_tokens}")
  print(f"Total Cost (USD): ${cb.total_cost}")

### LLMs



#### Quick Start


In [None]:
from langchain_openai import OpenAI

"""
LLMs, implementing the Runnable interface (LCEL's core). They support (a)invoke,
(a)stream, (a)batch, and astream_log calls.

They accept string inputs or objects convertible to string prompts, 
such as List[BaseMessage] and PromptValue.
"""

llm = OpenAI()

# NOTE: `prompt` is a plain string here; earlier cells bound the same name to
# prompt-template objects.
prompt = "What are some theories about the relationship between unemployment and inflation?"

# Last expression of the cell: the completion is shown as the cell output.
llm.invoke(prompt)

In [None]:
# Stream the completion and print every chunk as soon as it arrives.
for chunk in llm.stream(prompt):
  print(chunk, end="", flush=True)

In [None]:
# Batch interface: takes a list of prompts (a single one here).
llm.batch([prompt])

In [None]:
# Async counterpart of invoke(); top-level `await` works in a notebook cell.
await llm.ainvoke(prompt)

In [None]:
# Async counterpart of batch().
await llm.abatch([prompt])

In [None]:
# Async counterpart of stream(): print every chunk as soon as it arrives.
async for chunk in llm.astream(prompt):
  print(chunk, end="", flush=True)


#### Custom LLM


In [None]:
from typing import Any, List, Mapping, Optional

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

"""
Creating a custom LLM wrapper for LangChain requires implementing:
1. A _call method: takes a string and optional stop words, returns a string.
2. A _llm_type property: returns a string for logging.
Optionally, implement:
3. An _identifying_params property: returns a dictionary for class printing.
"""

# Implement a basic custom LLM returning the first n characters of the input.
class CustomLLM(LLM):
  # Number of leading prompt characters echoed back as the "completion".
  n: int

  def _call(
    self,
    prompt: str,
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    **kwargs: Any,
  ) -> str:
    """Return the first `n` characters of `prompt`.

    Raises:
      ValueError: if any stop words are supplied; they are not supported.
    """
    if stop is None:
      return prompt[:self.n]
    raise ValueError("stop kwargs are not permitted.")

  @property
  def _identifying_params(self) -> Mapping[str, Any]:
    """Parameters that identify this LLM instance."""
    return dict(n=self.n)

  @property
  def _llm_type(self) -> str:
    """Short type label for this LLM."""
    return "custom"

llm = CustomLLM(n=10)

# With n=10 only the first ten characters of the prompt come back: "This is a ".
print(llm.invoke("This is a foobar thing"))

# Print the LLM object itself.
print(llm)


#### Caching


In [None]:
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI
from langchain.cache import InMemoryCache

"""
LangChain offers an optional caching layer for LLMs, providing cost savings by 
minimizing repetitive API calls and improving application speed.
"""

# Completion model used by the caching cells below.
# NOTE(review): n=2 / best_of=2 presumably request two completions per prompt
# to make the uncached call noticeably slower — confirm.
llm = OpenAI(model="gpt-3.5-turbo-instruct", n=2, best_of=2)

In [None]:
%%time

set_llm_cache(InMemoryCache())

# Initially, not cached, hence longer duration.
llm.predict("Tell me a joke")

In [None]:
%%time

# The second time is faster.
llm.predict("Tell me a joke")

##### SQLite Cache

In [None]:
!rm .langchain.db

In [None]:
from langchain.cache import SQLiteCache

# Switch the global LLM cache to a SQLite database stored at .langchain.db.
set_llm_cache(SQLiteCache(database_path=".langchain.db"))

In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")

In [None]:
%%time
# The second time it is, so it goes faster
llm.predict("Tell me a joke")


#### Streaming


In [None]:
from langchain_openai import OpenAI

"""
LLMs implement the Runnable interface with default methods like (a)invoke, 
(a)batch, and (a)stream.

The default behavior returns an Iterator (or AsyncIterator for async streaming) 
of the final result from the underlying LLM provider. 
While this doesn't offer token-by-token streaming without native LLM provider 
support, it ensures compatibility for code expecting iterators of tokens across 
all our LLM integrations.
"""

# temperature=0 and max_tokens=512 are passed to the model for this demo.
llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0, max_tokens=512)
# Print every streamed chunk as soon as it arrives.
for chunk in llm.stream("Write me a song about sparkling water."):
  print(chunk, end="", flush=True)

### Output Parsers

## Retrieval



#### Document loaders


##### CSV


In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

"""
CSV (Comma-Separated Values) files are delimited text files where a comma 
separates values, creating data records with one or more fields per line. 
"""

# Use a single-row-per-document approach when loading CSV data.
# Load the CSV with default settings and print the resulting documents.
loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv')
data = loader.load()
print(data)

###### Customizing the CSV parsing and loading

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# `csv_args` carries the CSV parsing options (presumably forwarded to Python's
# csv module — confirm).
# NOTE(review): if the file already starts with a header row, supplying
# `fieldnames` likely makes that header row load as a data row — verify.
loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv',
                   csv_args={
                     'delimiter': ',',
                     'quotechar': '"',
                     'fieldnames': ['MLB Team', 'Payroll in millions', 'Wins'],
                   })
data = loader.load()

print(data)

###### Specify a column to identify the document source

In [None]:

"""
Utilize the source_column parameter to define the source for each document 
created from a row. If not specified, file_path will be used as the source for 
all documents generated from the CSV file. This is particularly beneficial for 
chains answering questions using sources.
"""
# "Team" is presumably a column name from the CSV header — confirm against the
# file. `CSVLoader` is imported in the CSV cells above.
loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv', source_column="Team")
data = loader.load()
print(data)


##### File Directory


##### HTML


In [None]:
from langchain_community.document_loaders import UnstructuredHTMLLoader

"""
HTML is the standard markup language for web documents. 
This guide explains loading HTML into a usable document format.
"""

# Load the sample HTML file with the Unstructured-based loader.
loader = UnstructuredHTMLLoader('./example_data/fake-content.html')
data = loader.load()
print(data)

###### Loading HTML with BeautifulSoup4

In [None]:
from langchain_community.document_loaders import BSHTMLLoader

# Same file, loaded with the BeautifulSoup4-based loader instead.
loader = BSHTMLLoader('./example_data/fake-content.html')
data = loader.load()
print(data)

##### JSON


In [None]:
import json
from pathlib import Path
from pprint import pprint

file_path = './example_data/facebook_chat.json'
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
data = json.loads(Path(file_path).read_text(encoding="utf-8"))
pprint(data)


###### Using JSONLoader


###### JSON file


In [None]:
from langchain_community.document_loaders import JSONLoader

# Extract values under 'content' field within 'messages' key using JSONLoader.
# `pprint` comes from the import in the earlier JSON cell.
loader = JSONLoader(
  file_path='./example_data/facebook_chat.json',
  jq_schema='.messages[].content',
  text_content=False,
)
data = loader.load()
pprint(data)

###### JSON Lines file


In [None]:
from langchain_community.document_loaders import JSONLoader

"""
For loading documents from a JSON Lines file, set json_lines=True and provide 
jq_schema to extract page_content from a single JSON object.
"""

# JSON Lines input: `jq_schema` is applied to each line's JSON object and
# picks its 'content' field.
loader = JSONLoader(
  file_path='./example_data/facebook_chat_messages.jsonl',
  jq_schema='.content',
  text_content=False,
  json_lines=True,
)
data = loader.load()
pprint(data)

In [None]:
# Alternative: select each whole JSON object with '.' and name the field to
# use through `content_key` ('sender_name' here).
loader = JSONLoader(
  file_path='./example_data/facebook_chat_messages.jsonl',
  jq_schema='.',
  content_key='sender_name',
  json_lines=True,
)
data = loader.load()
pprint(data)

###### Extracting metadata


In [None]:
from langchain_community.document_loaders import JSONLoader

# Define the metadata extraction function.


def metadata_func(record: dict, metadata: dict) -> dict:
  """Copy the sender name and timestamp of `record` into `metadata`.

  Missing fields are stored as None. `metadata` is updated in place and
  returned.
  """
  for field in ("sender_name", "timestamp_ms"):
    metadata[field] = record.get(field)
  return metadata


# '.messages[]' selects whole message records; `content_key` names the field
# used as text, and `metadata_func` (defined above) adds the sender name and
# timestamp to each document's metadata.
loader = JSONLoader(
  file_path='./example_data/facebook_chat.json',
  jq_schema='.messages[]',
  content_key="content",
  metadata_func=metadata_func
)

data = loader.load()

###### The metadata_func


In [None]:
# Define the metadata extraction function.
def metadata_func(record: dict, metadata: dict) -> dict:
  """Add sender/timestamp metadata and shorten the `source` path.

  Args:
    record: one message record; "sender_name" and "timestamp_ms" are read
      from it, and missing fields are stored as None.
    metadata: metadata collected so far; updated in place.

  Returns:
    The same `metadata` dict. If it has a "source" path containing a
    "langchain" component, the path is trimmed to start at that component;
    otherwise the source is left untouched.
  """
  metadata["sender_name"] = record.get("sender_name")
  metadata["timestamp_ms"] = record.get("timestamp_ms")

  if "source" in metadata:
    parts = metadata["source"].split("/")
    # Not every checkout lives under a "langchain" directory; without this
    # guard list.index() raises ValueError for such paths.
    if "langchain" in parts:
      metadata["source"] = "/".join(parts[parts.index("langchain"):])

  return metadata


# Same loader configuration as before, now using the `metadata_func` defined
# in this cell, which also rewrites the `source` entry.
loader = JSONLoader(
  file_path='./example_data/facebook_chat.json',
  jq_schema='.messages[]',
  content_key="content",
  metadata_func=metadata_func
)

data = loader.load()

###### Common JSON structures with jq schema

##### Markdown


In [None]:
from langchain_community.document_loaders import UnstructuredMarkdownLoader

# Load the sample Markdown file with default settings.
markdown_path = './example_data/README.md'
loader = UnstructuredMarkdownLoader(markdown_path)
data = loader.load()
print(data)

###### Retain Elements


In [None]:
"""
Unstructured creates distinct "elements" for various text chunks. 
By default, we merge them, but you can maintain separation by specifying
mode="elements".
"""

from langchain_community.document_loaders import UnstructuredMarkdownLoader

# mode="elements" keeps the individual elements separate instead of merging.
markdown_path = './example_data/README.md'
loader = UnstructuredMarkdownLoader(markdown_path, mode="elements")
data = loader.load()
print(data)

##### PDF


###### Using PyPDF


In [None]:
from pprint import pprint
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_openai import OpenAIEmbeddings

# Load PDF using pypdf into an array of documents. Each document includes
# page content and metadata, including page number.
loader = PyPDFLoader("./example_data/layout-parser-paper.pdf")
pages = loader.load_and_split()
# pprint(pages)

# Index the pages and retrieve the two chunks most similar to the question.
faiss_index = FAISS.from_documents(documents=pages, embedding=OpenAIEmbeddings())
docs = faiss_index.similarity_search("How will the community be engaged?", k=2)
# Show the page number and the first 300 characters of every hit.
for doc in docs:
  print(str(doc.metadata["page"]) + ":", doc.page_content[:300], "\n")

Extracting images


In [None]:
from langchain_community.document_loaders import PyPDFLoader

# The rapidocr-onnxruntime package extracts images as text.
# The PDF is given as a URL here; extract_images=True turns on image
# extraction.
loader = PyPDFLoader("https://arxiv.org/pdf/2103.15348.pdf", extract_images=True)
pages = loader.load()
# Print the content of the fifth loaded page (index 4).
print(pages[4].page_content)

###### Using MathPix


In [None]:
from langchain_community.document_loaders import MathpixPDFLoader

# Inspired by Daniel Gross's https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
# Load the sample paper through the Mathpix loader.
loader = MathpixPDFLoader("example_data/layout-parser-paper.pdf")
data = loader.load()

###### Using Unstructured


In [None]:
from langchain_community.document_loaders import UnstructuredPDFLoader

# Load the sample paper through the Unstructured loader (default mode).
loader = UnstructuredPDFLoader("example_data/layout-parser-paper.pdf")
data = loader.load()

Retain Elements


In [None]:
"""
Unstructured segregates text into distinct "elements" internally. By default, 
it combines these elements, but you can preserve their separation by 
specifying mode="elements".
"""
# Same loader as in the previous cell (which also provides the import), now
# keeping the elements separate.
loader = UnstructuredPDFLoader(
    "example_data/layout-parser-paper.pdf", mode="elements")
data = loader.load()

###### Using PyPDFium2


In [None]:
from langchain_community.document_loaders import PyPDFium2Loader
# Load the sample paper through the PyPDFium2 loader.
loader = PyPDFium2Loader("example_data/layout-parser-paper.pdf")
data = loader.load()

###### Using PDFMiner


In [None]:
from langchain_community.document_loaders import PDFMinerLoader
# Load the sample paper through the PDFMiner loader.
loader = PDFMinerLoader("example_data/layout-parser-paper.pdf")
data = loader.load()

 Using PDFMiner to generate HTML text


In [None]:
from langchain.docstore.document import Document
import re
from bs4 import BeautifulSoup
from langchain_community.document_loaders import PDFMinerPDFasHTMLLoader

# Helpful for semantically chunking texts into sections: parse the output HTML
# content with BeautifulSoup for structured information on font size, page
# numbers, PDF headers/footers, etc.

loader = PDFMinerPDFasHTMLLoader("example_data/layout-parser-paper.pdf")
data = loader.load()[0]   # entire PDF is loaded as a single Document

soup = BeautifulSoup(data.page_content, 'html.parser')
content = soup.find_all('div')

# Raw string: `\d` must reach the regex engine untouched. In a normal string
# literal it is an invalid escape sequence that newer Python versions warn about.
font_size_pattern = re.compile(r'font-size:(\d+)px')

# Step 1: merge consecutive <div>s that share a font size into (text, font_size)
# snippets. Divs without a <span>, a style attribute or a font size are skipped.
cur_fs = None
cur_text = ''
snippets = []
for c in content:
  sp = c.find('span')
  if not sp:
    continue
  st = sp.get('style')
  if not st:
    continue
  fs = font_size_pattern.findall(st)
  if not fs:
    continue
  fs = int(fs[0])
  if not cur_fs:
    cur_fs = fs
  if fs == cur_fs:
    cur_text += c.text
  else:
    # Font size changed: close the running snippet and start a new one.
    snippets.append((cur_text, cur_fs))
    cur_fs = fs
    cur_text = c.text
snippets.append((cur_text, cur_fs))
# Strategies include removing duplicate snippets since headers/footers in a PDF
# appear on multiple pages, indicating redundant information.


def _new_section(heading, heading_font):
  """Return an empty section Document whose metadata records its heading.

  The metadata of the loaded PDF (`data.metadata`) is merged in last, so it
  takes precedence over the heading keys on a name clash.
  """
  metadata = {'heading': heading, 'content_font': 0, 'heading_font': heading_font}
  metadata.update(data.metadata)
  return Document(page_content='', metadata=metadata)


# Step 2: group the snippets into sections.
# Assumption: headings have higher font size than their respective content.
semantic_snippets = []
for text, font_size in snippets:
  section = semantic_snippets[-1] if semantic_snippets else None

  # First snippet, or font size larger than the current section's heading
  # => it is a new heading.
  if section is None or font_size > section.metadata['heading_font']:
    semantic_snippets.append(_new_section(text, font_size))
    continue

  # Font size <= the current section's content font (or the section has no
  # content yet) => the text belongs to the current section. (One could also
  # build a tree of sub-sections, but that is likely data specific.)
  content_font = section.metadata['content_font']
  if not content_font or font_size <= content_font:
    section.page_content += text
    section.metadata['content_font'] = max(font_size, content_font)
    continue

  # Font size larger than the section's content but not larger than its heading
  # => also start a new section (e.g. the title of a PDF has the highest font
  # size, but it should not subsume all sections).
  semantic_snippets.append(_new_section(text, font_size))

print(semantic_snippets[4])

###### Using PyMuPDF


In [None]:
from langchain_community.document_loaders import PyMuPDFLoader

# Fastest PDF parsing option with detailed metadata about PDF and pages, returns
# one document per page.
loader = PyMuPDFLoader("example_data/layout-parser-paper.pdf")
data = loader.load()
# Show the first loaded document.
print(data[0])

# You can pass options from the PyMuPDF documentation as keyword arguments in
# the load call, and they will be forwarded to the get_text() call.

###### PyPDF Directory


In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
# Point the loader at a directory instead of a single file.
loader = PyPDFDirectoryLoader("example_data/")
docs = loader.load()

###### Using PDFPlumber


In [None]:
from langchain_community.document_loaders import PDFPlumberLoader

# Outputs documents with detailed metadata for each PDF page. Each document
# corresponds to a page.
loader = PDFPlumberLoader("example_data/layout-parser-paper.pdf")
data = loader.load()
# Show the first loaded document.
print(data[0])

#### Text Splitters


##### HTMLHeaderTextSplitter


###### With an HTML string

In [None]:
from pprint import pprint
from langchain.text_splitter import HTMLHeaderTextSplitter

# Sample HTML document with nested <h1>/<h2>/<h3> sections to split on.
html_string = """
<!DOCTYPE html>
<html>
<body>
  <div>
    <h1>Foo</h1>
    <p>Some intro text about Foo.</p>
    <div>
      <h2>Bar main section</h2>
      <p>Some intro text about Bar.</p>
      <h3>Bar subsection 1</h3>
      <p>Some text about the first subtopic of Bar.</p>
      <h3>Bar subsection 2</h3>
      <p>Some text about the second subtopic of Bar.</p>
    </div>
    <div>
      <h2>Baz</h2>
      <p>Some text about Baz</p>
    </div>
    <br>
    <p>Some concluding text about Foo</p>
  </div>
</body>
</html>
"""

# (HTML tag, label) pairs passed to the splitter: the header tags to split on
# and the label associated with each.
headers_to_split_on = [
  ("h1", "Header 1"),
  ("h2", "Header 2"),
  ("h3", "Header 3"),
]

html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
html_header_splits = html_splitter.split_text(html_string)
pprint(html_header_splits)

###### Pipelined to another splitter, with html loaded from a web URL

In [None]:
from pprint import pprint
from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter

url = "https://plato.stanford.edu/entries/goedel/"

# Split on <h1>..<h4>, labelling each level "Header 1".."Header 4".
headers_to_split_on = [(f"h{level}", f"Header {level}") for level in range(1, 5)]

# Stage 1: fetch the page and split it along its header structure.
html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
html_header_splits = html_splitter.split_text_from_url(url)

# Stage 2: cut the header-level splits further into small overlapping chunks.
chunk_size, chunk_overlap = 500, 30
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                               chunk_overlap=chunk_overlap)
splits = text_splitter.split_documents(html_header_splits)

pprint(splits)

##### Split by character


In [None]:
from pprint import pprint
from langchain.text_splitter import CharacterTextSplitter

# The simplest method: split on a single separator string (here "\n\n") and
# measure chunk length by the number of characters (length_function=len).

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file = f.read()

text_splitter = CharacterTextSplitter(
  separator="\n\n",
  chunk_size=1000,
  chunk_overlap=200,
  length_function=len,
  is_separator_regex=False,
)
# Wrap the resulting chunks in Document objects and show them.
texts = text_splitter.create_documents([file])
pprint(texts)

In [None]:
# Example of passing metadata along with the documents; note that each metadata
# dict is carried over to the chunks produced from the matching input text.
# `file` (the text read in the previous cell) and `text_splitter` come from the
# previous cell. The text is passed twice so that each copy gets its own
# metadata entry.
metadatas = [{"document": 1}, {"document": 2}]
documents = text_splitter.create_documents([file, file], metadatas=metadatas)
print(documents[0])

##### Split code


##### MarkdownHeaderTextSplitter


In [None]:
from pprint import pprint
from langchain.text_splitter import (MarkdownHeaderTextSplitter,
                                     RecursiveCharacterTextSplitter)

# Sample markdown text containing "#", "##" and "####" headings.
markdown_document = "# Intro \n\n    ## History \n\n Markdown[9] is a lightweight markup language for creating formatted text using a plain-text editor. John Gruber created Markdown in 2004 as a markup language that is appealing to human readers in its source code form.[9] \n\n Markdown is widely used in blogging, instant messaging, online forums, collaborative software, documentation pages, and readme files. \n\n ## Rise and divergence \n\n As Markdown popularity grew rapidly, many Markdown implementations appeared, driven mostly by the need for \n\n additional features such as tables, footnotes, definition lists,[note 1] and Markdown inside HTML blocks. \n\n #### Standardization \n\n From 2012, a group of people, including Jeff Atwood and John MacFarlane, launched what Atwood characterised as a standardisation effort. \n\n ## Implementations \n\n Implementations of Markdown are available for over a dozen programming languages."

# (markdown prefix, label) pairs passed to the splitter. Note that "####" is
# not in the list although the sample text contains such a heading.
headers_to_split_on = [
  ("#", "Header 1"),
  ("##", "Header 2"),
  ("###", "Header 3"),
]
# By default, MarkdownHeaderTextSplitter strips headers being split on from the
# output chunk's content. This can be disabled by setting strip_headers = False.
markdown_splitter = MarkdownHeaderTextSplitter(
  headers_to_split_on=headers_to_split_on, strip_headers=False,
)
md_header_splits = markdown_splitter.split_text(markdown_document)
pprint(md_header_splits); print()

# Within each markdown group, any text splitter can then be applied.
chunk_size = 250
chunk_overlap = 30
text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=chunk_size, chunk_overlap=chunk_overlap
)
splits = text_splitter.split_documents(md_header_splits)
pprint(splits)

##### Recursively split JSON


In [None]:
from pprint import pprint
import json, requests
from langchain.text_splitter import RecursiveJsonSplitter

# This large nested JSON object will be loaded into a Python dictionary.
# (Fetched over the network on every run of this cell.)
json_data = requests.get("https://api.smith.langchain.com/openapi.json").json()

splitter = RecursiveJsonSplitter(max_chunk_size=300)

# Recursively split JSON data to access or manipulate smaller chunks.
json_chunks = splitter.split_json(json_data=json_data)
# The splitter can output documents
docs = splitter.create_documents(texts=[json_data])
# or a list of strings
texts = splitter.split_text(json_data=json_data)

# The json splitter does not split lists by default. The following preprocesses
# the JSON, converting lists to a dictionary with index:item as key:val pairs.
texts_lists = splitter.split_text(json_data=json_data, convert_lists=True)

##### Recursively split by character


In [None]:
from pprint import pprint
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file_content = f.read()

# Small chunks (100 characters, measured with len) with a 20-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=100,
  chunk_overlap=20,
  length_function=len,
  is_separator_regex=False,
)
# Wrap the resulting chunks in Document objects and show them.
texts_splits = text_splitter.create_documents([file_content])
pprint(texts_splits)

##### Semantic Chunking


In [None]:
from pprint import pprint
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings

# Splits the text based on semantic similarity. This splits into sentences,
# groups into sets of 3 sentences, and merges those similar in the embedding
# space.

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file_content = f.read()

# The chunker is given an OpenAI embedding model to compare text with.
text_splitter = SemanticChunker(OpenAIEmbeddings())

docs = text_splitter.create_documents([file_content])
pprint(docs)

##### Split by tokens

###### tiktoken


In [None]:
from pprint import pprint
from langchain.text_splitter import CharacterTextSplitter

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file_content = f.read()

# Build the character splitter through its tiktoken-encoder constructor.
# NOTE(review): presumably chunk_size is then counted in tiktoken tokens — confirm.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
  chunk_size=100, chunk_overlap=0,
)
texts_splits = text_splitter.split_text(file_content)
pprint(texts_splits)

###### spaCy


In [None]:
from pprint import pprint
from langchain.text_splitter import SpacyTextSplitter

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file_content = f.read()

# Split the text with the spaCy-based splitter, using a chunk size of 1000.
text_splitter = SpacyTextSplitter(chunk_size=1000)
text_splits = text_splitter.split_text(file_content)

pprint(text_splits)

###### SentenceTransformers


In [None]:
from pprint import pprint
from langchain.text_splitter import SentenceTransformersTokenTextSplitter

text_splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=0)
text = "Lorem "

# Subtract the start and stop tokens so only the tokens of `text` itself count.
count_start_and_stop_tokens = 2
text_token_count = text_splitter.count_tokens(text=text) - count_start_and_stop_tokens
print(f"text_token_count: {text_token_count}")

# Repeat `text` often enough that the result exceeds the splitter's
# maximum_tokens_per_chunk, so it has to be split into more than one chunk.
token_multiplier = text_splitter.maximum_tokens_per_chunk // text_token_count + 1
text_to_split = text*token_multiplier
print(
    f"tokens in text to split: {text_splitter.count_tokens(text=text_to_split)}")

text_chunks = text_splitter.split_text(text=text_to_split)
pprint(text_chunks)

###### NLTK


In [None]:
from pprint import pprint
from langchain.text_splitter import NLTKTextSplitter

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file_content = f.read()
  
# Split the text with the NLTK-based splitter, using a chunk size of 1000.
text_splitter = NLTKTextSplitter(chunk_size=1000)
text_splits = text_splitter.split_text(file_content)

pprint(text_splits)

###### Hugging Face tokenizer

In [None]:
from pprint import pprint
from transformers import GPT2TokenizerFast
from langchain.text_splitter import CharacterTextSplitter

# Read the whole example text into memory.
with open("./example_data/state_of_the_union.txt") as f:
  file_content = f.read()

# Load the pretrained GPT-2 tokenizer from Hugging Face.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

# Build the character splitter through its Hugging Face tokenizer constructor.
# NOTE(review): presumably chunk_size is then counted in tokenizer tokens — confirm.
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
  tokenizer=tokenizer, chunk_size=100, chunk_overlap=0
)
text_splits = text_splitter.split_text(file_content)
pprint(text_splits)

#### Text embedding models



##### Intro


In [None]:
from pprint import pprint
from langchain_openai import OpenAIEmbeddings

embeddings_model = OpenAIEmbeddings()

# --- embed_documents: embed a list of texts in one call ---
docs = ["Hi there!", "Oh, hello!", "What's your name?",
        "My friends call me World", "Hello World!"]
embedded_docs = embeddings_model.embed_documents(docs)
# Number of vectors returned, and the length of the first vector.
print(len(embedded_docs), len(embedded_docs[0]))
# First vector, followed by one blank line.
print(embedded_docs[0], end="\n\n")

# --- embed_query: embed a single text, e.g. to compare it against others ---
query = "What was the name mentioned in the conversation?"
embedded_query = embeddings_model.embed_query(query)
print(len(embedded_query))
print(embedded_query)

##### CacheBackedEmbeddings

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.storage import LocalFileStore, InMemoryByteStore
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain_community.vectorstores.faiss import FAISS

embeddings_model = OpenAIEmbeddings()

# Using local file system for storing embeddings and FAISS vector store for retrieval
store = LocalFileStore("./cache/")
# To use a different ByteStore, specify it when creating `CacheBackedEmbeddings`.
# store = InMemoryByteStore()

# Wrap the embedding model so document embeddings are cached in `store`.
# The model name is used as the cache namespace.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
  underlying_embeddings=embeddings_model,
  document_embedding_cache=store,
  namespace=embeddings_model.model
)

# List the keys currently in the cache.
# NOTE(review): empty only if "./cache/" holds nothing from an earlier run.
print(list(store.yield_keys()), "\n")

# Load the document, split into chunks, embed each, load into the vector store.
raw_documents = TextLoader("./example_data/state_of_the_union.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# Create the vector store.
db = FAISS.from_documents(documents, cached_embedder)
# Re-creating the vector store is faster, as it avoids re-computing embeddings.

# Show the first five cache keys created by the embedding step.
print(list(store.yield_keys())[:5])

#### Vector stores



##### Get started


In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.chroma import Chroma
from langchain_community.vectorstores.faiss import FAISS

embeddings = OpenAIEmbeddings()

# Load the document
raw_documents = TextLoader("./example_data/state_of_the_union.txt").load()
# Split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
# Embed each chunk and load it into two vector stores (Chroma and FAISS) so the
# same queries can be run against both.
db_chroma = Chroma.from_documents(documents, embeddings)
db_faiss = FAISS.from_documents(documents, embeddings)

# Similarity search with a text query; print the top hit from each store.
query = "What did the president say about Ketanji Brown Jackson"
result_chroma = db_chroma.similarity_search(query)
result_faiss = db_faiss.similarity_search(query)
print(result_chroma[0].page_content)
print(result_faiss[0].page_content)
print('-'*80)

# Similarity document search based on an embedding vector: embed the query
# first, then search with the vector instead of the text.
embedding_vector = embeddings.embed_query(query)
result_vector_chroma = db_chroma.similarity_search_by_vector(embedding_vector)
result_vector_faiss = db_faiss.similarity_search_by_vector(embedding_vector)
print(result_vector_chroma[0].page_content)
print(result_vector_faiss[0].page_content)

##### Asynchronous operations


In [None]:
from langchain_community.vectorstores.qdrant import Qdrant

# Create a vector store asynchronously
db = await Qdrant.afrom_documents(documents, embeddings, "http://localhost:6333")

# Similarity search
query = "What did the president say about Ketanji Brown Jackson"
docs = await db.asimilarity_search(query)
print(docs[0].page_content)

# Similarity search by vector
embedding_vector = embeddings.embed_query(query)
docs = await db.asimilarity_search_by_vector(embedding_vector)

# Maximal Marginal Relevance optimizes for query similarity and diversity among 
# selected documents. This feature is also supported in the async API.
query = "What did the president say about Ketanji Brown Jackson"
found_docs = await Qdrant.amax_marginal_relevance_search(query, k=2, fetch_k=10)
for i, doc in enumerate(found_docs):
    print(f"{i + 1}.", doc.page_content, "\n")

#### Retrievers


##### Intro


In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
# Imported from langchain_core, matching the other cells of this notebook.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

template = """Answer the question based only on the following context:

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI()


def format_docs(docs):
    """Join the page contents of `docs` into one string, separated by blank lines."""
    return "\n\n".join([d.page_content for d in docs])


# NOTE(review): `retriever` is not defined in this cell; it must already exist
# in the kernel from an earlier cell — confirm when running top to bottom.
# The question string is sent to the retriever (whose documents are formatted
# into `context`) and is also passed through unchanged as `question`.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke("What did the president say about technology?")

##### Vector store-backed retriever


In [None]:
from pprint import pprint
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import faiss

loader = TextLoader("./example_data/state_of_the_union.txt")
documents = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
db = faiss.FAISS.from_documents(texts, embeddings)

# `as_retriever` accepts each keyword only once, so the alternative
# configurations are built as separate retrievers instead of being combined
# into a single call (repeating a keyword argument is a SyntaxError).
retrievers = {
  # The default mode for the VectorStoreRetriever is similarity search.
  # Specify search kwargs like `k` for retrieval customization.
  "similarity_top_k": db.as_retriever(search_kwargs={"k": 1}),

  # Specify the search type as maximum marginal relevance (if supported).
  "mmr": db.as_retriever(search_type="mmr"),

  # Set a retrieval method with a similarity score threshold, returning only
  # documents surpassing that threshold.
  "similarity_score_threshold": db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5},
  ),
}
# Pick the configuration to demonstrate; change the key to try another one.
retriever = retrievers["similarity_top_k"]

query = "what did he say about ketanji brown jackson"
docs = retriever.get_relevant_documents(query)
pprint(docs)

##### MultiQueryRetriever


In [None]:
import logging
from pprint import pprint
from typing import List
from pydantic import BaseModel, Field
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import chroma
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser

# Raise the multi-query retriever's logger to INFO level.
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

# Load a blog post from the web and split it into 500-character chunks.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(data)

# Index the chunks in Chroma and expose the store as the base retriever.
embedding = OpenAIEmbeddings()
vectordb = chroma.Chroma.from_documents(documents=splits, embedding=embedding)
retriever = vectordb.as_retriever()

# Specify the LLM for query generation, and the retriever handles the rest.
llm = ChatOpenAI(temperature=0)

# Supply a prompt with an output parser to split results into a list of queries.
# Output parser will split the LLM result into a list of queries


# Container for the parsed LLM output: one string per generated query.
class LineList(BaseModel):
  # "lines" is the key (attribute name) of the parsed output
  lines: List[str] = Field(description="Lines of text")


# Output parser that turns raw LLM text into a LineList, one entry per line.
class LineListOutputParser(PydanticOutputParser):
  def __init__(self) -> None:
    super().__init__(pydantic_object=LineList)

  def parse(self, text: str) -> LineList:
    # Strip surrounding whitespace, then split on newlines.
    lines = text.strip().split("\n")
    return LineList(lines=lines)


output_parser = LineListOutputParser()

template = """You are an AI language model assistant. Your task is to generate 
five different versions of the given user question to retrieve relevant documents
from a vector database. By generating multiple perspectives on the user question, 
your goal is to help the user overcome some of the limitations of the 
distance-based similarity search. Provide these alternative questions separated 
by newlines. Original question: {question}"""
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template=template
)

# Chain that generates the alternative queries and parses them into a LineList.
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)
multi_query_retriever = MultiQueryRetriever(
    retriever=retriever,
    llm_chain=llm_chain,
    parser_key="lines",  # key (attribute name) of the parsed output
)

# Run the multi-query retrieval; the result is stored but not displayed here.
query = "What does the course say about regression?"
unique_docs = multi_query_retriever.get_relevant_documents(query)

##### Contextual compression


In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.vectorstores import faiss
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import (LLMChainExtractor,
      LLMChainFilter, EmbeddingsFilter, DocumentCompressorPipeline)

# Initializes a vector store retriever, stores the data in chunks. The retriever
# returns relevant and irrelevant docs, with relevant ones containing excess
# irrelevant information.
data = TextLoader("./example_data/state_of_the_union.txt").load()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)

embeddings = OpenAIEmbeddings()
db = faiss.FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever()

llm = OpenAI(temperature=0)

# Extracts relevant content for the query from documents.
llm_chain_extractor = LLMChainExtractor.from_llm(llm)

# Use an LLM chain to filter out or return initially retrieved
# documents without manipulating their contents.
llm_chain_filter = LLMChainFilter.from_llm(llm)

# Embeds documents and query, returning documents with similar embeddings.
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings, similarity_threshold=0.76
)

# Split documents
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")
# Filters out redundant documents based on embedding similarity.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
# Combines compressors in sequence.
pipeline_compressor = DocumentCompressorPipeline(
  transformers=[splitter, redundant_filter, relevant_filter]
)

# All compressor variants by name; change the key used below to try another one.
compressors = {
  "llm_chain_extractor": llm_chain_extractor,
  "llm_chain_filter": llm_chain_filter,
  "embeddings_filter": embeddings_filter,
  "pipeline_compressor": pipeline_compressor,  
}
# Wrap the base retriever so its results pass through the chosen compressor.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressors["pipeline_compressor"], base_retriever=retriever
)

query = "What did the president say about Ketanji Brown Jackson"
compressed_docs = compression_retriever.get_relevant_documents(query)
# `pretty_print_docs` is the helper defined in the first cell of the notebook.
pretty_print_docs(compressed_docs)

##### Ensemble Retriever


In [None]:
from pprint import pprint
from langchain_core.runnables import ConfigurableField
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import faiss
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# Two small corpora; each retriever below indexes one of them, and the
# "source" metadata records which list a hit came from.
doc_list_1 = [
  "I like apples",
  "I like oranges",
  "Apples and oranges are fruits",
]
doc_list_2 = [
  "You like apples",
  "You like oranges",
]

# BM25 retriever over the first list, limited to 2 results.
retriever_bm25 = BM25Retriever.from_texts(
  doc_list_1, metadatas=[{"source": 1}] * len(doc_list_1)
)
retriever_bm25.k = 2

# FAISS (embedding-based) retriever over the second list, also 2 results.
embedding = OpenAIEmbeddings()
vectorstore_faiss = faiss.FAISS.from_texts(
  doc_list_2, embedding, metadatas=[{"source": 2}] * len(doc_list_2)
)
retriever_faiss = vectorstore_faiss.as_retriever(
  search_kwargs={"k": 2}
)

# Combine both retrievers with equal weights.
retriever_ensemble = EnsembleRetriever(
  retrievers=[retriever_bm25, retriever_faiss], weights=[0.5, 0.5]
)

pprint(retriever_ensemble.invoke("apples"))

##### Long-Context Reorder


In [None]:
from pprint import pprint
from langchain.chains import LLMChain, StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain_community.document_transformers import LongContextReorder
from langchain_community.vectorstores import chroma
from langchain_openai import OpenAI, OpenAIEmbeddings

# Small corpus mixing Celtics-related sentences with unrelated ones.
texts = [
  "Basquetball is a great sport.",
  "Fly me to the moon is one of my favourite songs.",
  "The Celtics are my favourite team.",
  "This is a document about the Boston Celtics",
  "I simply love going to the movies",
  "The Boston Celtics won the game by 20 points",
  "This is just a random text.",
  "Elden Ring is one of the best games in the last 15 years.",
  "L. Kornet is one of the best Celtics players.",
  "Larry Bird was an iconic NBA player.",
]

llm = OpenAI()
embeddings = OpenAIEmbeddings()
vectorstore = chroma.Chroma.from_texts(texts, embedding=embeddings)
# k=10 retrieves every text in the corpus, ordered by relevance.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
# Reorder documents: Less relevant document in the middle, more relevant at the
# beginning/end.
reodering = LongContextReorder()

stuff_prompt_template = """Given this text extracts:
-----
{context}
-----
Please answer the following question:
{query}
"""
stuff_prompt = PromptTemplate(
  template=stuff_prompt_template, input_variables=["context", "query"]
)
llm_chain = LLMChain(llm=llm, prompt=stuff_prompt)

# Prepare and run a custom Stuff chain with reordered docs as context.
# Override prompts
document_prompt = PromptTemplate(
  input_variables=["page_content"], template="{page_content}"
)
document_variable_name = "context"
stuff_chain = StuffDocumentsChain(
  llm_chain=llm_chain,
  document_prompt=document_prompt,
  document_variable_name=document_variable_name,
)

query = "What can you tell me about the Celtics?"
docs = retriever.get_relevant_documents(query)
reodered_docs = reodering.transform_documents(docs)
# Run the chain built in this cell (`stuff_chain`). The bare name `chain` is
# not defined here and would resolve to whatever an earlier cell left behind.
pprint(stuff_chain.run(input_documents=reodered_docs, query=query))

##### MultiVector Retriever


In [None]:
import uuid
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryByteStore
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import chroma
from langchain.retrievers.multi_vector import (MultiVectorRetriever,
                                               SearchType)
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

# Shared setup for the three MultiVectorRetriever cells that follow.
# `text_splitter` produces the large parent documents (chunk size 10000);
# `child_text_splitter` produces the small chunks (chunk size 400).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000)
child_text_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
embeddings = OpenAIEmbeddings()

loaders = [
  TextLoader("./example_data/paul_graham_essay.txt"),
  TextLoader("./example_data//state_of_the_union.txt"),
]
# Load both files and split them into the parent documents.
data = []
for loader in loaders:
  data.extend(loader.load())
docs = text_splitter.split_documents(data)

# One random UUID per parent document; `id_key` is the metadata field under
# which derived documents store the id of their parent.
doc_ids = [str(uuid.uuid4()) for _ in docs]
id_key = "doc_id"
# In-memory byte store shared by the retrievers built in the following cells.
byte_store = InMemoryByteStore()

###### Smaller chunks


In [None]:
# Vector store used to index the child chunks.
vectorstore = chroma.Chroma(
  collection_name="full_documents", embedding_function=embeddings
)

# Retriever that pairs the child-chunk vector store with `byte_store`, the
# storage layer for the parent documents.
retriever = MultiVectorRetriever(
  vectorstore=vectorstore,
  byte_store=byte_store,
  id_key=id_key,
)

# Split every parent document into small chunks and tag each chunk with the id
# of the parent it came from.
sub_docs = []
for i, doc in enumerate(docs):
  _id = doc_ids[i]
  _sub_docs = child_text_splitter.split_documents([doc])
  for _doc in _sub_docs:
    _doc.metadata[id_key] = _id
  sub_docs.extend(_sub_docs)

# Index the small chunks, and store the parents under their ids.
retriever.vectorstore.add_documents(sub_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

# The retriever performs a similarity search by default on the vector database.
# LangChain Vector Stores also support Max Marginal Relevance search.
retriever.search_type = SearchType.mmr

# Vectorstore alone retrieves the small chunks
print(retriever.vectorstore.similarity_search("justice breyer")[0])
# Retriever returns larger chunks
print(len(retriever.get_relevant_documents("justice breyer")[0].page_content))

###### Summary


In [None]:
template = """Summarize the following document:
{doc}
"""
# Chain: take a Document, put its page content into the prompt, call the chat
# model and return the reply as a plain string.
chain = (
  {"doc": lambda x: x.page_content}
  | ChatPromptTemplate.from_template(template)
  | ChatOpenAI(max_retries=0)
  | StrOutputParser()
)
# Summarize all parent documents, at most 5 at a time.
summaries = chain.batch(docs, {"max_concurrency": 5})

# Vector store used to index the summaries.
vectorstore = chroma.Chroma(
  collection_name="summaries", embedding_function=embeddings
)
# Retriever that pairs the summary vector store with `byte_store`, the storage
# layer for the parent documents.
retriever = MultiVectorRetriever(
  vectorstore=vectorstore,
  byte_store=byte_store,
  id_key=id_key,
)

# One summary Document per parent, tagged with the parent's id.
summary_docs = [
  Document(page_content=summary, metadata={id_key: doc_ids[i]})
           for i, summary in enumerate(summaries)
]
# Index the summaries, and store the parents under their ids.
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

query = "justice breyer"
retrieved_docs = retriever.get_relevant_documents(query)

###### Hypothetical Queries

In [None]:
# Function schema handed to the chat model: a single function whose only
# (required) argument is "questions", an array of strings.
functions = [
  {
    "name": "hypothetical_questions",
    "description": "Generate hypothetical questions",
    "parameters": {
      "type": "object",
      "properties": {
        "questions": {
          "type": "array",
          "items": {"type": "string"},
        },
    },
      "required": ["questions"],
    },
  }
]

template = """
Generate a list of exactly 3 hypothetical questions that the below document 
could be used to answer:

{doc}
"""
prompt = ChatPromptTemplate.from_template(template)

# Bind the function schema to the model and request the named function call.
chat = ChatOpenAI(max_retries=0).bind(
  functions=functions, function_call={"name": "hypothetical_questions"},
)

# Extract the "questions" value from the function-call output.
output_parser = JsonKeyOutputFunctionsParser(key_name="questions")

chain = (
  {"doc": lambda x: x.page_content}
  | prompt
  | chat
  | output_parser
)
# One list of questions per parent document, at most 5 requests at a time.
hypothetical_question = chain.batch(docs, {"max_concurrency": 5})

# Vector store used to index the hypothetical questions.
vectorstore = chroma.Chroma(
  collection_name="hypo_questions", embedding_function=embeddings
)
# Retriever that pairs the question vector store with `byte_store`, the storage
# layer for the parent documents.
retriever = MultiVectorRetriever(
  vectorstore=vectorstore,
  byte_store=byte_store,
  id_key=id_key,
)

# One Document per generated question, tagged with the id of the parent
# document the question was generated from.
question_docs = []
for i, question_list in enumerate(hypothetical_question):
  question_docs.extend(
      [Document(page_content=s, metadata={
                id_key: doc_ids[i]}) for s in question_list]
  )

# Index the questions, and store the parents under their ids.
retriever.vectorstore.add_documents(question_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

retrieved_docs = retriever.get_relevant_documents("justice breyer")

##### Parent Document Retriever


In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import chroma
from langchain.retrievers import ParentDocumentRetriever

# One loader per example text file.
loaders = [
  TextLoader(path)
  for path in (
    "./example_data/paul_graham_essay.txt",
    "./example_data/state_of_the_union.txt",
  )
]

# Load every file and collect the resulting documents in a single flat list.
docs = []
for loader in loaders:
  docs.extend(loader.load())

embeddings = OpenAIEmbeddings()


###### Retrieving full documents


In [None]:
# Only a child splitter is configured (no parent splitter), so the two added
# documents yield two keys in the docstore.
child_text_splitter = RecursiveCharacterTextSplitter(chunk_size=400)

# Storage layer for the parent documents.
store = InMemoryStore()
# Vector store used for indexing the child chunks.
vectorstore = chroma.Chroma(
  collection_name="full_documents", embedding_function=embeddings
)

retriever = ParentDocumentRetriever(
  vectorstore=vectorstore, docstore=store, child_splitter=child_text_splitter
)
retriever.add_documents(docs, ids=None)

# Expect 2 keys, one per added document.
print(list(store.yield_keys()))

# Searching the vector store directly returns the small chunks ...
sub_docs = vectorstore.similarity_search("justice breyer")
# ... while going through the retriever returns the large parent documents.
retrieved_docs = retriever.get_relevant_documents("justice breyer")

###### Retrieving larger chunks

In [None]:
# Whole documents can be too large to retrieve. Here the raw documents are first
# split into larger (parent) chunks, which are split again into smaller (child)
# chunks for indexing. Retrieval returns the larger chunks.

child_text_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
parent_text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)

# Storage layer for the parent chunks.
store = InMemoryStore()
# Vector store indexing the child chunks.
vectorstore = chroma.Chroma(
  collection_name="split_parents", embedding_function=embeddings
)

retriever = ParentDocumentRetriever(
  vectorstore=vectorstore,
  docstore=store,
  parent_splitter=parent_text_splitter,
  child_splitter=child_text_splitter,
)
retriever.add_documents(docs)

# Number of stored parent (larger) chunks.
print(len(list(store.yield_keys())))

query = "justice breyer"
# Small chunks straight from the vector store.
sub_docs = vectorstore.similarity_search(query)
retrieved_docs = retriever.get_relevant_documents(query)

##### Self-querying


###### Basic


In [None]:
import json
from pprint import pprint
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

# Read the example movie data. The encoding is given explicitly: JSON files are
# UTF-8, and relying on the platform default (which is not UTF-8 everywhere)
# can mis-decode or fail on non-ASCII characters.
with open("./example_data/movie_data.json", encoding="utf-8") as json_file:
    movie_data = json.load(json_file)

# Build the documents and the metadata field descriptions from the JSON entries.
docs = [Document(**doc) for doc in movie_data["docs"]]
metadata_field_info = [AttributeInfo(**info)
                       for info in movie_data["metadata_field_info"]]

embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(temperature=0)

# Index the movie documents, then build a self-query retriever on top of the
# vector store using the content description and metadata field descriptions.
vectorstore = chroma.Chroma.from_documents(docs, embeddings)
document_content_description = "Brief summary of a movie"
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    enable_limit=True,  # Filter k
)

pprint(retriever.invoke("I want to watch a movie rated higher than 8.5"))


###### Constructing from scratch with LCEL

In [None]:
import json
from pprint import pprint
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import chroma
from langchain.chains.query_constructor.base import (AttributeInfo,
          StructuredQueryOutputParser, get_query_constructor_prompt)
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.retrievers.self_query.chroma import ChromaTranslator

# Read the example movie data. The encoding is given explicitly: JSON files are
# UTF-8, and relying on the platform default (which is not UTF-8 everywhere)
# can mis-decode or fail on non-ASCII characters.
with open("./example_data/movie_data.json", encoding="utf-8") as json_file:
    movie_data = json.load(json_file)

# Build the documents and the metadata field descriptions from the JSON entries.
docs = [Document(**doc) for doc in movie_data["docs"]]
metadata_field_info = [AttributeInfo(**info)
                       for info in movie_data["metadata_field_info"]]
document_content_description = "Brief summary of a movie"

embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(temperature=0)
vectorstore = chroma.Chroma.from_documents(docs, embeddings)

# Prompt for the query constructor, built from the content description and the
# metadata field descriptions.
prompt = get_query_constructor_prompt(
  document_content_description,
  metadata_field_info,
)

# The query constructor is the crucial part of a self-query retriever, so it is
# worth tuning its prompt, the prompt examples, the attribute descriptions, etc.
output_parser = StructuredQueryOutputParser.from_components()
query_constructor = prompt | llm | output_parser
# pprint(prompt.format(query="dummy question"))

# Run the query constructor on its own to inspect the structured query.
query = "What are some sci-fi movies from the 90's directed by Luc Besson about taxi drivers"
result = query_constructor.invoke({"query": query})
# pprint(result)

# The structured query translator translates the `StructuredQuery` into a
# metadata filter in the vector store's syntax.
retriever = SelfQueryRetriever(
    query_constructor=query_constructor,
    vectorstore=vectorstore,
    structured_query_translator=ChromaTranslator(),
)

query = "What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated"
result = retriever.invoke(query)
pprint(result)

##### Time-weighted vector store retriever

#### Indexing

###### Quickstart


In [None]:
from langchain.schema import Document
from langchain.indexes import SQLRecordManager, index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import elasticsearch

# Set up the embedding model and an Elasticsearch-backed vector store.
# The store is addressed at http://localhost:9200.
collection_name = "test_index"
embedding = OpenAIEmbeddings()
vectorstore = elasticsearch.ElasticsearchStore(
    es_url="http://localhost:9200", index_name=collection_name, embedding=embedding
)

# Initialize a record manager with a namespace that combines the vector store
# name and the collection name, e.g. `redis/my_docs`, `chromadb/my_docs`, or
# `postgres/my_docs`.
vectorstore_name = "elasticsearch"
namespace = f"{vectorstore_name}/{collection_name}"
record_manager = SQLRecordManager(
    namespace, db_url="sqlite:///record_manager_cache.sql"
)
# The schema must be created before the record manager is used.
record_manager.create_schema()

# Two small test documents, each tagged with a `source` in its metadata.
doc1 = Document(page_content="kitty", metadata={"source": "kitty.txt"})
doc2 = Document(page_content="doggy", metadata={"source": "doggy.txt"})

def _clear():
  """Hacky helper to clear content: index an empty batch with cleanup="full"."""
  index(
    [],
    record_manager,
    vectorstore,
    cleanup="full",
    source_id_key="source",
  )

None deletion mode


"incremental" deletion mode


"full" deletion mode


###### Source


###### Using with loaders

## Agents




### Quickstart


In [None]:
from pprint import pprint
import uuid
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import faiss
from langchain.tools.retriever import create_retriever_tool
from langchain import hub
from langchain.agents import create_openai_functions_agent, AgentExecutor
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Build an agent with two tools: one that looks up information online and one
# that searches specific data in a local index.
# An agent follows a self-determined, input-dependent sequence of steps, so
# LangSmith tracing is useful for debugging (observability).

#*** DEFINE TOOLS

## TAVILY
# Web search tool.
search = TavilySearchResults()
# pprint(search.invoke("what is the weather in SF"))

## RETRIEVER
# Create a retriever over our own data: load the page, split it into
# overlapping chunks, embed the chunks into a FAISS index.
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=1000, chunk_overlap=200
)
documents = text_splitter.split_documents(docs)
embedding = OpenAIEmbeddings()
vectorstore = faiss.FAISS.from_documents(documents, embedding)
retriever = vectorstore.as_retriever()
# pprint(retriever.get_relevant_documents("how to upload a dataset"))

# Wrap the retriever as a tool so the agent can call it. The name and
# description are what the tool is registered with.
retriever_tool = create_retriever_tool(
  retriever=retriever,
  name="langsmith_search",
  description="Search for information about LangSmith. For any questions about LangSmith, you must use this tool!",
)

## TOOLS
# The list of tools handed to both the agent and the executor below.
tools = [search, retriever_tool]

# *** CREATE THE AGENT with the LLM, the prompt, and the tools
# Choose the LLM that guides the agent.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Choose the prompt that guides the agent (pulled from the LangChain hub).
prompt = hub.pull("hwchase17/openai-functions-agent")
# pprint(prompt.messages)

# The agent takes input and decides on actions; the `AgentExecutor` executes them.
agent = create_openai_functions_agent(llm, tools, prompt)

# Combine the agent (the "brain") with the tools in an `AgentExecutor`, which
# repeatedly calls the agent and executes the tools it selects.
# The agent is stateless and does not remember prior interactions.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


#*** RUN THE AGENT
query = "how can langsmith help with testing?"
result = agent_executor.invoke({"input": query})
# pprint(result)

#*** ADDING IN MEMORY
# To provide memory, pass in the previous `chat_history`. The variable name must
# be `chat_history` because of the specific prompt used; a different prompt
# allows a different variable name.
# To track messages automatically, wrap the executor in a
# `RunnableWithMessageHistory`, which reads the input from the `input` key and
# supplies the stored messages under the `chat_history` key.
message_history = ChatMessageHistory()
# Session ids are passed through the run config as strings, so store the
# string form of the UUID rather than the `UUID` object itself.
session_id = str(uuid.uuid4())
agent_with_chat_history = RunnableWithMessageHistory(
  agent_executor,
  # A session id is typically needed in real-world scenarios; here every
  # session id maps to the same in-memory history.
  lambda session_id: message_history,
  input_messages_key="input",
  history_messages_key="chat_history",
)

# Example usage, left disabled. It is written as `#` comments rather than a bare
# triple-quoted string because a trailing string expression would be echoed as
# the cell's output.
# agent_with_chat_history.invoke(
#   {"input": "hi! I'm bob"},
#   config={"configurable": {"session_id": session_id}},
# )
#
# agent_with_chat_history.invoke(
#   {"input": "what's my name?"},
#   config={"configurable": {"session_id": session_id}},
# )


### Agent Types


#### OpenAI functions


In [None]:
from pprint import pprint
from langchain import hub
from langchain_community.tools import tavily_search
from langchain_openai import ChatOpenAI
from langchain.agents import create_openai_functions_agent, AgentExecutor
from langchain_core.messages import AIMessage, HumanMessage


#* Tools
# A single Tavily search tool, limited to one result.
tools = [tavily_search.TavilySearchResults(max_results=1)]

#* Agent
# The prompt is pulled from the hub and can be modified.
prompt = hub.pull("hwchase17/openai-functions-agent")
pprint(prompt.messages)

# The LLM that drives the agent.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")
# OpenAI Functions agent built from the model, tools and prompt.
agent = create_openai_functions_agent(llm=llm, tools=tools, prompt=prompt)

#* Executor
# Runs the agent together with its tools.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
# agent_executor.invoke({"input": "What is LangChain?"})

#* Run with chat history
# Earlier turns are passed explicitly under `chat_history`.
agent_executor.invoke({
  "chat_history": [
    HumanMessage(content="hi! my name is bob"),
    AIMessage(content="Hello Bob! How can I assist you today?"),
  ],
  "input": "what's my name?",
})

#### OpenAI tools


In [None]:
from langchain import hub
from langchain_community.tools import tavily_search
from langchain_openai import ChatOpenAI
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain_core.messages import AIMessage, HumanMessage

#* Tools
# A single Tavily search tool, limited to one result.
tools = [tavily_search.TavilySearchResults(max_results=1)]

#* Agent
# Hub prompt plus a deterministic (temperature 0) chat model, combined into an
# OpenAI tools agent and wrapped in an executor.
prompt = hub.pull("hwchase17/openai-tools-agent")
llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)
agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


#* Run with chat history
# Earlier turns are passed explicitly under `chat_history`.
agent_executor.invoke(
  {
    "chat_history": [
      HumanMessage(content="hi! my name is bob"),
      AIMessage(content="Hello Bob! How can I assist you today?"),
    ],
    "input": "what's my name? Don't use tools to look this up unless you NEED to",
  }
)

#### JSON Chat Agent


In [None]:
from langchain import hub
from langchain.agents import create_json_chat_agent, AgentExecutor
from langchain_openai import ChatOpenAI
from langchain_community.tools import tavily_search
from langchain_core.messages import AIMessage, HumanMessage

#* Tools
# A single Tavily search tool, limited to one result.
tools = [tavily_search.TavilySearchResults(max_results=1)]

#* Agent
# Hub prompt plus a default chat model, combined into a JSON chat agent.
prompt = hub.pull("hwchase17/react-chat-json")
llm = ChatOpenAI()
agent = create_json_chat_agent(llm=llm, tools=tools, prompt=prompt)
# `handle_parsing_errors=True` is set on the executor.
agent_executor = AgentExecutor(
  agent=agent,
  tools=tools,
  verbose=True,
  handle_parsing_errors=True,
)

#* Run with chat history
# Earlier turns are passed explicitly under `chat_history`.
agent_executor.invoke(
  {
    "chat_history": [
      HumanMessage(content="hi! my name is bob"),
      AIMessage(content="Hello Bob! How can I assist you today?"),
    ],
    "input": "what's my name?",
  }
)

#### Structured chat


In [6]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain import hub
from langchain.agents import AgentExecutor, create_structured_chat_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI

#* Tools
# A single Tavily search tool, limited to one result.
tools = [TavilySearchResults(max_results=1)]

#* Agent
# The prompt is pulled from the hub and can be modified.
prompt = hub.pull("hwchase17/structured-chat-agent")
# The LLM that drives the agent.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106")

# Structured chat agent built from the model, tools and prompt.
agent = create_structured_chat_agent(llm=llm, tools=tools, prompt=prompt)
# Executor that runs the agent together with its tools.
agent_executor = AgentExecutor(
  agent=agent,
  tools=tools,
  verbose=True,
  handle_parsing_errors=True,
)

#* Run with chat history
agent_executor.invoke({
  "input": "what's my name? Do not use tools unless you have to",
  "chat_history": [
    HumanMessage(content="hi! my name is bob"),
    AIMessage(content="Hello Bob! How can I assist you today?"),
  ],
})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
  "action": "Final Answer",
  "action_input": "Your name is Bob."
}[0m

[1m> Finished chain.[0m


{'input': "what's my name? Do not use tools unless you have to",
 'chat_history': [HumanMessage(content='hi! my name is bob'),
  AIMessage(content='Hello Bob! How can I assist you today?')],
 'output': 'Your name is Bob.'}

#### ReAct


#### Self-ask with search

In [3]:
from langchain import hub
from langchain.tools import tavily_search
from langchain_community.llms import fireworks
from langchain.agents import create_self_ask_with_search_agent, AgentExecutor

#* Tools
# A single Tavily answer tool, registered under the name "Intermediate Answer".
tools = [tavily_search.TavilyAnswer(max_results=1, name="Intermediate Answer")]

#* Agent
# Hub prompt plus a Fireworks LLM, combined into a self-ask-with-search agent
# and wrapped in an executor.
prompt = hub.pull("hwchase17/self-ask-with-search")
llm = fireworks.Fireworks()
agent = create_self_ask_with_search_agent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

#* Run
agent_executor.invoke(
  {"input": "What is the hometown of the reigning men's U.S. Open champion?"}
)


### How-to


#### Custom agent


In [4]:
from langchain_openai import ChatOpenAI
from langchain.agents import tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

"""
Creates a custom agent using OpenAI Tool Calling. 
Add memory for conversation enablement.
"""

#* Load the LLM
# The chat model that controls the agent; temperature is set to 0.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

#* Define Tools
# A plain Python function that computes the length of a word, turned into a
# tool by the `@tool` decorator. The function docstring is crucial, so it is
# kept short and descriptive -- do not remove or reword it casually.
@tool
def get_word_length(word: str) -> int:
  """Returns the length of a word."""
  return len(word)

# The tools made available to the agent.
tools = [get_word_length]
# get_word_length.invoke("abc")

#* Prompt for OpenAI function calling (tool usage)
# Two input variables: `input`, a string holding the user's objective, and
# `agent_scratchpad`, a sequence of messages holding the agent's tool
# invocations and their outputs.
prompt = ChatPromptTemplate.from_messages(
  [
    ("system", "You are very powerful assistant, but don't know current events"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
  ]
)

#* Bind tools to the LLM
# The agent learns which tools exist through the LLM, which is trained to know
# when to use them. The tools are formatted in OpenAI tool format, and binding
# them ensures they are passed along every time the model is invoked.
llm_with_tools = llm.bind_tools(tools)

#* Create the Agent


#* Adding memory

3

#### Streaming


#### Structured Tools


#### Running Agent as an Iterator


#### Returning Structured Output


#### Handle parsing errors


#### Access intermediate steps


#### Cap the max number of iterations


#### Timeouts for agents


### Tools

## Chains


## More


# LangServe


# LangSmith


# LangGraph