# LangChain Cookbook 👨‍🍳👩‍🍳

In [None]:
# Cell 1 — config
# Resolve the Mistral API key without hardcoding it in the notebook: read it
# from the environment (or a local .env file) and, if it is missing, prompt for it.
import os
from getpass import getpass

from dotenv import load_dotenv

load_dotenv()
mistral_api_key = os.getenv("MISTRAL_API_KEY")
if not mistral_api_key:
    # No placeholder fallback: a dummy string would otherwise be exported below
    # as if it were a real key.
    mistral_api_key = getpass("Enter your MISTRAL_API_KEY: ")
os.environ["MISTRAL_API_KEY"] = mistral_api_key  # make it visible to the SDK

# Cell 2 — model (no api_key arg needed)
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import HumanMessage, SystemMessage

chat = ChatMistralAI(model="mistral-small-latest", temperature=0.7)

# Cell 3 — call
# A system message sets the assistant's role; the human message is the question.
resp = chat.invoke([
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="What day comes after Friday?")
])
print(resp.content)


The day that comes after Friday is Saturday.


In [None]:
# NOTE: this cell is entirely commented out, so running it does nothing.
# # You'll be working with simple strings (that'll soon grow in complexity!)
# my_text = "What day comes after Friday?"
# my_text

# # !pip install langchain_mistralai

'What day comes after Friday?'

In [None]:
# NOTE: commented-out earlier version of the chat model setup; nothing here executes.
# from langchain_mistralai import ChatMistralAI
# from langchain.schema import HumanMessage, SystemMessage, AIMessage

# # Create a Mistral chat model (uses MISTRAL_API_KEY from your environment)
# chat = ChatMistralAI(model="mistral-small-latest", temperature=0.7)


Now let's create a few messages that simulate a chat experience with a bot

In [None]:
# NOTE: commented-out variant that passes the API key inline; nothing here executes.
# If you re-enable it, do not commit a real key in place of the placeholder.
# from langchain_mistralai import ChatMistralAI
# from langchain_core.messages import HumanMessage, SystemMessage

# # Pass the key directly here 👇
# chat = ChatMistralAI(
#     model="mistral-small-latest",
#     temperature=0.7,
#     api_key="use your key"
# )

# resp = chat.invoke([
#     SystemMessage(content="You are a helpful assistant."),
#     HumanMessage(content="What day comes after Friday?")
# ])

# print(resp.content)


The day that comes after Friday is Saturday.


You can also pass more chat history w/ responses from the AI

In [2]:
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

# Replay an earlier exchange (including the bot's previous answer) so the model
# has context for the follow-up question.
travel_history = [
    SystemMessage(content="You are a nice AI bot that helps a user figure out where to travel in one short sentence"),
    HumanMessage(content="I like the beaches where should I go?"),
    AIMessage(content="You should go to Nice, France"),
    HumanMessage(content="What else should I do when I'm there?"),
]
resp = chat.invoke(travel_history)

print(resp.content)


Explore the historic old town, visit the Musée Matisse, and take a day trip to Monaco.


You can also exclude the system message if you want

In [4]:
# A bare string is accepted as input too — no message objects or system prompt.
thursday_question = "What day comes after Thursday?"
resp = chat.invoke(thursday_question)
print(resp.content)


The day that comes after Thursday is **Friday**.

Here’s the sequence of days for reference:
- Monday
- Tuesday
- Wednesday
- Thursday
- **Friday**
- Saturday
- Sunday


### **Documents**
An object that holds a piece of text and metadata (more information about that text)

In [3]:
# Import Document from langchain_core, the same package the message classes in
# this notebook come from, rather than the older `langchain.schema` path.
from langchain_core.documents import Document

# A Document pairs a piece of text with arbitrary metadata describing it.
Document(page_content="This is my document. It is full of text that I've gathered from other places",
         metadata={
             'my_document_id' : 234234,
             'my_document_source' : "The LangChain Papers",
             'my_document_create_time' : 1680013019
         })

Document(metadata={'my_document_id': 234234, 'my_document_source': 'The LangChain Papers', 'my_document_create_time': 1680013019}, page_content="This is my document. It is full of text that I've gathered from other places")

But you don't have to include metadata if you don't want to

In [6]:
# Only the text is supplied here; no metadata argument is passed.
Document(
    page_content="This is my document. It is full of text that I've gathered from other places",
)

Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={})

## Models - The interface to the AI brains

###  **Language Model**
A model that does text in ➡️ text out!

*Check out how I set the model explicitly — here `mistral-small-latest`, a small Mistral model — instead of relying on a default. See more models in the [Mistral documentation](https://docs.mistral.ai/)*

In [8]:
from langchain_mistralai import ChatMistralAI

# Select the model explicitly by name and set a moderate sampling temperature.
llm = ChatMistralAI(temperature=0.7, model="mistral-small-latest")


In [11]:
from langchain_core.messages import HumanMessage

# Wrap the question in a single HumanMessage and print only the reply text.
friday_message = HumanMessage(content="What day comes after Friday?")
resp = llm.invoke([friday_message])
print(resp.content)


The day that comes after Friday is **Saturday**.

Here’s the sequence for reference:
- Sunday
- Monday
- Tuesday
- Wednesday
- Thursday
- **Friday**
- **Saturday**
- Sunday


In [10]:
# Same question, this time passed as a bare string instead of a message list.
friday_question = "What day comes after Friday?"
resp = llm.invoke(friday_question)
print(resp.content)


The day that comes after Friday is **Saturday**.

Here’s the sequence of days for reference:
- Sunday
- Monday
- Tuesday
- Wednesday
- Thursday
- **Friday**
- **Saturday**
- Sunday

So, after Friday, the next day is Saturday.


### **Chat Model**
A model that takes a series of messages and returns a message output

In [13]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_mistralai import ChatMistralAI

# No api_key argument is passed; MISTRAL_API_KEY is expected in the environment.
chat = ChatMistralAI(temperature=1.0, model="mistral-small-latest")

# The system message sets the bot's persona; the human message is the request.
joke_persona = SystemMessage(content="You are an unhelpful AI bot that makes a joke at whatever the user says.")
travel_request = HumanMessage(content="I would like to go to New York, how should I do this?")
resp = chat.invoke([joke_persona, travel_request])

print(resp.content)


Sure, just find a map, close your eyes, throw a dart, and wherever it lands, start walking towards New York from there. If you end up in the ocean, don't worry, just swim with the current until you hit land again and start over. Good luck!


### Function Calling Models

[Function calling models](https://openai.com/blog/function-calling-and-other-api-updates) are similar to Chat Models but with a little extra flavor. They are fine tuned to give structured data outputs.

This comes in handy when you're making an API call to an external service or doing extraction.

In [15]:
import json

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_mistralai import ChatMistralAI


# Expose a plain Python function to the model as a callable tool.
@tool
def get_current_weather(location: str, unit: str = "celsius") -> dict:
    """Get the current weather in a given location"""
    # Canned values: this demo returns fixed data for any location.
    return dict(
        location=location,
        unit=unit,
        temperature=23,
        condition="partly cloudy",
    )


# Chat model that will decide whether to request the tool.
chat = ChatMistralAI(temperature=1, model="mistral-large-latest")

# Attach the tool so the model can ask for it in its reply.
chat_with_tools = chat.bind_tools([get_current_weather])

# Ask a question the tool can answer and print the full response object.
weather_messages = [
    SystemMessage(content="You are a helpful AI bot"),
    HumanMessage(content="What’s the weather like in Boston right now?"),
]
output = chat_with_tools.invoke(weather_messages)

print(output)


content='' additional_kwargs={'tool_calls': [{'id': 'mRY239U0c', 'function': {'name': 'get_current_weather', 'arguments': '{"location": "Boston", "unit": "celsius"}'}, 'index': 0}]} response_metadata={'token_usage': {'prompt_tokens': 100, 'total_tokens': 120, 'completion_tokens': 20}, 'model_name': 'mistral-large-latest', 'model': 'mistral-large-latest', 'finish_reason': 'tool_calls'} id='run--a479f082-43b9-4b89-8b9c-a264f74c087e-0' tool_calls=[{'name': 'get_current_weather', 'args': {'location': 'Boston', 'unit': 'celsius'}, 'id': 'mRY239U0c', 'type': 'tool_call'}] usage_metadata={'input_tokens': 100, 'output_tokens': 20, 'total_tokens': 120}


See the extra `additional_kwargs` that is passed back to us? We can take that and pass it to an external API to get data. It saves the hassle of doing output parsing.

### **Text Embedding Model**
Change your text into a vector (a series of numbers that hold the semantic 'meaning' of your text). Mainly used when comparing two pieces of text together.

*BTW: Semantic means 'relating to meaning in language or logic.'*

In [18]:
from langchain_mistralai import MistralAIEmbeddings

# Embedding client; no key is passed, so MISTRAL_API_KEY must be set in the environment.
embeddings = MistralAIEmbeddings(model="mistral-embed")

text = "Hi! It's time for the beach"

# Embed the sentence, then show its first few values and its total length.
text_embedding = embeddings.embed_query(text)
embedding_preview = text_embedding[:5]
embedding_length = len(text_embedding)
print(f"Here's a sample: {embedding_preview}...")
print(f"Your embedding is length {embedding_length}")




Here's a sample: [-0.0302734375, 0.0300445556640625, 0.0487060546875, -0.019012451171875, 0.0269775390625]...
Your embedding is length 1024


Here's a sample: [-0.00019600906371495047, -0.0031846734422911363, -0.0007734206914647714, -0.019472001962491232, -0.015092319017854244]...
Your embedding is length 1536


## Prompts - Text generally used as instructions to your model

### **Prompt**
What you'll pass to the underlying model

In [20]:
from langchain_mistralai import ChatMistralAI

# Model client; no key is passed, so MISTRAL_API_KEY must be set in the environment.
llm = ChatMistralAI(temperature=0.7, model="mistral-small-latest")

# The prompt is just a multi-line string handed to the model as-is.
prompt = """
Today is Monday, tomorrow is Wednesday.

What is wrong with that statement?
"""

resp = llm.invoke(prompt)
print(resp.content)


Alright, let's tackle this problem step by step. The statement given is:

**"Today is Monday, tomorrow is Wednesday. What is wrong with that statement?"**

At first glance, this seems straightforward, but there's something amiss here. Let's break it down to understand what's going on.

### Understanding the Statement

1. **Today is Monday**: This means the current day is Monday.
2. **Tomorrow is Wednesday**: This implies that the day after today (Monday) is Wednesday.

Now, let's think about the sequence of days in a week to see if this makes sense.

### The Sequence of Days in a Week

The standard sequence of days in a week is as follows:

- Monday
- Tuesday
- Wednesday
- Thursday
- Friday
- Saturday
- Sunday

Given that today is Monday, the next day should logically be Tuesday, followed by Wednesday.

### Analyzing the Given Statement

According to the statement:
- Today: Monday
- Tomorrow: Wednesday

But according to the standard sequence:
- Today: Monday
- Tomorrow: Tuesday
- Day a

### **Prompt Template**
An object that helps create prompts based on a combination of user input, other non-static information and a fixed template string.

Think of it as an [f-string](https://realpython.com/python-f-strings/) in python but for prompts

*Advanced: Check out [LangSmith Hub](https://smith.langchain.com/hub) for many more community prompt templates*

In [21]:
from langchain_mistralai import ChatMistralAI
# PromptTemplate is imported from langchain_core.prompts; the top-level
# `from langchain import PromptTemplate` shortcut is deprecated.
from langchain_core.prompts import PromptTemplate

# Chat model used to answer the formatted prompt
llm = ChatMistralAI(model="mistral-small-latest", temperature=0.7)

# Notice "location" below, that is a placeholder for another value later
template = """
I really want to travel to {location}. What should I do there?

Respond in one short sentence
"""

prompt = PromptTemplate(
    input_variables=["location"],
    template=template,
)

# Fill the placeholder to get the final prompt string
final_prompt = prompt.format(location='Rome')

print(f"Final Prompt: {final_prompt}")
print("-----------")

resp = llm.invoke(final_prompt)
print(f"LLM Output: {resp.content}")




Final Prompt: 
I really want to travel to Rome. What should I do there?

Respond in one short sentence

-----------
LLM Output: Explore ancient ruins like the Colosseum, toss a coin in the Trevi Fountain, and savor authentic pasta carbonara.


### **Example Selectors**
An easy way to select from a series of examples that allows you to dynamically place in-context information into your prompt. Often used when your task is nuanced or you have a large list of examples.

Check out different types of example selectors [here](https://python.langchain.com/docs/modules/model_io/prompts/example_selectors/)

If you want an overview on why examples are important (prompt engineering), check out [this video](https://www.youtube.com/watch?v=dOxUroR57xs)

In [23]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import Chroma
from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings

# Chat model that will complete the few-shot prompt later on.
llm = ChatMistralAI(temperature=0.7, model="mistral-small-latest")

# How a single example is rendered inside the few-shot prompt.
example_prompt = PromptTemplate(
    template="Example Input: {input}\nExample Output: {output}",
    input_variables=["input", "output"],
)

# Each pair maps a noun to the place it is usually found.
noun_locations = [
    ("pirate", "ship"),
    ("pilot", "plane"),
    ("driver", "car"),
    ("tree", "ground"),
    ("bird", "nest"),
]
examples = [{"input": noun, "output": place} for noun, place in noun_locations]


In [24]:
# Build a selector that picks the examples closest in meaning to the input.
example_embeddings = MistralAIEmbeddings(model="mistral-embed")

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,            # pool of examples to choose from
    example_embeddings,  # embedding model used to compare meanings
    Chroma,              # vector store class that holds the embeddings and runs the similarity search
    k=2,                 # number of examples to return per query
)




In [25]:
from langchain.prompts import FewShotPromptTemplate

# Few-shot prompt layout: prefix, dynamically selected examples, then the suffix.
similar_prompt = FewShotPromptTemplate(
    prefix="Give the location an item is usually found in",
    example_selector=example_selector,  # chooses which examples to include
    example_prompt=example_prompt,      # renders each chosen example
    suffix="Input: {noun}\nOutput:",
    input_variables=["noun"],           # the only value supplied at format time
)


In [26]:
# Pick the noun to look up (swap in the commented alternative to try another).
my_noun = "plant"
# my_noun = "student"

# Formatting selects the examples and renders the complete prompt text.
rendered_prompt = similar_prompt.format(noun=my_noun)
print(rendered_prompt)


Give the location an item is usually found in

Example Input: bird
Example Output: nest

Example Input: tree
Example Output: ground

Input: plant
Output:


In [28]:
# Render the few-shot prompt for the chosen noun.
prompt_text = similar_prompt.format(noun=my_noun)

# Ask the Mistral model to complete it.
resp = llm.invoke(prompt_text)

# Show the prompt, a divider, and then the model's answer.
print(f"Prompt:\n {prompt_text}")
print("-----------")
print(f"LLM Output: {resp.content}")


Prompt:
 Give the location an item is usually found in

Example Input: bird
Example Output: nest

Example Input: tree
Example Output: ground

Input: plant
Output:
-----------
LLM Output: pot

(Plants are commonly found in pots, especially when grown indoors or in gardens.)


### **Output Parsers Method 1: Prompt Instructions & String Parsing**
A helpful way to format the output of a model. Usually used for structured output. LangChain has a bunch more output parsers listed on their [documentation](https://python.langchain.com/docs/modules/model_io/output_parsers).

Two big concepts:

**1. Format Instructions** - An autogenerated prompt that tells the LLM how to format its response based on your desired result

**2. Parser** - A method which will extract your model's text output into a desired structure (usually json)

In [29]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_mistralai import ChatMistralAI

In [30]:
from langchain_mistralai import ChatMistralAI

# Chat model used for the output-parser examples that follow.
llm = ChatMistralAI(temperature=0.7, model="mistral-small-latest")


In [31]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

# One schema per field of the desired response; the descriptions end up in the
# format instructions printed further down.
bad_string_schema = ResponseSchema(
    name="bad_string",
    description="This a poorly formatted user input string",
)
good_string_schema = ResponseSchema(
    name="good_string",
    description="This is your response, a reformatted response",
)
response_schemas = [bad_string_schema, good_string_schema]

# Parser built from those schemas; it is used later to parse the model's reply.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)


In [32]:
# Ask the parser for the formatting instructions it generated, then display them.
format_instructions = output_parser.get_format_instructions()
print(format_instructions)


The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [33]:
from langchain.prompts import PromptTemplate

# Template with two slots: the parser's format instructions and the user's text.
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly.

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

# format_instructions is pre-filled here; only user_input is supplied at format time.
prompt = PromptTemplate(
    template=template,
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
)

promptValue = prompt.format(user_input="welcom to califonya!")

print(promptValue)



You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly.

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

% USER INPUT:
welcom to califonya!

YOUR RESPONSE:



In [34]:
# Run the formatted prompt through the Mistral model.
resp = llm.invoke(promptValue)

# Show the reply text exactly as the model produced it.
print(f"Raw output:\n {resp.content}")

# Convert that text into a structured result with the parser.
parsed = output_parser.parse(resp.content)
print(f"\nParsed output:\n {parsed}")


Raw output:
 ```json
{
	"bad_string": "welcom to califonya!",
	"good_string": "Welcome to California!"
}
```

Parsed output:
 {'bad_string': 'welcom to califonya!', 'good_string': 'Welcome to California!'}


{'bad_string': 'welcom to califonya!', 'good_string': 'Welcome to California!'}

### **Output Parsers Method 2: OpenAI Functions**
When OpenAI released function calling, the game changed. This is the recommended method when starting out.

They trained models specifically for outputting structured data. It became super easy to specify a Pydantic schema and get a structured output.

There are many ways to define your schema, I prefer using Pydantic Models because of how organized they are. Feel free to reference OpenAI's [documentation](https://platform.openai.com/docs/guides/gpt/function-calling) for other methods.

In order to use this method you'll need to use a model that supports [function calling](https://openai.com/blog/function-calling-and-other-api-updates#:~:text=Developers%20can%20now%20describe%20functions%20to%20gpt%2D4%2D0613%20and%20gpt%2D3.5%2Dturbo%2D0613%2C). The original walkthrough used `gpt4-0613`; the cells below instead use `mistral-small-latest` together with a `PydanticOutputParser`.

**Example 1: Simple**

Let's get started by defining a simple model for us to extract from.

In [35]:
from typing import Optional

from langchain.pydantic_v1 import BaseModel, Field


class Person(BaseModel):
    """Identifying information about a person."""

    # name and age are required (`...`); fav_food is optional and defaults to None.
    name: str = Field(..., description="The person's name")
    age: int = Field(..., description="The person's age")
    fav_food: Optional[str] = Field(None, description="The person's favorite food")


# Build one instance by hand to see both its repr and its dict form.
example = Person(name="Alice", age=30, fav_food="Pizza")
print(example)
print(example.dict())


name='Alice' age=30 fav_food='Pizza'
{'name': 'Alice', 'age': 30, 'fav_food': 'Pizza'}


Then let's create a chain (more on this later) that will do the extracting for us

In [43]:
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_mistralai import ChatMistralAI

# Parser that turns the model's text into a Person object.
parser = PydanticOutputParser(pydantic_object=Person)

# temperature=0 for the extraction call.
llm = ChatMistralAI(temperature=0, model="mistral-small-latest")

# The parser's format instructions are pre-filled; only the text varies.
extraction_template = """
Extract the person's information from the text.

{format_instructions}

Respond ONLY with a valid JSON object, no extra text.

Text: {text}
"""
prompt = PromptTemplate(
    template=extraction_template,
    input_variables=["text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Run the steps by hand: format the prompt, call the model, parse the reply.
input_text = "Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally."
prompt_value = prompt.format(text=input_text)

resp = llm.invoke(prompt_value)

person = parser.parse(resp.content)
print(person)


name='Sally' age=13 fav_food=None


Notice how we only have data on one person from that list? That is because we didn't specify we wanted multiple. Let's change our schema to specify that we want a list of people if possible.

In [45]:
from typing import Sequence

from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain_mistralai import ChatMistralAI


# Single-person schema: required name and age, optional favorite food.
class Person(BaseModel):
    """Identifying information about a person."""
    name: str = Field(..., description="The person's name")
    age: int = Field(..., description="The person's age")
    fav_food: str | None = Field(None, description="The person's favorite food")


# Wrapper schema so the reply can hold several Person entries at once.
class People(BaseModel):
    """Identifying information about all people in a text."""
    people: Sequence[Person] = Field(..., description="The people in the text")


# Parser targeting the People wrapper, plus the model that will be queried.
parser = PydanticOutputParser(pydantic_object=People)
llm = ChatMistralAI(temperature=0, model="mistral-small-latest")

# Prompt asking for JSON that matches the People schema.
extraction_template = """
Extract all people and their information from the text.

{format_instructions}

Respond ONLY with valid JSON.

Text: {text}
"""
prompt = PromptTemplate(
    template=extraction_template,
    input_variables=["text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Format the prompt, call the model, then parse the reply into a People object.
input_text = "Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally."
prompt_value = prompt.format(text=input_text)
resp = llm.invoke(prompt_value)

people = parser.parse(resp.content)
print(people)
print(people.dict())


people=[Person(name='Sally', age=13, fav_food=None), Person(name='Joey', age=12, fav_food='spinach'), Person(name='Caroline', age=23, fav_food=None)]
{'people': [{'name': 'Sally', 'age': 13, 'fav_food': None}, {'name': 'Joey', 'age': 12, 'fav_food': 'spinach'}, {'name': 'Caroline', 'age': 23, 'fav_food': None}]}


Let's do some more parsing with it

**Example 2: Enum**

Now let's parse when a product from a list is mentioned

In [47]:
import enum
from typing import Sequence

from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain_mistralai import ChatMistralAI


# Closed set of product categories the parsed output may contain.
class Product(str, enum.Enum):
    CRM = "CRM"
    VIDEO_EDITING = "VIDEO_EDITING"
    HARDWARE = "HARDWARE"


# Schema holding every product category mentioned in a text.
class Products(BaseModel):
    """Identifying products that were mentioned in a text"""
    products: Sequence[Product] = Field(..., description="The products mentioned in a text")


# Parser targeting the Products schema, plus the model that will be queried.
parser = PydanticOutputParser(pydantic_object=Products)
llm = ChatMistralAI(temperature=0, model="mistral-small-latest")

# Prompt asking for JSON that matches the Products schema.
extraction_template = """
Extract the products mentioned in the following text.

{format_instructions}

Respond ONLY with valid JSON.

Text: {text}
"""
prompt = PromptTemplate(
    template=extraction_template,
    input_variables=["text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Format the prompt, call the model, then parse the reply into a Products object.
input_text = "The CRM in this demo is great. Love the hardware. The microphone is also cool. Love the video editing"
prompt_value = prompt.format(text=input_text)
resp = llm.invoke(prompt_value)

products = parser.parse(resp.content)
print(products)
print(products.dict())


products=[<Product.CRM: 'CRM'>, <Product.HARDWARE: 'HARDWARE'>, <Product.VIDEO_EDITING: 'VIDEO_EDITING'>]
{'products': [<Product.CRM: 'CRM'>, <Product.HARDWARE: 'HARDWARE'>, <Product.VIDEO_EDITING: 'VIDEO_EDITING'>]}


## Indexes - Structuring documents so LLMs can work with them

### **Document Loaders**
Easy ways to import data from other sources. Shared functionality with [OpenAI Plugins](https://openai.com/blog/chatgpt-plugins) [specifically retrieval plugins](https://github.com/openai/chatgpt-retrieval-plugin)

See a [big list](https://python.langchain.com/en/latest/modules/indexes/document_loaders.html) of document loaders here. A bunch more on [Llama Index](https://llamahub.ai/) as well.

**HackerNews**

**Books from Gutenberg Project**

In [50]:
from langchain.document_loaders import GutenbergLoader

# Load a plain-text book from Project Gutenberg by its URL.
gutenberg_url = "https://www.gutenberg.org/cache/epub/2148/pg2148.txt"
loader = GutenbergLoader(gutenberg_url)

data = loader.load()

In [51]:
# Preview a short slice from the text of the first loaded document.
book_text = data[0].page_content
print(book_text[1855:1984])

o.—_Seneca_.





      At Paris, just after dark one gusty evening in the autumn of 18-,


      I was enjoying the twofold l


**URLs and webpages**

Let's try it out with [Paul Graham's website](http://www.paulgraham.com/)

In [53]:
from langchain.document_loaders import UnstructuredURLLoader

# Pages to fetch; the loader takes a list, so more URLs can be added here.
urls = ["http://www.paulgraham.com/"]

loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

# Show the text extracted from the first page.
data[0].page_content

'New:  \n\n Good Writing  |\n Founder Mode \n \n \n \n \n \n Want to start a startup?  Get funded by  Y Combinator .\n \n \n \n\n \n\n \n \n \n© mmxxv pg'

### **Text Splitters**
Often times your document is too long (like a book) for your LLM. You need to split it up into chunks. Text splitters help with this.

There are many ways you could split your text into chunks, experiment with [different ones](https://python.langchain.com/en/latest/modules/indexes/text_splitters.html) to see which is best for you.

In [54]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [55]:
# This is a long document we can split up.
# The path is relative to the notebook's working directory; the recorded
# FileNotFoundError means the data folder was not present when this cell ran.
# An explicit encoding keeps the read independent of the platform's locale default
# (assumes the essay file is UTF-8 — TODO confirm).
with open('data/PaulGrahamEssays/worked.txt', encoding='utf-8') as f:
    pg_work = f.read()

# The whole file is held as one string, i.e. a single document at this point.
print (f"You have {len([pg_work])} document")

FileNotFoundError: [Errno 2] No such file or directory: 'data/PaulGrahamEssays/worked.txt'

In [57]:
# Deliberately tiny chunks (with a little overlap) so the splitting is easy to see.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=150)

# Wrap the essay string in a list and split it into documents.
texts = text_splitter.create_documents([pg_work])

NameError: name 'pg_work' is not defined

In [None]:
# Report how many chunks the splitter produced.
chunk_count = len(texts)
print(f"You have {chunk_count} documents")

You have 610 documents


In [None]:
# Show the first two chunks to see where the splitter cut the text.
print("Preview:")
print(texts[0].page_content, "\n")
print(texts[1].page_content)

Preview:
February 2021Before college the two main things I worked on, outside of school,
were writing and programming. I didn't write essays. I wrote what 

beginning writers were supposed to write then, and probably still
are: short stories. My stories were awful. They had hardly any plot,


There are a ton of different ways to do text splitting and it really depends on your retrieval strategy and application design. Check out more splitters [here](https://python.langchain.com/docs/modules/data_connection/document_transformers/)

### **Retrievers**
Easy way to combine documents with language models.

There are many different types of retrievers, the most widely supported is the VectorStoreRetriever

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_mistralai import MistralAIEmbeddings

# Read the essay from disk as LangChain documents.
essay_path = 'data/PaulGrahamEssays/worked.txt'
loader = TextLoader(essay_path)
documents = loader.load()

In [None]:
# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Get embedding engine ready.
# The previous call passed an undefined name (`mistral_`), which raises NameError;
# name the embedding model explicitly instead.
embeddings = MistralAIEmbeddings(model="mistral-embed")

# Embed your texts and index them in FAISS
db = FAISS.from_documents(texts, embeddings)

In [None]:
# Init your retriever with its default settings.
# NOTE(review): no `k` / search kwargs are passed here, so nothing limits the result to 1 document — confirm the intended count.
retriever = db.as_retriever()

In [None]:
# Display the retriever's repr to see what kind of object `as_retriever()` returned.
retriever

VectorStoreRetriever(tags=['FAISS'], vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x7f8389169070>)

In [None]:
# Query the retriever through `invoke`, the same call style used for the models
# in this notebook; `get_relevant_documents` is deprecated in current LangChain.
docs = retriever.invoke("what types of things did the author want to build?")

In [None]:
# Show the first 200 characters of the first two retrieved chunks, separated by a blank line.
previews = [doc.page_content[:200] for doc in docs[:2]]
print("\n\n".join(previews))

standards; what was the point? No one else wanted one either, so
off they went. That was what happened to systems work.I wanted not just to build things, but to build things that would
last.In this di

much of it in grad school.Computer Science is an uneasy alliance between two halves, theory
and systems. The theory people prove things, and the systems people
build things. I wanted to build things. 


### **VectorStores**
Databases to store vectors. Most popular ones are [Pinecone](https://www.pinecone.io/) & [Weaviate](https://weaviate.io/). More examples on OpenAIs [retriever documentation](https://github.com/openai/chatgpt-retrieval-plugin#choosing-a-vector-database). [Chroma](https://www.trychroma.com/) & [FAISS](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) are easy to work with locally.

Conceptually, think of them as tables w/ a column for embeddings (vectors) and a column for metadata.

Example

| Embedding      | Metadata |
| ----------- | ----------- |
| [-0.00015641732898075134, -0.003165106289088726, ...]      | {'date' : '1/2/23'}       |
| [-0.00035465431654651654, 1.4654131651654516546, ...]   | {'date' : '1/3/23'}        |

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_mistralai import MistralAIEmbeddings

# Read the essay from disk as LangChain documents.
essay_path = "data/PaulGrahamEssays/worked.txt"
loader = TextLoader(essay_path)
documents = loader.load()

# Cut the documents into chunks of up to 1000 characters with a 50-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
texts = text_splitter.split_documents(documents)

# Embedding client; no key is passed, so MISTRAL_API_KEY must be set in the environment.
embeddings = MistralAIEmbeddings(model="mistral-embed")

# Embed the chunks and index them in FAISS.
db = FAISS.from_documents(texts, embeddings)

chunk_count = len(texts)
print(f"Loaded {chunk_count} chunks into FAISS")


In [None]:
# Report how many chunks the essay was split into.
document_count = len(texts)
print(f"You have {document_count} documents")

You have 78 documents


In [None]:
# Collect the text of every chunk, then embed them all.
chunk_texts = [chunk.page_content for chunk in texts]
embedding_list = embeddings.embed_documents(chunk_texts)

In [None]:
# Report the number of embeddings, then peek at the first three values of the first one.
print(f"You have {len(embedding_list)} embeddings")
first_embedding = embedding_list[0]
print(f"Here's a sample of one: {first_embedding[:3]}...")

You have 78 embeddings
Here's a sample of one: [-0.001058628615053026, -0.01118234211553424, -0.012874804746266883]...


Your vectorstore stores your embeddings (☝️) and makes them easily searchable

## Memory
Helping LLMs remember information.

Memory is a bit of a loose term. It could be as simple as remembering information you've chatted about in the past or more complicated information retrieval.

We'll keep it towards the Chat Message use case. This would be used for chat bots.

There are many types of memory, explore [the documentation](https://python.langchain.com/en/latest/modules/memory/how_to_guides.html) to see which one fits your use case.

### Chat Message History

In [61]:
from langchain.memory import ChatMessageHistory
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import HumanMessage, AIMessage  # <- use these

# Create the chat model (uses MISTRAL_API_KEY from env)
chat = ChatMistralAI(model="mistral-small-latest", temperature=0)

# Initialize message history
history = ChatMessageHistory()

# Seed the conversation: one AI greeting followed by the user's question
history.add_ai_message("hi!")
history.add_user_message("what is the capital of france?")

# Rebuild each stored message with the message classes imported in this cell.
# The role is taken from the message's own `type` tag ("ai" / "human") rather
# than from an `isinstance` check: with `isinstance`, any message that is not
# an instance of *this* AIMessage class would be rebuilt as a HumanMessage,
# which silently changes who said what.
# NOTE(review): the recorded output suggests the history returns messages of a
# different class than the one imported here — confirm against the installed
# LangChain version.
# Messages with any other role are passed through untouched.
_CORE_MESSAGE_BY_ROLE = {"ai": AIMessage, "human": HumanMessage}
messages = [
    _CORE_MESSAGE_BY_ROLE[m.type](content=m.content)
    if getattr(m, "type", None) in _CORE_MESSAGE_BY_ROLE
    else m
    for m in history.messages
]

# Get AI response
ai_response = chat.invoke(messages)
print(ai_response)

# Add AI response back to history so the next turn sees the full conversation
history.add_ai_message(ai_response.content)

print(history.messages)


content='Hello! The capital of France is **Paris**. 🇫🇷\n\nWould you like to know more about Paris or France? 😊' additional_kwargs={} response_metadata={'token_usage': {'prompt_tokens': 12, 'total_tokens': 45, 'completion_tokens': 33}, 'model_name': 'mistral-small-latest', 'model': 'mistral-small-latest', 'finish_reason': 'stop'} id='run--3e462706-d17f-4fd5-b293-f5dde877e6a0-0' usage_metadata={'input_tokens': 12, 'output_tokens': 33, 'total_tokens': 45}
[AIMessage(content='hi!', additional_kwargs={}, example=False), HumanMessage(content='what is the capital of france?', additional_kwargs={}, example=False), AIMessage(content='Hello! The capital of France is **Paris**. 🇫🇷\n\nWould you like to know more about Paris or France? 😊', additional_kwargs={}, example=False)]


## Agents 🤖🤖

Official LangChain Documentation describes agents perfectly (emphasis mine):
> Some applications will require not just a predetermined chain of calls to LLMs/other tools, but potentially an **unknown chain** that depends on the user's input. In these types of chains, there is a “agent” which has access to a suite of tools. Depending on the user input, the agent can then **decide which, if any, of these tools to call**.


Basically you use the LLM not just for text output, but also for decision making. The coolness and power of this functionality can't be overstated.

Sam Altman emphasizes that LLMs are a good '[reasoning engine](https://www.youtube.com/watch?v=L_Guz73e6fw&t=867s)'. Agents take advantage of this.

### Agents

The language model that drives decision making.

More specifically, an agent takes in an input and returns a response corresponding to an action to take along with an action input. You can see different types of agents (which are better for different use cases) [here](https://python.langchain.com/en/latest/modules/agents/agents/agent_types.html).

### Tools

A 'capability' of an agent. This is an abstraction on top of a function that makes it easy for LLMs (and agents) to interact with it. Ex: Google search.

This area shares commonalities with [OpenAI plugins](https://platform.openai.com/docs/plugins/introduction).

### Toolkit

Groups of tools that your agent can select from

Let's bring them all together:

In [None]:
import os
import json
from langchain.agents import load_tools, initialize_agent
from langchain_mistralai import ChatMistralAI

# Create the Mistral model (uses MISTRAL_API_KEY from env)
llm = ChatMistralAI(model="mistral-small-latest", temperature=0)

# Resolve the SerpAPI key.
# SERP_API_KEY is the name this notebook has always read, so it is checked
# first. SERPAPI_API_KEY is also accepted so that a key exported under that
# name is not overridden by the placeholder below.
# NOTE(review): SERPAPI_API_KEY is believed to be the variable the SerpAPI
# wrapper itself looks for — confirm against the LangChain docs.
# The placeholder is kept as the last resort; replace it with a real key
# (or set one of the variables) before running.
serpapi_api_key = (
    os.getenv("SERP_API_KEY")
    or os.getenv("SERPAPI_API_KEY")
    or "YourSerpApiKey"
)

# Load the search tool
toolkit = load_tools(["serpapi"], llm=llm, serpapi_api_key=serpapi_api_key)

# Initialize the agent
agent = initialize_agent(
    tools=toolkit,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
    return_intermediate_steps=True,  # request the intermediate steps alongside the answer
)

# Run the agent
response = agent({"input": "what was the first album of the band that Natalie Bergman is a part of?"})

# `default=str` keeps the dump from raising TypeError if the response holds
# objects that json cannot serialize natively (depending on the LangChain
# version, the intermediate-step records may be such objects); they are
# rendered via str() instead.
print(json.dumps(response, indent=2, default=str))


ValidationError: 1 validation error for SerpAPIWrapper
__root__
  Could not import serpapi python package. Please install it with `pip install google-search-results`. (type=value_error)

![Wild Belle](https://github.com/gkamradt/langchain-tutorials/blob/main/data/WildBelle1.png?raw=1)

🎵Enjoy🎵
https://open.spotify.com/track/1eREJIBdqeCcqNCB1pbz7w?si=c014293b63c7478c