# Introduction to LangChain

- https://console.mistral.ai/
- https://www.langchain.com/



In [None]:
# Get the API key here and add it to the secrets (left).
# The key is read from the Colab secret stored under the name "mistralapikey".
from google.colab import userdata
api_key = userdata.get("mistralapikey")

In [None]:
!pip install -U langchain-core langchain-mistralai langchain-community langchain-chroma

Collecting langchain-core
  Downloading langchain_core-0.3.15-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-mistralai
  Downloading langchain_mistralai-0.2.1-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.5-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-chroma
  Downloading langchain_chroma-0.1.4-py3-none-any.whl.metadata (1.6 kB)
Collecting httpx-sse<1,>=0.3.1 (from langchain-mistralai)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting SQLAlchemy<2.0.36,>=1.4 (from langchain-community)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting langchain<0.4.0,>=0.3.6 (from langchain-community)
  Downloading langchain-0.3.7-py3-none-any.whl.metadata (7.1 kB)
Collecting pydantic-settings<3.0

# Imports.


In [None]:
import json
import logging
import warnings

from langchain_core.globals import set_debug, set_verbose
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_mistralai.chat_models import ChatMistralAI

# Keep the notebook output quiet: no LangChain verbose/debug traces,
# only ERROR-level log records, and no Python warnings.
set_verbose(False)
set_debug(False)
logging.getLogger().setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

## Getting started.

- https://docs.mistral.ai/getting-started/models/

In [None]:
# bp open source
"""
import huggingface_hub
huggingface_hub.login
"""

'\nimport huggingface_hub\nhuggingface_hub.login\n'

In [None]:
# bp open source
"""
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace

llm = HuggingFaceEndpoint(repo_id="HuggingFaceH4/zephyr-7b-beta")

chat_model = ChatHuggingFace(llm=llm)
"""

'\nfrom langchain_community.llms import HuggingFaceEndpoint\nfrom langchain_community.chat_models.huggingface import ChatHuggingFace\n\nllm = HuggingFaceEndpoint(repo_id="HuggingFaceH4/zephyr-7b-beta")\n\nchat_model = ChatHuggingFace(llm=llm)\n'

In [None]:
# Chat model shared by the summarization, JSON-extraction and Gradio cells below.
llm = ChatMistralAI(
    api_key=api_key,
    model="mistral-medium-latest"
)
# The example below is wrapped in a string literal, so it is displayed but not executed.
"""
messages = [
    SystemMessage(
        content="You are a friendly AI assistant that speaks English but sometimes uses German words."
    ),
    HumanMessage(
        content="Write a poem about love."
    )
]
result = llm.invoke(messages)
print(result.content)"""

'\nmessages = [\n    SystemMessage(\n        content="You are a friendly AI assistant that speaks English but sometimes uses German words."\n    ),\n    HumanMessage(\n        content="Write a poem about love."\n    )\n]\nresult = llm.invoke(messages)\nprint(result.content)'

In [None]:
#print(json.dumps(result.response_metadata, indent=4))

## Use streaming.

In [None]:
#async for chunk in llm.astream(messages):
#    print(chunk.content, end="", flush=True)

## Translation

In [None]:
"""
messages = [
    SystemMessage(
        content=""
            "You are a friendly AI assistant."
            " Your specialty are great translations. Answer with the translation first. And then explain it in detail. Explanation as a bulleted list please. Use HTML tags."
    ),
    HumanMessage(
        content=""
            "L'homme est libre au moment qu'il veut l'être."
        )
]
result = llm.invoke(messages)
print(result.content)
"""

'\nmessages = [\n    SystemMessage(\n        content=""\n            "You are a friendly AI assistant."\n            " Your specialty are great translations. Answer with the translation first. And then explain it in detail. Explanation as a bulleted list please. Use HTML tags."\n    ),\n    HumanMessage(\n        content=""\n            "L\'homme est libre au moment qu\'il veut l\'être."\n        )\n]\nresult = llm.invoke(messages)\nprint(result.content)\n'

In [None]:
#print(result.content)

## Chain example: Parsing.

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Parser that later cells pipe after the model (`chain = llm | parser`).
parser = StrOutputParser()
# The example below is wrapped in a string literal, so it is displayed but not executed.
"""
result = llm.invoke(messages)
print(result)
parsed_result = parser.invoke(result)
print(parsed_result)
"""

'\nresult = llm.invoke(messages)\nprint(result)\nparsed_result = parser.invoke(result)\nprint(parsed_result)\n'

In [None]:
#chain = llm | parser
#chain.invoke(messages)

## Summarization.

https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/

In [None]:
# Get a file.
#!wget https://raw.githubusercontent.com/vilmibm/lovecraftcorpus/master/ulthar.txt

In [None]:
"""from langchain_community.document_loaders import TextLoader

loader = TextLoader("ulthar.txt")
documents = loader.load()

first_document_content = documents[0].page_content

summary_prompt = f"Please summarize the following document:\n\n{first_document_content}"
messages = [
    SystemMessage(
        content="You are a friendly AI assistant that speaks English."
                "You write really good summaries."
                "You sometimes use bulleted lists but not all the time."
    ),
    HumanMessage(content=summary_prompt)
]

chain = llm | parser
summary = chain.invoke(messages)
summary
"""

'from langchain_community.document_loaders import TextLoader\n\nloader = TextLoader("ulthar.txt")\ndocuments = loader.load()\n\nfirst_document_content = documents[0].page_content\n\nsummary_prompt = f"Please summarize the following document:\n\n{first_document_content}"\nmessages = [\n    SystemMessage(\n        content="You are a friendly AI assistant that speaks English."\n                "You write really good summaries."\n                "You sometimes use bulleted lists but not all the time."\n    ),\n    HumanMessage(content=summary_prompt)\n]\n\nchain = llm | parser\nsummary = chain.invoke(messages)\nsummary\n'

In [None]:
# Downloads Hotel_Reviews.csv (about 238 MB) from a publicly shared Google Drive file.
!gdown https://drive.google.com/uc?id=1vDVuBi6UnfkkxiTrpNzwEodiNZ6LYaVf # I put it to my gdrive and share to everyone
#https://drive.google.com/file/d/1vDVuBi6UnfkkxiTrpNzwEodiNZ6LYaVf/view?usp=sharing
#!gdown https://drive.google.com/file/d/1vDVuBi6UnfkkxiTrpNzwEodiNZ6LYaVf/view?usp=sharing

Downloading...
From (original): https://drive.google.com/uc?id=1vDVuBi6UnfkkxiTrpNzwEodiNZ6LYaVf
From (redirected): https://drive.google.com/uc?id=1vDVuBi6UnfkkxiTrpNzwEodiNZ6LYaVf&confirm=t&uuid=9fbe04b4-7d6e-49a9-9f2c-cc3b62bce61f
To: /content/Hotel_Reviews.csv
100% 238M/238M [00:04<00:00, 50.1MB/s]


In [None]:
# Load the downloaded hotel reviews into a DataFrame and preview the last 50 rows.
import pandas as pd
reviews_df = pd.read_csv("Hotel_Reviews.csv")
reviews_df.tail(50)

Unnamed: 0,Hotel_Address,Additional_Number_of_Scoring,Review_Date,Average_Score,Hotel_Name,Reviewer_Nationality,Negative_Review,Review_Total_Negative_Word_Counts,Total_Number_of_Reviews,Positive_Review,Review_Total_Positive_Word_Counts,Total_Number_of_Reviews_Reviewer_Has_Given,Reviewer_Score,Tags,days_since_review,lat,lng
515688,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/11/2016,8.1,Atlantis Hotel Vienna,Romania,Parking to expensive and so is the breakfast,9,2823,No Positive,0,2,7.5,"[' Couple ', ' Standard Double or Twin Room ',...",388 day,48.203745,16.335677
515689,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/7/2016,8.1,Atlantis Hotel Vienna,Saudi Arabia,The net was slow The ac was not cooling,10,2823,The breakfast was excellent The staff were ve...,12,1,9.2,"[' Leisure trip ', ' Family with young childre...",392 day,48.203745,16.335677
515690,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/7/2016,8.1,Atlantis Hotel Vienna,Kuwait,Windows shades hotel design and location,7,2823,Staff and cleanliness,4,1,6.7,"[' Leisure trip ', ' Couple ', ' Standard Doub...",392 day,48.203745,16.335677
515691,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/5/2016,8.1,Atlantis Hotel Vienna,United Kingdom,No Negative,0,2823,Good location,3,5,10.0,"[' Leisure trip ', ' Solo traveler ', ' Superi...",394 day,48.203745,16.335677
515692,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/5/2016,8.1,Atlantis Hotel Vienna,Croatia,Parking in front of reception for guest check...,16,2823,Peace frendly staff nice breakfast location,7,9,9.2,"[' Business trip ', ' Solo traveler ', ' Super...",394 day,48.203745,16.335677
515693,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/4/2016,8.1,Atlantis Hotel Vienna,United Arab Emirates,Since there is no room service they should pr...,18,2823,Breakfast spread was good,5,7,5.0,"[' Leisure trip ', ' Couple ', ' Standard Doub...",395 day,48.203745,16.335677
515694,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,7/1/2016,8.1,Atlantis Hotel Vienna,Lithuania,No Negative,0,2823,Rooms are very clean and comfortabile,7,4,10.0,"[' Leisure trip ', ' Group ', ' Standard Tripl...",398 day,48.203745,16.335677
515695,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,6/27/2016,8.1,Atlantis Hotel Vienna,South Korea,dust towel,3,2823,No Positive,0,2,7.1,"[' Leisure trip ', ' Solo traveler ', ' Standa...",402 day,48.203745,16.335677
515696,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,6/23/2016,8.1,Atlantis Hotel Vienna,United States of America,Street noise was loud Rooms were advertised a...,12,2823,Easy bus stop to get to downtown Vienna 48A,11,16,7.1,"[' Leisure trip ', ' Couple ', ' Standard Doub...",406 day,48.203745,16.335677
515697,Wurzbachgasse 21 15 Rudolfsheim F nfhaus 1150 ...,168,6/13/2016,8.1,Atlantis Hotel Vienna,Greece,room could be more clean in the room could ha...,12,2823,nice room comfortable and quiet,6,4,6.7,"[' Leisure trip ', ' Couple ', ' Standard Doub...",416 day,48.203745,16.335677


In [None]:
import pandas as pd

# Reviews with a reviewer score below this value are labelled as bad.
BAD_REVIEW_SCORE_THRESHOLD = 5

# read data
#reviews_df = pd.read_csv("../Data/Hotel_Reviews.csv")

# Guard so the cell can be re-run: after the first run `reviews_df` only holds
# the three selected columns, and the raw review columns no longer exist.
if "review" not in reviews_df.columns:
    reviews_df = reviews_df.assign(
        # append the positive and negative text reviews
        review=reviews_df["Negative_Review"] + reviews_df["Positive_Review"],
        # create the label (vectorized; same values as a per-row `1 if x < 5 else 0`)
        is_bad_review=(reviews_df["Reviewer_Score"] < BAD_REVIEW_SCORE_THRESHOLD).astype(int),
    )[["Hotel_Name", "review", "is_bad_review"]]  # select only relevant columns
reviews_df.head()

Unnamed: 0,Hotel_Name,review,is_bad_review
0,Hotel Arena,I am so angry that i made this post available...,1
1,Hotel Arena,No Negative No real complaints the hotel was g...,0
2,Hotel Arena,Rooms are nice but for elderly a bit difficul...,0
3,Hotel Arena,My room was dirty and I was afraid to walk ba...,1
4,Hotel Arena,You When I booked with your company on line y...,0


In [None]:
# Distinct hotel names found in the reviews, and the position of the one to inspect.
hotel_ind = 200
reviews_hotel = reviews_df["Hotel_Name"].unique()

# Number of distinct hotels, then the name at the chosen position.
print(len(reviews_hotel), reviews_hotel[hotel_ind], sep="\n")

1492
Novotel Paris Centre Gare Montparnasse


In [None]:
from langchain_community.document_loaders import TextLoader

# NOTE(review): the hotel summary below does not use these three values. They
# are kept only so `documents` and `first_document_content` stay defined for
# later cells that read them. Loading the whole CSV as one text document is
# expensive — confirm whether it is still needed.
loader = TextLoader("Hotel_Reviews.csv")
documents = loader.load()
first_document_content = documents[0].page_content

#hotel = "Hotel Arena"
#hotel = "Atlantis Hotel Vienna"
hotel = "Novotel Paris Centre Gare Montparnasse"

# Upper bound on the number of reviews placed in the prompt. Earlier prompt
# variants that sent far more text exceeded the model's input limit or ended
# in a read timeout.
MAX_REVIEWS_IN_PROMPT = 100

# Select the bad reviews of the chosen hotel explicitly. Interpolating
# `reviews_df` itself into an f-string only inserts pandas' truncated repr
# (a few head/tail rows with "..." in between), so the model would never see
# the reviews of this hotel.
is_selected = (reviews_df["Hotel_Name"] == hotel) & (reviews_df["is_bad_review"] == 1)
bad_reviews = reviews_df.loc[is_selected, "review"].dropna().str.strip()
bad_reviews = bad_reviews[bad_reviews != ""].head(MAX_REVIEWS_IN_PROMPT)
if bad_reviews.empty:
    raise ValueError(f"No bad reviews found for hotel {hotel!r}")
reviews_text = "\n".join(f"- {text}" for text in bad_reviews)

summary_prompt = (
    f"The following are guest reviews of the hotel '{hotel}', one review per line:\n\n"
    f"{reviews_text}\n\n"
    "Please summarize these reviews using maximum 3 very concise sentences "
    "(10 words per sentence maximum). "
    "Ignore the value 'No Negative' in the reviews when doing summarization. "
    "Visualize the output as sentences of the summarized reviews. "
    "Please do not write any note or explanation."
)

messages = [
    SystemMessage(
        # Trailing space keeps the two sentences from running together
        # when the adjacent string literals are concatenated.
        content="You are a friendly AI assistant that speaks English. "
                "You write really good summaries."
    ),
    HumanMessage(content=summary_prompt)
]

chain = llm | parser
summary = chain.invoke(messages)
summary

'Rooms often dirty or unclean.\nElevator issues, long wait times.\nPoor staff communication and service.'

## Advanced summarization.

In [None]:
from pathlib import Path
from urllib.request import urlretrieve

from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import TextLoader

# The summaries in this section are about the short story in ulthar.txt.
# The cell that downloads that file is commented out earlier in the notebook,
# and `documents` was last bound to the hotel-review CSV, so the story is
# fetched (once) and loaded here.
ULTHAR_URL = "https://raw.githubusercontent.com/vilmibm/lovecraftcorpus/master/ulthar.txt"
ULTHAR_PATH = Path("ulthar.txt")
if not ULTHAR_PATH.exists():
    urlretrieve(ULTHAR_URL, ULTHAR_PATH)
documents = TextLoader(str(ULTHAR_PATH)).load()

chain = load_summarize_chain(llm, chain_type="map_reduce")
summary = chain.invoke(documents)["output_text"]
summary

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

'"The Cats of Ulthar" is a short story by H.P. Lovecraft about a village, Ulthar, where cats are revered. An elderly couple, notorious for harming cats, vanish and are found as mere skeletons with unusual beetles. Their disappearance coincides with the arrival of a group of mysterious travelers who pray to their deities when a kitten goes missing. The villagers attribute the couple\'s demise to the vengeful cats, resulting in a law that protects cats in Ulthar. The story touches on themes of justice, supernatural powers, and the bond between humans and animals.'

In [None]:
# Summarize the same `documents`, this time with the "stuff" chain type.
chain = load_summarize_chain(llm, chain_type="stuff")
summary = chain.invoke(documents)["output_text"]
summary

"In the town of Ulthar, cats are considered sacred and their killing is strictly forbidden. This originated from an old tale about an evil couple who trapped and killed cats until one day, a caravan of dark travelers arrived in town with a small boy named Menes who had a black kitten. When the couple killed Menes' kitten, the boy prayed to the sky, and all the cats in Ulthar disappeared that night. The next morning, every cat returned to their home, but the evil couple had vanished, leaving only their cleanly picked skeletons behind. The villagers suspected that the cats had taken revenge, leading to the passing of the law protecting cats in Ulthar."

In [None]:
# Summarize the same `documents`, this time with the "refine" chain type.
chain = load_summarize_chain(llm, chain_type="refine")
summary = chain.invoke(documents)["output_text"]
summary

"In the village of Ulthar, beyond the river Skai, there is a law that no man may kill a cat. This law came about due to the actions of an old couple who trapped and killed their neighbors' cats. A caravan of dark wanderers arrived in Ulthar, and a young boy named Menes in the caravan had a black kitten that went missing. The villagers suspected the old couple, and that night, all the cats of Ulthar disappeared. The next morning, the wanderers left and all the cats returned, looking well-fed and content. A week later, the old couple was found dead in their cottage, their bones picked clean. The villagers passed a law that no man may kill a cat in Ulthar."

## Structured output.

In [None]:
import json
from pathlib import Path
from urllib.request import urlretrieve

from langchain_community.document_loaders import TextLoader
from langchain_core.output_parsers import JsonOutputParser

# The characters are extracted from the short story in ulthar.txt. At this
# point `first_document_content` was last bound to the raw hotel-review CSV,
# so the story text is (down)loaded explicitly here.
ULTHAR_URL = "https://raw.githubusercontent.com/vilmibm/lovecraftcorpus/master/ulthar.txt"
ULTHAR_PATH = Path("ulthar.txt")
if not ULTHAR_PATH.exists():
    urlretrieve(ULTHAR_URL, ULTHAR_PATH)
first_document_content = TextLoader(str(ULTHAR_PATH)).load()[0].page_content

messages = [
    SystemMessage(
        # Trailing space keeps the two sentences from running together
        # when the adjacent string literals are concatenated.
        content="You are a friendly AI assistant that speaks English. "
                "Your specialty is extracting structured output in JSON."
    ),
    HumanMessage(
        content=f"List all the characters and what you know about them as JSON:\n\n{first_document_content}"
    )
]

# The JSON parser turns the model's reply into Python dicts/lists.
chain = llm | JsonOutputParser()
structured_output = chain.invoke(messages)
print(json.dumps(structured_output, indent=4))

{
    "characters": [
        {
            "name": "The cat",
            "description": "The cat is cryptic and close to strange things which men cannot see. It is the soul of ancient Egypt and bearer of tales from forgotten cities in Meroe and Ophir. It is the kin of the jungle's lords and heir to the secrets of hoary and sinister Africa. The Sphinx is its cousin, and it speaks her language; but it is more ancient than the Sphinx and remembers that which she hath forgotten."
        },
        {
            "name": "The old cotter and his wife",
            "description": "The old cotter and his wife are inhabitants of Ulthar who delight in trapping and slaying their neighbors' cats. The villagers fear them because of their habitual expression and the peculiar sounds heard after dark from their cottage."
        },
        {
            "name": "The villagers of Ulthar",
            "description": "The villagers of Ulthar are simple people who know not where cats first came from. Th

## Advanced structured output with Pydantic

- https://docs.pydantic.dev/latest/

In [None]:
from pathlib import Path
from typing import List
from urllib.request import urlretrieve

from pydantic import BaseModel
# NOTE(review): `create_extraction_chain_pydantic` is not used in this cell;
# the import is kept in case another cell relies on it.
from langchain.chains import create_extraction_chain_pydantic
from langchain_community.document_loaders import TextLoader


# Target schema for the extraction: one entry per person found in the text.
class Person(BaseModel):
    first_name: str
    last_name: str
    known_facts: str

class PersonGroup(BaseModel):
    persons: List[Person]

llm_small = ChatMistralAI(
    api_key=api_key,
    model="mistral-small-latest" # Medium does not have function calling.
)

# Extract persons from the short story itself. `summary_prompt` was last bound
# to the hotel-review prompt, which contains no story characters, so an
# explicit extraction prompt over ulthar.txt is built here instead.
ULTHAR_URL = "https://raw.githubusercontent.com/vilmibm/lovecraftcorpus/master/ulthar.txt"
ULTHAR_PATH = Path("ulthar.txt")
if not ULTHAR_PATH.exists():
    urlretrieve(ULTHAR_URL, ULTHAR_PATH)
story_text = TextLoader(str(ULTHAR_PATH)).load()[0].page_content
extraction_prompt = (
    "Extract all persons mentioned in the following document and what is known about them:"
    f"\n\n{story_text}"
)

chain = llm_small.with_structured_output(PersonGroup)
structured_output = chain.invoke(extraction_prompt)
print(structured_output)
print()
print(structured_output.model_dump_json(indent=4))

persons=[Person(first_name='Menes', last_name='', known_facts='little boy, no parents, owns a black kitten, prays to bring back his kitten')]

{
    "persons": [
        {
            "first_name": "Menes",
            "last_name": "",
            "known_facts": "little boy, no parents, owns a black kitten, prays to bring back his kitten"
        }
    ]
}


## Classification.

In [None]:
from pydantic import BaseModel, Field


# Schema the model fills in for one statement. The allowed values are added to
# the generated JSON schema through `json_schema_extra`; passing `enum=` as an
# extra keyword argument to `Field` is deprecated in Pydantic v2.
class Classification(BaseModel):
    sentiment: str = Field(
        ...,
        description="describes the sentiment of the statement",
        json_schema_extra={"enum": ["negative", "neutral", "positive"]},
    )
    aggressiveness: int = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
        json_schema_extra={"enum": [0, 1, 2, 3]},
    )
    language: str = Field(
        ...,
        description="describes the language of the statement",
        json_schema_extra={"enum": ["english", "french", "german", "other"]},
    )

statements = [
    "I absolutely love this new restaurant! The food is amazing, and the service is top-notch.",
    "Le service client ici est terrible, et je ne reviendrai jamais.",
    "Ich bin gleichgültig gegenüber der neuen Politik; sie betrifft mich nicht wirklich.",
    "Your recent actions were completely unacceptable, and they have consequences.",
    "Quel beau jour ! Je me sens si heureux et en paix.",
    "Die Art und Weise, wie Sie die Situation gehandhabt haben, war sehr enttäuschend und unprofessionell.",
    "Creo que la presentación estuvo bien, pero podría mejorar.",
    "You have no right to speak to me that way! It's utterly disrespectful.",
    "Ce livre est très intéressant, et j'ai beaucoup aimé le lire.",
    "Ihre Bemühungen bei dem Projekt waren bestenfalls mittelmäßig, und wir müssen das besprechen.",
    "Hab SoSlI' Quch!"
]

# The chain does not depend on the statement, so it is built once outside the loop.
chain = llm_small.with_structured_output(Classification)

for statement in statements:
    structured_output = chain.invoke(statement)
    print(statement)
    # `model_dump()` is the Pydantic v2 replacement for the deprecated `.dict()`.
    print(structured_output.model_dump())
    print("")

I absolutely love this new restaurant! The food is amazing, and the service is top-notch.
{'sentiment': 'positive', 'aggressiveness': 0, 'language': 'english'}

Le service client ici est terrible, et je ne reviendrai jamais.
{'sentiment': 'negative', 'aggressiveness': 2, 'language': 'french'}

Ich bin gleichgültig gegenüber der neuen Politik; sie betrifft mich nicht wirklich.
{'sentiment': 'neutral', 'aggressiveness': 0, 'language': 'german'}

Your recent actions were completely unacceptable, and they have consequences.
{'sentiment': 'negative', 'aggressiveness': 2, 'language': 'english'}

Quel beau jour ! Je me sens si heureux et en paix.
{'sentiment': 'positive', 'aggressiveness': 0, 'language': 'french'}

Die Art und Weise, wie Sie die Situation gehandhabt haben, war sehr enttäuschend und unprofessionell.
{'sentiment': 'negative', 'aggressiveness': 2, 'language': 'german'}

Creo que la presentación estuvo bien, pero podría mejorar.
{'sentiment': 'neutral', 'aggressiveness': 0, 'lang

## Tool use.

In [None]:
from typing import List

from langchain_core.tools import tool

# The element type is annotated so the generated tool schema tells the model
# that `numbers` is an array of integers rather than an array of anything.
@tool
def sum_tool(numbers: List[int]) -> int:
    """Sum up numbers."""
    return sum(numbers)

print(sum_tool.name)
print(sum_tool.description)
print(sum_tool.args)

numbers = [42, 308423, 666, 1000000, 1729, -1245, 768]
numbers_string = ", ".join(str(n) for n in numbers)

# Sanity.
print("Expected:", sum(numbers))
print("")

# Create the prompts.
# The system message carries the persona and the human message carries the
# task. Previously `system_prompt` was defined but never sent, and the task
# text was passed as the system message.
system_prompt = "You are a friendly AI assistant that speaks English. You are good at math."
sum_prompt = f"Please sum up the following numbers: {numbers_string}."
messages = [
    SystemMessage(content=system_prompt),
    HumanMessage(content=sum_prompt)
]

# Without tools.
print("Without tools:")
print(llm_small.invoke(messages))
print("")


def first_tool_call_args(ai_message):
    """Return the arguments of the first tool call requested in `ai_message`.

    Raises:
        ValueError: If the model answered without requesting any tool call.
    """
    if not ai_message.tool_calls:
        raise ValueError("The model did not request a tool call.")
    return ai_message.tool_calls[0]["args"]


# With tools.
print("With tools:")
llm_small_with_tools = llm_small.bind_tools([sum_tool])
print(llm_small_with_tools.invoke(messages))
# Model -> arguments of the requested tool call -> actual tool execution.
chain = llm_small_with_tools | first_tool_call_args | sum_tool
print(chain.invoke(messages))

sum_tool
Sum up numbers.
{'numbers': {'items': {}, 'title': 'Numbers', 'type': 'array'}}
Expected: 1310383

Without tools:
content='To sum up the numbers 42, 308423, 666, 1000000, 1729, -1245, and 768, you can simply add them together:\n\n42 + 308423 + 666 + 1000000 + 1729 - 1245 + 768 = 1309773\n\nSo, the sum of these numbers is 1,309,773.' additional_kwargs={} response_metadata={'token_usage': {'prompt_tokens': 96, 'total_tokens': 225, 'completion_tokens': 129}, 'model': 'mistral-small-latest', 'finish_reason': 'stop'} id='run-31d50469-b363-416c-9423-be2ce24a7b73-0' usage_metadata={'input_tokens': 96, 'output_tokens': 129, 'total_tokens': 225}

With tools:
content='' additional_kwargs={'tool_calls': [{'id': 'i3AxGIyAI', 'type': 'function', 'function': {'name': 'sum_tool', 'arguments': '{"numbers": [42, 308423, 666, 1000000, 1729, -1245, 768]}'}}]} response_metadata={'token_usage': {'prompt_tokens': 160, 'total_tokens': 222, 'completion_tokens': 62}, 'model': 'mistral-small-latest', '

## Loading PDFs.


In [None]:
!wget https://www.pileface.com/sollers/pdf/Zarathustra.pdf
!pip install pypdf

--2024-10-24 14:37:47--  https://www.pileface.com/sollers/pdf/Zarathustra.pdf
Resolving www.pileface.com (www.pileface.com)... 46.105.204.11, 2001:41d0:1:1b00:213:186:33:40
Connecting to www.pileface.com (www.pileface.com)|46.105.204.11|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 683432 (667K) [application/pdf]
Saving to: ‘Zarathustra.pdf’


2024-10-24 14:37:48 (1.68 MB/s) - ‘Zarathustra.pdf’ saved [683432/683432]

Collecting pypdf
  Downloading pypdf-5.0.1-py3-none-any.whl.metadata (7.4 kB)
Downloading pypdf-5.0.1-py3-none-any.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.5/294.5 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.0.1


In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF and split it into documents.
loader = PyPDFLoader("Zarathustra.pdf")
pages = loader.load_and_split()

# Show the text of the first two documents, each followed by a blank line.
for page in pages[:2]:
    print(page.page_content, end="\n\n")

1Friedrich Nietzsche 
 
Also sprach Zarathustra 
 
Ein Buch für Alle und Keinen 
 
   Inhaltsverzeichnis    Erster Theil       Zarathustra's Vorrede     Die Reden Zarathustra's       Von den drei Verwandlungen       Von den Lehrstühlen der Tugend       Von den Hinterweltlern       Von den Verächtern des Leibes       Von den Freuden- und Leidenschaften       Vom bleichen Verbrecher       Vom Lesen und Schreiben       Vom Baum am Berge       Von den Predigern des Todes       Vom Krieg und Kriegsvolke       Vom neuen Götzen       Von den Fliegen des Marktes       Von der Keuschheit       Vom Freunde       Von tausend und Einem Ziele       Von der Nächstenliebe       Vom Wege des Schaffenden       Von alten und jungen Weiblein       Vom Biss der Natter       Von Kind und Ehe       Vom freien Tode       Von der schenkenden Tugend   Zweiter Theil       Das Kind mit dem Spiegel       Auf den glückseligen Inseln       Von den Mitleidigen       Von den Priestern       Von den Tugendhaften      

## Web Loader

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Fetch the web page and split it into documents.
loader = WebBaseLoader("https://de.wikipedia.org/wiki/Heilbronn")
pages = loader.load_and_split()

# Show the text of the first two documents, each followed by a blank line.
for page in pages[:2]:
    print(page.page_content, end="\n\n")

Heilbronn – Wikipedia












































Heilbronn

aus Wikipedia, der freien Enzyklopädie



Zur Navigation springen
Zur Suche springen



Der Titel dieses Artikels ist mehrdeutig. Weitere Bedeutungen sind unter Heilbronn (Begriffsklärung) aufgeführt.



Wappen

Deutschlandkarte











Basisdaten


Koordinaten:

49° 9′ N, 9° 13′ O49.1416666666679.2222222222222157Koordinaten: 49° 9′ N, 9° 13′ O


Bundesland:
Baden-Württemberg


Regierungsbezirk:

Stuttgart


Höhe:

157 m ü. NHN


Fläche:

99,9 km2


Einwohner:

130.093 (31. Dez. 2023)[1]


Bevölkerungsdichte:

1302 Einwohner je km2


Postleitzahlen:

74072–74081


Vorwahlen:

07131, 07066


Kfz-Kennzeichen:

HN


Gemeindeschlüssel:

08 1 21 000


LOCODE:

DE HEN


NUTS:

DE117


Stadtgliederung:

9 Stadtteile


Adresse der Stadtverwaltung:

Marktplatz 774072 Heilbronn


Website:

www.heilbronn.de


Oberbürgermeister:

Harry Mergel (SPD)


Lage der Stadt Heilbronn in Baden-Württemberg


Karte

Blick über die

## Gradio chat.

https://www.gradio.app/

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.3.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio)
  Downloading python_multipart-0.0.14-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.w

In [None]:
import gradio as gr

def predict(message, history):
    """Answer `message` with `llm`, replaying the earlier chat turns as context.

    Args:
        message: The newest user message.
        history: Earlier turns as passed in by the chat interface — either
            (user, assistant) pairs or dicts with "role" and "content" keys.

    Returns:
        The content of the model's reply.
    """
    history_langchain_format = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages-style history: one dict per message.
            message_cls = HumanMessage if turn["role"] == "user" else AIMessage
            history_langchain_format.append(message_cls(content=turn["content"]))
        else:
            # Pair-style history: (user text, assistant text).
            human, ai = turn
            history_langchain_format.append(HumanMessage(content=human))
            history_langchain_format.append(AIMessage(content=ai))
    history_langchain_format.append(HumanMessage(content=message))
    # Use `invoke`, as the rest of the notebook does; calling the model object
    # directly is deprecated in LangChain.
    gpt_response = llm.invoke(history_langchain_format)
    return gpt_response.content

gr.ChatInterface(predict).launch()

ModuleNotFoundError: No module named 'multipart'

## Vector Databases.

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_mistralai import MistralAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Embedding model reused by the evaluator and the vector store cells below.
embeddings_model = MistralAIEmbeddings(model="mistral-embed", api_key=api_key)

# Embed one sentence and print the length of the resulting vector.
embedding = embeddings_model.embed_query("This is a test, I want to embed.")
print(len(embedding))

1024


In [None]:
from langchain.evaluation import load_evaluator

evaluator = load_evaluator("embedding_distance", embeddings=embeddings_model)

# Compare one fixed sentence against two reference sentences and print
# the evaluator's result for each pair.
for reference in (
    "I like the Star Wars series.",
    "Hi. I am Tristan. I love teaching AI.",
):
    distance = evaluator.evaluate_strings(
        prediction="Dune is a great movie.",
        reference=reference,
    )
    print(distance)

{'score': 0.24685926507781253}
{'score': 0.4234202117961898}


In [None]:
!wget https://raw.githubusercontent.com/vilmibm/lovecraftcorpus/master/mountains_of_madness.txt

--2024-10-24 15:04:40--  https://raw.githubusercontent.com/vilmibm/lovecraftcorpus/master/mountains_of_madness.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 245885 (240K) [text/plain]
Saving to: ‘mountains_of_madness.txt’


2024-10-24 15:04:40 (4.41 MB/s) - ‘mountains_of_madness.txt’ saved [245885/245885]



## Let us use Chroma.

- https://www.trychroma.com/

In [None]:
from langchain_chroma import Chroma

# Splitter configured with chunk_size=1000 and no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)

# Load the downloaded text file and cut it into smaller documents.
raw_documents = TextLoader("mountains_of_madness.txt").load()
documents = text_splitter.split_documents(raw_documents)

# Report how many pieces came out of the split, then show the first one.
print(f"Got {len(documents)} documents after splitting")
print(documents[0])

Got 264 documents after splitting
page_content='AT THE MOUNTAINS OF MADNESS

I


Doubt of the real facts, as I must reveal them, is inevitable; yet, if I suppressed what will seem extravagant and incredible, there would be nothing left. The hitherto withheld photographs, both ordinary and aerial, will count in my favor, for they are damnably vivid and graphic. Still, they will be doubted because of the great lengths to which clever fakery can be carried. The ink drawings, of course, will be jeered at as obvious impostures, notwithstanding a strangeness of technique which art experts ought to remark and puzzle over.' metadata={'source': 'mountains_of_madness.txt'}


Fill the database.

In [None]:
# Build the Chroma vector store from the split documents, embedded with the Mistral model.
database = Chroma.from_documents(documents=documents, embedding=embeddings_model)

Query the database.

In [None]:
# Search the vector store for the question and display the text of the first result.
query = "What is an Old One?"
docs = database.similarity_search(query=query)
docs[0].page_content

'It was curious to note from the pictured battles that both the Cthulhu spawn and the Mi-Go seem to have been composed of matter more widely different from that which we know than was the substance of the Old Ones. They were able to undergo transformations and reintegrations impossible for their adversaries, and seem therefore to have originally come from even remoter gulfs of the cosmic space. The Old Ones, but for their abnormal toughness and peculiar vital properties, were strictly material, and must have had their absolute origin within the known space-time continuum--whereas the first sources of the other beings can only be guessed at with bated breath. All this, of course, assuming that the non-terrestrial linkages and the anomalies ascribed to the invading foes are not pure mythology. Conceivably, the Old Ones might have invented a cosmic framework to account for their occasional defeats, since historical interest and pride obviously formed their chief psychological element. It 

In [None]:
query = "What is an Old One?"
docs = database.similarity_search_with_score(query=query)

# Each result is a (document, score) pair; show the first pair's text and score.
top_document, top_score = docs[0]
top_document.page_content, top_score

('It was curious to note from the pictured battles that both the Cthulhu spawn and the Mi-Go seem to have been composed of matter more widely different from that which we know than was the substance of the Old Ones. They were able to undergo transformations and reintegrations impossible for their adversaries, and seem therefore to have originally come from even remoter gulfs of the cosmic space. The Old Ones, but for their abnormal toughness and peculiar vital properties, were strictly material, and must have had their absolute origin within the known space-time continuum--whereas the first sources of the other beings can only be guessed at with bated breath. All this, of course, assuming that the non-terrestrial linkages and the anomalies ascribed to the invading foes are not pure mythology. Conceivably, the Old Ones might have invented a cosmic framework to account for their occasional defeats, since historical interest and pride obviously formed their chief psychological element. It

## Talk to a document.

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate

# Prompt asking the model to turn the chat history plus a new question into a standalone question.
template="""Given the following conversation history and a new user question, generate a standalone question.
Conversation history:
{chat_history}
New question: {question}
Standalone question:"""

question_generator_prompt = PromptTemplate(
    template=template,
    input_variables=["chat_history", "question"],
)

# Chain that runs the standalone-question prompt on the LLM.
question_generator_chain = LLMChain(prompt=question_generator_prompt, llm=llm)

# Question-answering chain used as the "combine documents" step below.
qa_chain = load_qa_chain(llm)

# Conversational pipeline: question generator + Chroma retriever (k=5) + QA chain.
retrieval_chain = ConversationalRetrievalChain(
    question_generator=question_generator_chain,
    retriever=database.as_retriever(search_kwargs={"k": 5}),
    combine_docs_chain=qa_chain,
)

def predict(message, history):
    """Answer a chat message with the conversational retrieval chain.

    Args:
        message: The new user message (the question to answer).
        history: Previous turns as an iterable of ``(human, ai)`` pairs.

    Returns:
        The value stored under ``"answer"`` in the chain's response.
    """
    # Only past turns belong in the history. The new message is passed
    # separately as "question"; appending it here as well would present it
    # twice to the question generator.
    history_langchain_format = []
    for human, ai in history:
        history_langchain_format.append(HumanMessage(content=human))
        history_langchain_format.append(AIMessage(content=ai))

    # invoke() replaces the deprecated direct call of the chain object.
    response = retrieval_chain.invoke(
        {"question": message, "chat_history": history_langchain_format}
    )

    return response["answer"]

# Import gradio here so this cell is self-contained and does not rely on `gr`
# having been defined by an earlier cell.
import gradio as gr

gr.ChatInterface(predict).launch()

NameError: name 'gr' is not defined

# Software development

In [None]:
# The system message sets the persona; the human message carries the actual task.
messages = [
    SystemMessage(content="You are a 150K EUR/year principal software engineer. You write the best code in the world."),
    HumanMessage(content="Implement Conway's game of life in Python."),
]

# Send both messages to the model and print the text of its reply.
result = llm.invoke(messages)
print(result.content)

In [None]:
# Sample source code kept as a plain string; the following cells embed it in
# prompts that ask the model for unit tests and for a code review.
code = """
class ToDoList:
    def __init__(self):
        self.tasks = []

    def add_task(self, task: str):
        if not isinstance(task, str) or not task.strip():
            raise ValueError("Task must be a non-empty string")
        self.tasks.append({"task": task, "completed": False})

    def remove_task(self, task: str):
        for t in self.tasks:
            if t["task"] == task:
                self.tasks.remove(t)
                return
        raise ValueError("Task not found")

    def mark_completed(self, task: str):
        for t in self.tasks:
            if t["task"] == task:
                t["completed"] = True
                return
        raise ValueError("Task not found")

    def get_tasks(self, completed=None):
        if completed is None:
            return self.tasks
        return [t for t in self.tasks if t["completed"] == completed]

    def clear_completed(self):
        self.tasks = [t for t in self.tasks if not t["completed"]]

"""

# Same engineer persona as before; the human message embeds the sample code
# between ''' markers and asks for unit tests.
messages = [
    SystemMessage(content="You are a 150K EUR/year principal software engineer. You write the best code in the world."),
    HumanMessage(content=f"Here is some code:\n\n'''\n{code}\n'''\n\nPlease write unit tests."),
]

# Send the prompt to the model and print the text of its reply.
result = llm.invoke(messages)
print(result.content)

In [None]:
# Reviewer persona; the human message embeds the sample code between '''
# markers and asks for a code review.
messages = [
    SystemMessage(
        content="You are a 150K EUR/year principal code reviewer. You write code reviews even god has never seen."
    ),
    HumanMessage(
        # The request previously read "Please a code review.." (missing verb,
        # doubled period); spell out the instruction for the model.
        content=f"Here is some code:\n\n'''\n{code}\n'''\n\nPlease write a code review."
    )
]

# Send the prompt to the model and print the text of its reply.
result = llm.invoke(messages)
print(result.content)

TODO: https://blog.langchain.dev/reflection-agents/