In [1]:
import getpass
import os

if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for Mistral AI: ")

from langchain_mistralai import ChatMistralAI

llm = ChatMistralAI(model="mistral-large-latest")

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="Snowflake/snowflake-arctic-embed-m")#, model_kwargs={"trust_remote_code": True})

In [3]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer(class_=("scrtext"))
loader = WebBaseLoader(
    web_paths=("https://imsdb.com/scripts/Shrek.html",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")
print(docs[0].page_content[:500])

USER_AGENT environment variable not set, consider setting it to identify your requests.


Total characters: 150873



                                          SHREK

                                       Written by

                                William Steig & Ted Elliott




                                     SHREK
                         Once upon a time there was a lovely 
                         princess. But she had an enchantment 
                         upon her of a fearful sort which could 
                         only be broken by love's first kiss. 
                     


In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,  # chunk size (characters)
    chunk_overlap=256,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split the Shrek script into {len(all_splits)} sub-documents.")

Split the Shrek script into 593 sub-documents.


In [5]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

document_ids = []
for i in range(0, len(all_splits)):
    document_ids += vector_store.add_documents(documents=all_splits[i : i + 5])

print(document_ids[:3])

['41f5bc10-05ce-4700-8208-ec88f6d762b1', 'd55762de-8834-4d73-b2b5-1067672db210', '87923e9e-9610-4601-8bb4-62a7f9874918']


In [6]:
print(len(document_ids))

2955


In [7]:
from langchain import hub
from langchain_core.documents import Document
from typing_extensions import TypedDict, List

prompt = hub.pull("rlm/rag-prompt")

class State(TypedDict):
    question: str
    context: List[Document]
    answer: str



In [8]:
# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"], k=16)
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

In [9]:
from langgraph.graph import START, StateGraph

# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [10]:
question = "Who and what is encountered in the castle?"
response = graph.invoke({"question": question})
print(response["answer"])

In the castle, Shrek and Donkey encounter a skeleton and a princess who is under an enchantment guarded by a fire-breathing dragon.


In [11]:
question = "Who is getting married?"
response = graph.invoke({"question": question})
print(response["answer"])

Fiona is getting married to Lord Farquaad.


In [17]:
question = "Does anyone object to the marriage?"
response = graph.invoke({"question": question})
print(response["answer"])

Yes, Shrek objects to the marriage. He says, "No, no!" and interrupts the ceremony. Donkey also mentions "That's when you say, 'I object!'"


In [18]:
question = "Where does Shrek live?"
response = graph.invoke({"question": question})
print(response["answer"])

Shrek lives in a cottage. The exact location is not specified, but it is depicted as a secluded, swampy area.
