In [1]:
# NOTE: This sample uses The Count of Monte Cristo to create a RAG
# vector database. This is almost certainly a completely pointless
# exercise since the entire book is probably included in the training
# data of every major AI model. Nevertheless, it shows the principle
# of how RAG can use a vector database to augment a model's ability
# to answer questions about data it was not trained on.

import ollama
from qdrant_client import QdrantClient, models

# Ollama model that generates the final chat answer.
chat_model = "llama3.3"
# Ollama model used to embed both the book passages and the query prompt.
embedding_model = "nomic-embed-text"
# Vector size the Qdrant collection is created with; it has to match the
# length of the vectors produced by embedding_model.
embeddings_vector_size = 768
# Name of the Qdrant collection that holds the passage embeddings.
vector_collection_name = "textblobs"

In [2]:
# Embeddings for this sample live in an in-memory Qdrant instance.
qdrant_client = QdrantClient(location=":memory:")

# Vectors in the collection are compared by cosine distance; their size comes
# from the embeddings_vector_size setting.
collection_vectors_config = models.VectorParams(
    size=embeddings_vector_size,
    distance=models.Distance.COSINE,
)

qdrant_client.create_collection(
    collection_name=vector_collection_name,
    vectors_config=collection_vectors_config,
)

True

In [3]:
# Read in the contents of The Count of Monte Cristo.
# The context manager closes the file handle as soon as the text has been
# read, and the explicit encoding keeps the decoding of the book's non-ASCII
# characters (curly quotes, accented letters) independent of the platform
# default.
# NOTE(review): assumes the text file is UTF-8 encoded - confirm.
with open("../../assets/books/CountOfMonteCristo.txt", "r", encoding="utf-8") as f_book:
    book_text = f_book.read()

In [4]:
# Split text and load embeddings into the vector database.
from langchain.text_splitter import CharacterTextSplitter

# Note: as the warnings printed by this cell show, the splitter can emit
# blocks that are longer than chunk_size.
text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
text_blocks = text_splitter.split_text(book_text)

# Embed all blocks with a single request to the embedding model.
ollama_embed_result = ollama.embed(
    model=embedding_model,
    input=text_blocks
)

text_embeddings_array = ollama_embed_result.embeddings

# Each point id is the index of its passage in text_blocks (the lookup code
# relies on that), so the two sequences must line up one-to-one.
assert len(text_embeddings_array) == len(text_blocks), (
    "expected exactly one embedding per text block"
)

# Store every point with one batched upsert instead of one call per point.
# The result is assigned so the cell does not display it.
upsert_result = qdrant_client.upsert(
    collection_name=vector_collection_name,
    points=[
        models.PointStruct(id=block_index, vector=block_embedding)
        for block_index, block_embedding in enumerate(text_embeddings_array)
    ],
)
    

Created a chunk of size 1210, which is longer than the specified 1024
Created a chunk of size 1201, which is longer than the specified 1024
Created a chunk of size 1038, which is longer than the specified 1024
Created a chunk of size 1186, which is longer than the specified 1024
Created a chunk of size 1045, which is longer than the specified 1024
Created a chunk of size 1103, which is longer than the specified 1024
Created a chunk of size 1264, which is longer than the specified 1024
Created a chunk of size 1108, which is longer than the specified 1024
Created a chunk of size 1101, which is longer than the specified 1024
Created a chunk of size 1046, which is longer than the specified 1024
Created a chunk of size 1485, which is longer than the specified 1024
Created a chunk of size 1671, which is longer than the specified 1024
Created a chunk of size 1030, which is longer than the specified 1024
Created a chunk of size 1081, which is longer than the specified 1024
Created a chunk of s

In [5]:
def get_related_text_blocks(prompt, max_blocks):
    """Return the book passages most closely related to ``prompt``.

    The prompt is embedded with ``embedding_model`` and the resulting vector
    is used to query the ``vector_collection_name`` collection. Point ids are
    indices into ``text_blocks``, so for every hit the passage itself plus its
    immediate predecessor and successor (where they exist) are returned.

    Args:
        prompt: The text to find related passages for.
        max_blocks: Maximum number of search hits requested from the vector
            database. Because neighbouring passages are added for context,
            up to ``3 * max_blocks`` passages can be returned.

    Returns:
        A list of passage strings in hit order (previous, hit, next for each
        hit). A passage reached through several hits appears only once, at
        its first position. Empty when ``max_blocks`` is not positive.
    """
    if max_blocks <= 0:
        return []

    ollama_embed_result = ollama.embed(
        model=embedding_model,
        input=prompt
    )

    # A single input was embedded, so the first (only) vector is the prompt's.
    prompt_embedding = ollama_embed_result.embeddings[0]

    search_result = qdrant_client.query_points(
        collection_name=vector_collection_name,
        query=prompt_embedding,
        limit=max_blocks,
    )

    last_index = len(text_blocks) - 1
    seen_indices = set()
    result_list = []
    for point in search_result.points:
        block_index = point.id
        # For good measure, include the previous and following passages
        # (since they are sequentially id'd).
        for neighbor_index in (block_index - 1, block_index, block_index + 1):
            # Skip neighbours that fall off either end of the book.
            if neighbor_index < 0 or neighbor_index > last_index:
                continue
            # Adjacent hits share neighbours; keep each passage only once.
            if neighbor_index in seen_indices:
                continue
            seen_indices.add(neighbor_index)
            result_list.append(text_blocks[neighbor_index])

    return result_list

In [6]:
prompt = "Tell me about when Mercedes recognizes the Count of Monte Cristo's true identity."

related_text_blocks = get_related_text_blocks(prompt, 8)

# Print each retrieved passage under a dashed separator; reading through them
# shows how the search pulls in potentially related parts of the book.
for passage in related_text_blocks:
    print("\n------------------\n", passage, sep="\n")


------------------

“Who are you, madame?” said the count to the veiled woman.

40227m

The stranger cast one look around her, to be certain that they were
quite alone; then bending as if she would have knelt, and joining her
hands, she said with an accent of despair:

“Edmond, you will not kill my son!”

The count retreated a step, uttered a slight exclamation, and let fall
the pistol he held.

“What name did you pronounce then, Madame de Morcerf?” said he.

“Yours!” cried she, throwing back her veil,—“yours, which I alone,
perhaps, have not forgotten. Edmond, it is not Madame de Morcerf who is
come to you, it is Mercédès.”

“Mercédès is dead, madame,” said Monte Cristo; “I know no one now of
that name.”

------------------

“Yours!” cried she, throwing back her veil,—“yours, which I alone,
perhaps, have not forgotten. Edmond, it is not Madame de Morcerf who is
come to you, it is Mercédès.”

“Mercédès is dead, madame,” said Monte Cristo; “I know no one now of
that name.”

“Mercédès l

In [9]:
# For RAG (Retrieval Augmented Generation), you just include the extra
# related passages in the prompt.

# The question and an explanatory note come first; every retrieved passage
# then follows, each one preceded by a dashed separator.
rag_header = prompt + "\n\n" + "Below are some potentially related passages that can be used to help answer the prompt above. These passages are separated by dashed lines and come from different parts of the book:"
rag_prompt = "\n\n-------------\n\n".join([rag_header, *related_text_blocks])

# Send the augmented prompt as a single user message and keep only the
# message part of the chat result.
response = ollama.chat(
    model=chat_model,
    messages=[{"role": "user", "content": rag_prompt}],
    options={'temperature': 0.6},
).message

print(response.content)

This text appears to be an excerpt from Alexandre Dumas' novel "The Count of Monte Cristo". The conversation between Mercédès (also known as Haydée's mother) and Edmond Dantès (the Count of Monte Cristo) is a poignant one, as they reflect on their past and the choices they made.

Mercédès is filled with regret and guilt over her past actions, particularly her denial of God and her role in Edmond's imprisonment. She sees herself as a coward and believes that she has been punished for her mistakes. Edmond, on the other hand, has found strength and faith through his experiences and has emerged as a powerful and dignified individual.

The conversation also hints at the possibility of a new love interest for Edmond in Haydée, who is Mercédès' daughter. Haydée is depicted as a beautiful and loyal young woman who is deeply devoted to Edmond. The text suggests that Edmond may be developing feelings for her, and that she may bring him happiness and comfort after all he has suffered.

The arriva