In [54]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

from uuid import uuid4

In [8]:
# Collect every PDF matched by the recursive glob below the English sample
# folder; files are loaded with multithreading enabled and a progress bar shown.
loader = DirectoryLoader(
    "./sample_pdfs/en/",
    glob="**/*.pdf",
    use_multithreading=True,
    show_progress=True,
)
docs = loader.load()

100%|██████████| 3/3 [00:31<00:00, 10.57s/it]


In [None]:
def document_splitter(
    documents: list[Document],
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
) -> list[Document]:
    """Split documents into overlapping chunks for embedding.

    Args:
        documents: Documents to split, e.g. the result of ``loader.load()``.
        chunk_size: Upper bound on chunk length, measured with ``len``
            (i.e. characters). Defaults to 1000.
        chunk_overlap: Number of characters consecutive chunks may share.
            Defaults to 100.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        # Separators are treated as literal strings, not regular expressions.
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)

In [24]:
def get_embedding_function():
    """Return an ``OllamaEmbeddings`` instance configured for ``nomic-embed-text``."""
    return OllamaEmbeddings(model='nomic-embed-text')


In [29]:
# Collection settings kept in one place so the create call and the vector
# store wrapper cannot drift apart.
COLLECTION_NAME = "demo_collection"
# Must equal the output dimension of the embedding model used below
# (presumably 768 for nomic-embed-text -- confirm if the model is changed).
EMBEDDING_DIM = 768

# Qdrant client backed by a local storage path instead of a server URL.
client = QdrantClient(path="./tmp/langchain_qdrant")

# The storage path outlives the kernel, so an unconditional create_collection
# raises "already exists" on the next run. Only create the collection
# when it is missing so the notebook survives Restart & Run All.
if not client.collection_exists(COLLECTION_NAME):
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )

vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=get_embedding_function(),
)

In [33]:
# Split the loaded PDFs into chunks and mint one random UUID per chunk to use
# as its point id in the vector store.
chunks = document_splitter(documents=docs)
uuids = [str(uuid4()) for _ in chunks]

# Embed and index the chunks. Without this step the collection stays empty and
# the similarity search further down has nothing to return on a fresh run.
# NOTE(review): ids are random, so re-running this cell indexes the same
# chunks again under new ids.
vector_store.add_documents(documents=chunks, ids=uuids)

['322d0c51-959c-4d13-8acd-7319c02c5aea',
 '5471ef9b-162f-402b-a869-19d99d5acaea',
 'e40d809a-cf3c-4c41-a1a8-2d9b71acf24e',
 'f32e0453-16ac-42ff-bd49-cdb6712bb2c8',
 '89fc73d8-775c-470a-802d-e42c2e46a906',
 '8c83e99a-0ec0-46d6-abf3-5a539b7ce58e',
 'a6f990dd-34a5-4b97-bbf3-6ded5c40b001',
 '134d88c1-f69f-4255-99fa-f1c4b16355c2',
 '2c29eedd-08c0-48c0-ba2d-5341082461dc',
 '38af2ad1-95f5-42d4-8c2a-9d6150950f53',
 '6da6d649-e246-4ea8-8e85-d45a74e1a6c8',
 'cffd03fd-5ff8-4760-9521-0a747410a522',
 'babfb3ac-06a2-4f8e-a66e-89d1367febc6',
 '2404757f-6373-48cd-899a-95ee6d918315',
 '882d3f25-67be-463c-8ad2-503977b442a2',
 '5782f63a-8e19-4a12-93b4-a30b934f67c8',
 '1a9c3170-5e61-4137-8c05-b8b5179663d8',
 'cf741a56-433b-4e20-861e-9c7a6ca7557b',
 '99b7ea23-008c-4b87-99e0-0acb67e6a739',
 '5811ed10-131c-44bd-865c-f00cd46345e0',
 'ddca400d-559b-4d0f-9348-fe85e394ece3',
 'bab3438c-515e-46bc-a858-91145ffad0b4',
 '9aaf1805-a83e-4670-9d7e-fd37f971631b',
 'f9f1b17b-104e-47b0-a48f-9d6658c7f15f',
 '62ea8cb3-ea7c-

In [39]:
# Prompt skeleton for the answering model. It has two placeholders:
#   {context}  - the retrieved passages the answer must be based on
#   {question} - the student's question
# NOTE(review): "Used good level of vocabulary" probably should read
# "Use a good level of vocabulary"; the text is left untouched here because
# any edit changes the prompt sent to the model.
PROMPT_TEMPLATE = """

    You are a teacher that answers questions based on the following context:
    {context}

    ---
    Answer the student's question {question} based on the context given above.
    Make the answer as descriptive as you can and make it easier to understand.
    Used good level of vocabulary but make the student understand the new words along the way.

"""

In [65]:
# NOTE(review): "Santigo" looks like a misspelling of "Santiago"; it is left
# as written because changing the query text changes what gets retrieved.
query_text = "Did Santigo know Alchemy in the book? Where is he roaming around?"

# Retrieve the five best-matching chunks together with their similarity scores.
results = vector_store.similarity_search_with_score(
    query=query_text, k=5
)

for doc, score in results:
    # ".3f" prints three decimals. The earlier "3f" was only a minimum field
    # width of 3, so scores were shown with the default six decimals.
    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.663553] *

When the sun had set, and the first stars made their appearance, the boy started to walk to the south. He eventually sighted a single tent, and a group of Arabs passing by told the boy that it was a place inhabited by genies. But the boy sat down and waited.

Not until the moon was high did the alchemist ride into view. He carried two dead hawks over his shoulder.

"I am here," the boy said.

"You shouldn't be here," the alchemist answered. "Or is it your destiny that brings you here?"

"With the wars between the tribes, it's

impossible to cross the desert. So I have come here."

The alchemist dismounted from his horse, and signaled that the boy should enter the tent with him. It was a tent like many at the oasis. The boy looked around for the ovens and other apparatus used in alchemy, but saw none. There were only some books in a pile, a small cooking stove, and the carpets, covered with mysterious designs. [{'source': 'sample_pdfs\\en\\The Alchemist by Paulo Coel

In [66]:
# Concatenate the retrieved passages into a single context string, with a
# "---" rule between consecutive passages.
context_text = "\n\n---\n\n".join(
    hit.page_content for hit, _similarity in results
)

# Fill the template's {context} and {question} placeholders.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(
    question=query_text,
    context=context_text,
)

# Send the rendered prompt to the "mistral" model via Ollama and show the reply.
model = Ollama(model="mistral")
response_text = model.invoke(prompt)
print(response_text)

 In the context provided, it's not explicitly stated that Santiago (the boy in the story) is an alchemist himself. However, he encounters an alchemist named Al-Hazred, also known as the alchemist, who teaches him about the principles of alchemy. The story doesn't specify where Santiago roams around initially, but it becomes clear that he eventually travels to the oasis inhabited by genies, where he meets the alchemist. In summary, while Santiago may not be an alchemist at the beginning of the story, he learns about and is influenced by the teachings of Al-Hazred, the alchemist.
