In [1]:
%reload_ext autoreload
%autoreload 2

Import `haystack` modules

In [1]:
import pandas as pd

# Install HuggingFace Datasets using "pip install datasets"
from datasets import load_dataset
from haystack import Document, Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders import ChatPromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.preprocessors import DocumentCleaner
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.nodes import PromptTemplate
from haystack_integrations.document_stores.weaviate import WeaviateDocumentStore
from haystack.dataclasses import ChatMessage
from haystack.components.converters import PDFMinerToDocument
from datetime import datetime

# Import LlamaCppChatGenerator
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'Document' from 'haystack' (/mnt/kalista/git-cuongpiger/haystack/.venv/lib/python3.10/site-packages/haystack/__init__.py)

Load pdf files

In [7]:
converter = PDFMinerToDocument()  # type: ignore
results = converter.run(sources=["./data/sample.pdf"], meta={"date_added": datetime.now().isoformat()})
documents = results["documents"]



In [9]:
document_store = WeaviateDocumentStore(
    url='http://localhost:8080'
)

Index documents and saved them in the `document_store`

In [10]:
document_store = InMemoryDocumentStore(embedding_similarity_function="cosine")

doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")  # type: ignore

pipeline = Pipeline()
pipeline.add_component("converter", PDFMinerToDocument())  # type: ignore
pipeline.add_component("cleaner", DocumentCleaner())  # type: ignore
pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=5))  # type: ignore
pipeline.add_component(instance=doc_embedder, name="DocEmbedder")  
pipeline.add_component("writer", DocumentWriter(document_store=document_store))  # type: ignore

pipeline.connect("converter", "cleaner")
pipeline.connect("cleaner", "splitter")
pipeline.connect("splitter", "DocEmbedder")
pipeline.connect("DocEmbedder", "writer")

pipeline.run({"converter": {"sources": ["./data/sample.pdf"]}})

Batches: 100%|██████████| 22/22 [00:13<00:00,  1.65it/s]


{'writer': {'documents_written': 690}}

Prompt template

In [11]:
prompt_template = PromptTemplate(prompt = """"Given the provided Documents, answer the Query. Make your answer detailed and long\n
                                            Query: {query}\n
                                            Documents: {join(documents)}
                                            Answer: 
                                        """,
                                        output_parser=AnswerParser())

NameError: name 'PromptTemplate' is not defined

Using LLAMA model as LLM

In [73]:
rag_pipeline = Pipeline()

text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

# Load the LLM using LlamaCppChatGenerator
model_path = "/mnt/kalista/models/llm/openchat-3.5-1210.Q3_K_S.gguf"
generator = LlamaCppChatGenerator(model=model_path, n_ctx=4096, n_batch=128)

rag_pipeline.add_component(
    instance=text_embedder,
    name="text_embedder",
)
rag_pipeline.add_component(instance=InMemoryEmbeddingRetriever(document_store=document_store, top_k=3), name="retriever")
rag_pipeline.add_component(instance=ChatPromptBuilder(template=chat_template), name="prompt_builder")
rag_pipeline.add_component(instance=generator, name="llm")
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")

rag_pipeline.connect("text_embedder", "retriever")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm", "answer_builder")
rag_pipeline.connect("retriever", "answer_builder.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x78481c027220>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: ChatPromptBuilder
  - llm: LlamaCppChatGenerator
  - answer_builder: AnswerBuilder
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - retriever.documents -> answer_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.messages (List[ChatMessage])
  - llm.replies -> answer_builder.replies (List[ChatMessage])

In [74]:
question = "What is VKS and What is it used for?"
result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_tokens": 128, "temperature": 0.1}},
        "answer_builder": {"query": question},
    }
)

generated_answer = result["answer_builder"]["answers"][0]
print(generated_answer.data)

llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from /mnt/kalista/models/llm/openchat-3.5-1210.Q3_K_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = openchat_openchat-3.5-1210


llama_model_loader: - kv   2:                       llama.context_length u32              = 8192
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
llama_model_loader: - kv   8:              llama.attention.head_count_kv u32              = 8
llama_model_loader: - kv   9:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010
llama_model_loader: - kv  10:                       llama.rope.freq_base f32              = 10000.000000
llama_model_loader: - kv  11:                          general.file_type u32              = 11
llama_model_loader: - kv  1

 VKS (VNGCloud Kubernetes Service) is a managed service on VNGCloud that simplifies the deployment and management of container-based applications. It uses Kubernetes, an open-source platform developed by Google, which is widely used to manage and deploy containerized applications in distributed environments. VKS provides features such as event history, volume management, and load balancer management to help users monitor and manage their clusters more effectively.


In [75]:
print(generated_answer.data)

 VKS (VNGCloud Kubernetes Service) is a managed service on VNGCloud that simplifies the deployment and management of container-based applications. It uses Kubernetes, an open-source platform developed by Google, which is widely used to manage and deploy containerized applications in distributed environments. VKS provides features such as event history, volume management, and load balancer management to help users monitor and manage their clusters more effectively.


In [76]:
question = "So could you compare between private and public clusters in VKS?"
result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_tokens": 128, "temperature": 0.1}},
        "answer_builder": {"query": question},
    }
)

generated_answer = result["answer_builder"]["answers"][0]
print(generated_answer.data)

Batches: 100%|██████████| 1/1 [00:00<00:00,  4.57it/s]
Llama.generate: 26 prefix-match hit, remaining 491 prompt tokens to eval
llama_perf_context_print:        load time =   62464.08 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   491 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   127 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =  331945.24 ms /   618 tokens


 Yes, I can compare between private and public clusters in VKS.

Private Cluster:
1. Uses private IP addresses for communication between nodes and control plane.
2. Ensures strict access control, compliance with security regulations, and data privacy.
3. Connects to other services in VNG Cloud using private connections.

Public Cluster:
1. Uses public IP addresses for communication between nodes and control plane.
2. Connects to other services in VNG Cloud using public connections.

In summary, a Private Cluster provides a more secure environment with private IP addresses and


In [77]:
question = "What is private cluster"
result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_tokens": 128, "temperature": 0.1}},
        "answer_builder": {"query": question},
    }
)

generated_answer = result["answer_builder"]["answers"][0]
print(generated_answer.data)

Batches: 100%|██████████| 1/1 [00:00<00:00,  6.12it/s]
Llama.generate: 27 prefix-match hit, remaining 415 prompt tokens to eval
llama_perf_context_print:        load time =   62464.08 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   415 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    68 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =  225783.78 ms /   483 tokens


 A private cluster is an ideal choice for services that require strict access control, ensuring compliance with security regulations and data privacy. In a private cluster, all connections between nodes, the control plane, clients, and other services are completely private, using private IP addresses for communication. This ensures a higher level of security and compliance with data privacy regulations.
