In [14]:
%load_ext autoreload
%autoreload 2

Import `haystack` modules

In [15]:
import pandas as pd

# Install HuggingFace Datasets using "pip install datasets"
from datasets import load_dataset
from haystack import Document, Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders import ChatPromptBuilder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.dataclasses import ChatMessage

# Import LlamaCppChatGenerator
from haystack_integrations.components.generators.llama_cpp import LlamaCppChatGenerator

Import my custom modules

In [16]:
from modules import utils

Load environment variables

In [17]:
env_file_path = "./secrets/env"
env_variables = utils.load_env_file(env_file_path=env_file_path)
api_key = env_variables["OPENAI_SECRET_KEY"]

Load `vks.csv` file.

In [18]:
dataset = pd.read_csv("./data/vks.csv")

In [19]:
dataset.head()

Unnamed: 0,text,title
0,VKS (VNGCloud Kubernetes Service) is a managed...,What is VKS?
1,Fully Managed control plane: VKS will free you...,Highlights of VKS
2,When you create a Public Cluster with Public N...,VKS public clusters
3,When you create a Public Cluster with Public/P...,VKS private clusters
4,Below is a comparison table between creating a...,Comparison between using Public Cluster and Pr...


In [20]:
for doc in dataset.iloc():
    print(doc["title"])

What is VKS?
Highlights of VKS
VKS public clusters
VKS private clusters
Comparison between using Public Cluster and Private Cluster


In [21]:
docs = [
    Document(
        content=doc["text"],
        meta={
            "title": doc["title"],
        },
    )
    for doc in dataset.iloc()
]

In [22]:
for doc in dataset.iloc():
    print(f"Title: {doc['title']}")
    print(f"Text: {doc['text'][:100]}...")
    print()

Title: What is VKS?
Text: VKS (VNGCloud Kubernetes Service) is a managed service on VNGCloud that simplifies the deployment an...

Title: Highlights of VKS
Text: Fully Managed control plane: VKS will free you from the burden of managing the Kubernetes Control Pl...

Title: VKS public clusters
Text: When you create a Public Cluster with Public Node Group , the VKS system will:

Create a VM with Flo...

Title: VKS private clusters
Text: When you create a Public Cluster with Public/Private Node Group , the VKS system will:

To enhance t...

Title: Comparison between using Public Cluster and Private Cluster
Text: Below is a comparison table between creating and using Public Cluster and Private Cluster on the VKS...



Index documents

In [23]:
doc_store = InMemoryDocumentStore(embedding_similarity_function="cosine")
# Install sentence transformers using "pip install sentence-transformers"
doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

# Indexing Pipeline
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=doc_embedder, name="DocEmbedder")
indexing_pipeline.add_component(instance=DocumentWriter(document_store=doc_store), name="DocWriter")
indexing_pipeline.connect("DocEmbedder", "DocWriter")

indexing_pipeline.run({"DocEmbedder": {"documents": docs}})

Batches: 100%|██████████| 1/1 [00:00<00:00,  3.41it/s]


{'DocWriter': {'documents_written': 5}}

In [24]:
system_message = ChatMessage.from_system(
    """
    Answer the question using the provided context.
    Context:
    {% for doc in documents %}
        {{ doc.content }}
    {% endfor %}
    """
)
user_message = ChatMessage.from_user("Question: {{question}}")
assistent_message = ChatMessage.from_assistant("Answer: ")


chat_template = [system_message, user_message, assistent_message]

Using LLAMA model as LLM

In [25]:
rag_pipeline = Pipeline()

text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

# Load the LLM using LlamaCppChatGenerator
model_path = "/mnt/kalista/models/llm/openchat-3.5-1210.Q3_K_S.gguf"
generator = LlamaCppChatGenerator(model=model_path, n_ctx=4096, n_batch=128)

rag_pipeline.add_component(
    instance=text_embedder,
    name="text_embedder",
)
rag_pipeline.add_component(instance=InMemoryEmbeddingRetriever(document_store=doc_store, top_k=3), name="retriever")
rag_pipeline.add_component(instance=ChatPromptBuilder(template=chat_template), name="prompt_builder")
rag_pipeline.add_component(instance=generator, name="llm")
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")

rag_pipeline.connect("text_embedder", "retriever")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm", "answer_builder")
rag_pipeline.connect("retriever", "answer_builder.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x75abb768ab30>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: ChatPromptBuilder
  - llm: LlamaCppChatGenerator
  - answer_builder: AnswerBuilder
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - retriever.documents -> answer_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.messages (List[ChatMessage])
  - llm.replies -> answer_builder.replies (List[ChatMessage])

In [26]:
question = "What is VKS?"
result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_tokens": 128, "temperature": 0.1}},
        "answer_builder": {"query": question},
    }
)

generated_answer = result["answer_builder"]["answers"][0]
print(generated_answer.data)

llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from /mnt/kalista/models/llm/openchat-3.5-1210.Q3_K_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = openchat_openchat-3.5-1210
llama_model_loader: - kv   2:                       llama.context_length u32              = 8192
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:          

 VKS, or VNGCloud Kubernetes Service, is a managed service on VNGCloud that simplifies the deployment and management of container-based applications. It is built on top of Kubernetes, an open-source platform developed by Google, and is used to manage and deploy containerized applications in distributed environments. VKS provides a range of features and advantages, such as fully managed control plane, support for the latest Kubernetes versions, efficient and secure networking, seamless upgrades, automatic scaling and healing, reduced costs and enhanced reliability, integration of native blockstore and load balancer, and enhanced security.


In [27]:
print(generated_answer.data)

 VKS, or VNGCloud Kubernetes Service, is a managed service on VNGCloud that simplifies the deployment and management of container-based applications. It is built on top of Kubernetes, an open-source platform developed by Google, and is used to manage and deploy containerized applications in distributed environments. VKS provides a range of features and advantages, such as fully managed control plane, support for the latest Kubernetes versions, efficient and secure networking, seamless upgrades, automatic scaling and healing, reduced costs and enhanced reliability, integration of native blockstore and load balancer, and enhanced security.


In [28]:
question = "So could you compare between private and public clusters in VKS?"
result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_tokens": 128, "temperature": 0.1}},
        "answer_builder": {"query": question},
    }
)

generated_answer = result["answer_builder"]["answers"][0]
print(generated_answer.data)

Batches: 100%|██████████| 1/1 [00:00<00:00, 72.11it/s]
Llama.generate: 27 prefix-match hit, remaining 975 prompt tokens to eval
llama_perf_context_print:        load time =   59596.41 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   975 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   127 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =  120631.81 ms /  1102 tokens


 In the VKS system, both Private Clusters and Public Clusters have their own advantages and disadvantages. Here is a comparison between the two:

Private Cluster:

1. Security: Private Clusters offer higher security as all connections are private and limited access. This ensures strict access control and compliance with security regulations and data privacy.
2. Access management: Private Clusters have strict access control, minimizing the risk of external network attacks.
3. Scalability: Both Public and Private Clusters are easily scalable through the Auto Scaling feature.
4. AutoHealing: Both types of
