## An AI question-and-answer assistant demo in KubeBlocks AIGC 
Here, we demonstrate how to create an AI question-and-answer assistant for the KubeBlocks user documentation on AIGC infrastructure that is built entirely on KubeBlocks.

1. First, please make sure you have completed the following preparations:
* A qdrant cluster created by KubeBlocks named "my-qdrant"
* A private gllm cluster created by KubeBlocks named "my-gllm"
Use `kbcli cluster list` to check the cluster status.

2. Load the text-to-vector embedding model

In [None]:
from typing import Any, Dict, List
from text2vec import SentenceModel
from llama_index import LangchainEmbedding
from llama_index.readers.file.markdown_reader import MarkdownReader
from langchain.embeddings.base import Embeddings


class Text2VecEmbedding(Embeddings):
    """LangChain ``Embeddings`` adapter backed by a text2vec ``SentenceModel``.

    Newlines in the input are replaced with spaces before the text is encoded.
    """

    def __init__(self):
        self.model = SentenceModel('shibing624/text2vec-base-chinese')

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts and return one vector per text."""
        texts = [text.replace("\n", " ") for text in texts]
        embeddings = self.model.encode(texts)
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string and return its vector as a plain list."""
        text = text.replace("\n", " ")
        # Convert the encoder output with ``tolist()`` so the return value
        # matches the annotation and the element type of ``embed_documents``.
        return self.model.encode(text).tolist()
        
# Markdown parser provided by llama_index.
reader = MarkdownReader()
# Size of the vectors stored in the vector database collection.
vector_size = 768
# Wrap the text2vec adapter so it can be used through the llama_index embedding API.
embedding_model = LangchainEmbedding(Text2VecEmbedding())

3. Create the vector database client
To create a client that connects to the vector database, we need to know the backend address of that database. You can use the `kbcli cluster describe` command to view the vector database information in KubeBlocks.

In [None]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.models import VectorParams,Distance
"""
`kbcli cluster describe <qdrant_cluster_name>` get the qdrant server's information 
"""
# Qdrant connection settings. Replace them with the values reported by
# `kbcli cluster describe <qdrant_cluster_name>` for your own cluster.
url = "my-qdrant-qdrant.default.svc.cluster.local"
port = 6333
grpc_port = 6334
# Use the enum member everywhere instead of mixing it with the raw "Cosine" string.
distance = Distance.COSINE
# Single source of truth for the collection used by the client and the vector store.
collection_name = "demo"

client = qdrant_client.QdrantClient(
    url=url,
    port=port,
    grpc_port=grpc_port,
    prefer_grpc=False,
    https=False,
    timeout=1000,
)
# NOTE: recreate_collection deletes an existing collection with the same name
# before creating an empty one, so re-running this cell discards loaded data.
client.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=vector_size, distance=distance),
)

connector = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    vectors_config=VectorParams(size=vector_size, distance=distance),
)

4. Load the user docs
Now, we will download the KubeBlocks user docs and load them into our vector database using the previously prepared embedding model.

In [None]:
import os
from typing import List, Tuple, Optional
import requests
from llama_index.readers.file.markdown_reader import MarkdownReader
from llama_index.schema import Document,NodeRelationship, RelatedNodeInfo
from llama_index.data_structs.data_structs import Node
from llama_index.vector_stores.types import NodeWithEmbedding


class http_markdown_reader:
    """Fetch a Markdown document over HTTP and load it into the vector store.

    The document is split into sections with ``MarkdownReader``; each section
    is embedded with the module-level ``embedding_model`` and added to the
    module-level ``connector``.
    """

    def __init__(self):
        self.reader = MarkdownReader()

    def load_data(self, url, timeout=30):
        """Download ``url``, embed each Markdown section and store the vectors.

        Args:
            url: Address of the raw Markdown file.
            timeout: Seconds to wait for the HTTP server before giving up.

        Returns:
            The ``Document`` objects built from the file, one per section.
            An empty list is returned when the server does not answer with
            HTTP 200; nothing is written to the vector store in that case.
        """
        file_name = os.path.basename(url)
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print("An error occurred while retrieving the document content:", response.status_code)
            # Do not index an empty document or report a successful load.
            return []

        results = []
        for header, value in self.parse_tups(response.text):
            text = value if header is None else f"\n\n{header}\n{value}"
            results.append(Document(text=text, metadata={}))

        nodes: List[NodeWithEmbedding] = []
        for doc in results:
            vector = embedding_model.get_text_embedding(doc.text)
            doc.embedding = vector
            node = Node(
                text=doc.text,
                doc_id=doc.doc_id,
            )
            # NOTE(review): the SOURCE relationship references the node's own
            # id; confirm whether doc.doc_id was intended here.
            node.relationships = {
                NodeRelationship.SOURCE: RelatedNodeInfo(
                    node_id=node.node_id, metadata={"source": file_name}
                )
            }
            nodes.append(NodeWithEmbedding(node=node, embedding=vector))

        connector.add(nodes)
        print("The document has been loaded into the vector database... You can view the details through the Qdrant web UI tool")
        return results

    def parse_tups(
            self, content: str, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Split Markdown text into ``(header, text)`` tuples.

        Hyperlinks and images are removed before the text is split.
        ``errors`` is accepted for interface compatibility and is not used.
        """
        content = self.reader.remove_hyperlinks(content)
        content = self.reader.remove_images(content)
        return self.reader.markdown_to_tups(content)
    
    
# Replace the plain Markdown reader with the HTTP-backed one and load the
# kbcli installation guide.
reader = http_markdown_reader()
docs = reader.load_data(
    "https://raw.githubusercontent.com/apecloud/kubeblocks/main/docs/user_docs/installation/install-with-kbcli/install-kbcli.md"
)


5. Query with the LLM

In [None]:
from langchain import PromptTemplate
from pydantic import BaseModel
import json
import requests
import openai
import os

query_str = "how to install kbcli in Windows?"
# Retrieve the stored chunks closest to the question.
query_contents = client.search(
    collection_name="demo",
    query_vector=embedding_model.get_text_embedding(query_str),
    with_vectors=True,
    limit=3,
    score_threshold=0.5,
    search_params={'exact': False, 'hnsw_ef': 128},
    consistency="majority",
)

# Concatenate the text of every hit into the context passed to the LLM.
context_chunks = []
for hit in query_contents:
    # A hit may carry no payload; fall back to an empty dict so the lookups
    # below cannot fail on None.
    payload = hit.payload or {}
    text = payload.get("text")
    if not text and "_node_content" in payload:
        # Otherwise read the text from the JSON stored under "_node_content".
        text = json.loads(payload["_node_content"]).get("text")
    if text:
        context_chunks.append(text)
pack_context = "".join(context_chunks)

# You can customize your own prompt.
prompt_template = """Q:
上下文信息如下:
----------------\n
{context}
\n--------------------\n

根据提供的上下文信息,然后回答问题：{query}。

请确保回答准确和详细。
A:"""
prompt = PromptTemplate.from_template(prompt_template)
prompt_str = prompt.format(query=query_str, context=pack_context)
# check our prompt_str
print(prompt_str)

In [None]:
# Completions endpoint of the LLM service.
LLM_API = "http://my-llm-ggml.default.svc.cluster.local:8000/v1/completions"
data = {
    "prompt": prompt_str,
    "temperature": 0,
    "max_tokens": 512,
}
response = requests.post(LLM_API, json=data)
# Surface HTTP errors directly instead of failing later while parsing the body.
response.raise_for_status()
answer = response.json().get("choices") or []
if answer:
    print(answer[0]["text"])
else:
    # Avoid an opaque IndexError when the service returns no completion.
    print("The LLM service returned no completion choices.")