In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each cell
# runs, so edits to local code (e.g. `modules.utils`) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# DeepSeek on VKS documents

## Using PostgreSQL to retrieve VKS documents


Define global variables

In [2]:
import os

from langchain_community.llms import VLLMOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

from modules import utils

from IPython.display import display, Markdown, Latex

In [3]:
# Path of the env file that is loaded into `envs` (the notebook reads the
# PostgreSQL URI and the vLLM host URL from it). Set the VKS_ENV_FILE_PATH
# environment variable to run the notebook on another machine; the original
# absolute path is kept as the fallback so existing setups behave the same.
ENV_FILE_PATH = os.environ.get(
    "VKS_ENV_FILE_PATH",
    "/Users/cuongdm8499/Me/git-cuongpiger/secret/work/vngcloud/ai-platform/env",
)
# Model id passed to ChatOpenAI as `model_name`.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
# Token budget passed to ChatOpenAI as `max_tokens`.
MAX_TOKENS = 32700
# Name of the PGVector collection that is queried for documents.
COLLECTION_NAME = "my_docs"

Load environment variables

In [4]:
# Load settings from the env file; later cells index the result by key
# ("POSTGRESQL_URI_2", "VLLM_HOST_URL_2").
envs = utils.load_env_to_dict(ENV_FILE_PATH)

## Prepare PostgreSQL vectorstore

In [5]:
from langchain_postgres.vectorstores import PGVector
from langchain_huggingface.embeddings import HuggingFaceEmbeddings


In [6]:
# Embedding model handed to the PGVector store below. The model id names a
# multilingual sentence-transformers checkpoint (the queries in this notebook
# are Vietnamese).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Vector store backed by PostgreSQL: connection string comes from the env
# file, documents live in COLLECTION_NAME, vectors come from `embeddings`.
vector_store = PGVector(
    connection=envs["POSTGRESQL_URI_2"],
    collection_name=COLLECTION_NAME,
    embeddings=embeddings,
    use_jsonb=True,
)

## Retrieval

In [8]:
# Retriever view over the store, configured with k=2 search results.
# NOTE(review): no later cell visible here uses `retriever`; they call
# vector_store.similarity_search(query) directly — confirm whether it is needed.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

In [9]:
# Retrieval smoke test with the Vietnamese query "What is VKS?".
# The literal is restored to proper Vietnamese: the previous text was mojibake
# (UTF-8 bytes mis-decoded), which would embed and search for garbage.
res = vector_store.similarity_search("VKS là gì?")
utils.print_documents(res)

Amount of documents: 4
Document 0:
VKS là gì?

VKS (VNGCloud Kubernetes Service) là một dịch vụ được quản lý trên VNGCloud giúp bạn đơn giản hóa quá trình triển khai và quản lý các ứng dụng dựa trên container. Kubernetes là một nền tảng mã nguồn mở được phát triển bởi Google, được sử dụng rộng rãi để quản lý và triển khai các ứng dụng container trên môi trường phân tán.

Những điểm nổi bật của VKS

Quản lý Control Plane hoàn toàn tự động (Fully Managed control plane): VKS sẽ giải phóng bạn khỏi gánh nặng quản lý Control Plane của Kubernetes, giúp bạn tập trung vào việc phát triển ứng dụng.

Hỗ trợ các phiên bản Kubernetes mới nhất: VKS luôn cập nhật những phiên bản Kubernetes mới nhất (minor version từ 1.27, 1.28, 1.29) để đảm bảo bạn luôn tận dụng được những tính năng tiên tiến nhất.



## Integrate with DeepSeek R1

In [10]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

In [11]:
# Connect to the vLLM server through its OpenAI-compatible API.
llm = ChatOpenAI(
    # Placeholder key; presumably the vLLM endpoint does not validate it — confirm.
    openai_api_key="EMPTY",
    # Drop any trailing slash from the configured host so the base URL never
    # ends up as ".../​/v1/".replace("\u200b", "") style double slashes.
    openai_api_base=envs['VLLM_HOST_URL_2'].rstrip("/") + "/v1/",
    model_name=MODEL_NAME,
    max_tokens=MAX_TOKENS,
    streaming=True,
)

# Vietnamese RAG prompt. The template text is restored to proper Vietnamese
# (it was mojibake, i.e. UTF-8 bytes mis-decoded). English gloss:
#   "You are a virtual assistant that answers QUESTIONS for VNGCloud customers.
#    Your answer must be related to the provided CONTEXT. If it is not related,
#    ask for more information. You MUST answer in VIETNAMESE."
# Placeholders: {query} is the user question, {context} the retrieved documents.
prompt = ChatPromptTemplate.from_template("""
Bạn là một trợ lí ảo nhằm trả lời các CÂU HỎI cho khách hàng của VNGCloud.
Câu trả lời của bạn phải có liên quan đến NGỮ CẢNH được cung cấp. Nếu không liên quan hãy yêu cầu cung cấp thêm thông tin.
Bạn PHẢI trả lời bằng TIẾNG VIỆT

CÂU HỎI: {query}

NGỮ CẢNH:
{context}

TRẢ LỜI:
""")

# Pipe the rendered prompt into the chat model.
chain = prompt | llm

## Prepare the context

In [12]:
# Build the context for the question "What is VKS?" and preview it.
# The query literal is restored to proper Vietnamese (it was mojibake).
query = "VKS là gì?"
context = utils.from_documents_to_context(vector_store.similarity_search(query))

# Only the first 1000 characters are rendered to keep the preview short.
display(Markdown(context[:1000]))

- Document 1:
VKS là gì?

VKS (VNGCloud Kubernetes Service) là một dịch vụ được quản lý trên VNGCloud giúp bạn đơn giản hóa quá trình triển khai và quản lý các ứng dụng dựa trên container. Kubernetes là một nền tảng mã nguồn mở được phát triển bởi Google, được sử dụng rộng rãi để quản lý và triển khai các ứng dụng container trên môi trường phân tán.

Những điểm nổi bật của VKS

Quản lý Control Plane hoàn toàn tự động (Fully Managed control plane): VKS sẽ giải phóng bạn khỏi gánh nặng quản lý Control Plane của Kubernetes, giúp bạn tập trung vào việc phát triển ứng dụng.

Hỗ trợ các phiên bản Kubernetes mới nhất: VKS luôn cập nhật những phiên bản Kubernetes mới nhất (minor version từ 1.27, 1.28, 1.29) để đảm bảo bạn luôn tận dụng được những tính năng tiên tiến nhất.

Kubernetes Networking: VKS tích hợp Calico CNI, mang lại tính hiệu quả và bảo mật cao.

Upgrade seamlessly: VKS hỗ trợ nâng cấp giữa các phiên bản Kubernetes một cách dễ dàng và nhanh chóng, giúp bạn luôn cập nhật những cải 

In [13]:
# Question: "What is VKS?" (literal restored to proper Vietnamese; it was mojibake).
# NOTE(review): this streaming loop is repeated in several cells of the notebook;
# consider extracting it into a helper that takes the query.
query = "VKS là gì?"

# Retrieve supporting documents and flatten them into the prompt's {context} slot.
context = utils.from_documents_to_context(vector_store.similarity_search(query))

print("THINKING...")
answer_started = False
for chunk in chain.stream({"query": query, "context": context}):
    text = chunk.content
    # The model closes its reasoning with "</think>". Look for the tag anywhere
    # in the chunk rather than requiring the chunk to be exactly the tag, so the
    # switch still happens when the tag arrives together with neighbouring text.
    # (A tag split across two chunks is still not detected.)
    if not answer_started and "</think>" in text:
        reasoning_tail, _, answer_head = text.partition("</think>")
        print(reasoning_tail, end="", flush=True)
        print("\n\n\nGENERATED RESPONSE")
        print(answer_head, end="", flush=True)
        answer_started = True
        continue

    print(text, end="", flush=True)

THINKING...
Được yêu cầu trả lời câu hỏi "VKS là gì?" tôi cần tìm hiểu thông tin từ các tài liệu cung cấp. Tài liệu 1 giải thích rõ VKS là dịch vụ Kubernetes được quản lý của VNGCloud, giúp đơn giản hóa việc triển khai và quản lý ứng dụng container. It nhấn mạnh các tính năng như quản lý tự động, hỗ trợ các phiên bản mới, tích hợp Calico CNI, và các tùy chọn mở rộng tự động.

Tài liệu 2 và 3 cũng định nghĩa VKS giống như Document 1, nhưng thêm các cập nhật tính năng mới như giám sát tài nguyên, tích hợp Terraform, cải tiến các plugin và chức năng mới như stop POC. Tài liệu 4 đi sâu vào cách sử dụng Terraform với VKS, hướng dẫn các bước tạo cluster và node group, cung cấp ví dụ cụ thể.

Từ các thông tin này, tôi hiểu VKS không chỉ là một dịch vụ Kubernetes thông thường mà còn 

In [14]:
# Question: "How do I expose a service in VKS?"
# (literal restored to proper Vietnamese; it was mojibake).
query = "Làm sao để expose service trong VKS?"

# Retrieve supporting documents and flatten them into the prompt's {context} slot.
context = utils.from_documents_to_context(vector_store.similarity_search(query))

print("THINKING...")
answer_started = False
for chunk in chain.stream({"query": query, "context": context}):
    text = chunk.content
    # The model closes its reasoning with "</think>". Look for the tag anywhere
    # in the chunk rather than requiring the chunk to be exactly the tag, so the
    # switch still happens when the tag arrives together with neighbouring text.
    # (A tag split across two chunks is still not detected.)
    if not answer_started and "</think>" in text:
        reasoning_tail, _, answer_head = text.partition("</think>")
        print(reasoning_tail, end="", flush=True)
        print("\n\n\nGENERATED RESPONSE")
        print(answer_head, end="", flush=True)
        answer_started = True
        continue

    print(text, end="", flush=True)

THINKING...
ƒê·ªÉ expose m·ªôt service trong VKS, b·∫°n c·∫ßn t·∫°o m·ªôt Kubernetes Service v√† expose n√≥ th√¥ng qua m·ªôt s·ªë ph∆∞∆°ng ph√°p nh∆∞ ClusterIP, NodePort, LoadBalancer ho·∫∑c Ingress. D∆∞·ªõi ƒë√¢y l√† c√°c b∆∞·ªõc t·ªïng qu√°t ƒë·ªÉ expose service:

1. **T·∫°o Kubernetes Service:**
   - S·ª≠ d·ª•ng file ƒë·ªãnh nghƒ©a YAML ƒë·ªÉ t·∫°o Service. V√≠ d·ª•:

     ```yaml
     apiVersion: v1
     kind: Service
     metadata:
       name: my-service
     spec:
       selector:
         app: my-app
       ports:
       - name: http
         port: 80
         targetPort: 8080
       type: LoadBalancer
     ```

2. **√Åp d·ª•ng c·∫•u h√¨nh:**
   - Ch·∫°y l·ªánh `kubectl apply -f service.yaml` ƒë·ªÉ t·∫°o Service.

3. **Ki·ªÉm tra tr·∫°ng th√°i:**
   - S·ª≠ d·ª•ng `kubectl get svc` ƒë·ªÉ xem th√¥ng tin Service v√† ƒë·ªãa ch·ªâ IP ho·∫∑c DNS ƒë∆∞·ª£c g√°n.

4. **Truy c·∫≠p Service:**
   - T√πy thu·ªôc v√†o lo·∫°i Service:
     - **LoadBalancer:** Truy c·∫≠p qua ƒë·ªãa ch·ªâ IP Â§

In [15]:
# Question: "Who do Hoàng Sa and Trường Sa belong to?" — unrelated to the VKS
# documents; presumably used to check the prompt's ask-for-more-information
# fallback. (Literal restored to proper Vietnamese; it was mojibake.)
query = "Hoàng Sa và Trường Sa là của ai?"

# Retrieve supporting documents and flatten them into the prompt's {context} slot.
context = utils.from_documents_to_context(vector_store.similarity_search(query))

print("THINKING...")
answer_started = False
for chunk in chain.stream({"query": query, "context": context}):
    text = chunk.content
    # The model closes its reasoning with "</think>". Look for the tag anywhere
    # in the chunk rather than requiring the chunk to be exactly the tag, so the
    # switch still happens when the tag arrives together with neighbouring text.
    # (A tag split across two chunks is still not detected.)
    if not answer_started and "</think>" in text:
        reasoning_tail, _, answer_head = text.partition("</think>")
        print(reasoning_tail, end="", flush=True)
        print("\n\n\nGENERATED RESPONSE")
        print(answer_head, end="", flush=True)
        answer_started = True
        continue

    print(text, end="", flush=True)

THINKING...
Được rồi, systems cần phân tích câu hỏi từ người dùng và ngôn từ trả lời phù hợp. Khách hàng hỏi về chủ quyền Hoàng Sa và Trường Sa, một vấn đề nhạy cảm và quan trọng ở Việt Nam. Tuy nhiên, các document cung cấp đều liên quan đến công nghệ và Kubernetes, không hề nhắc đến địa lý hay chính trị. Vì vậy, tôi không thể đưa ra câu trả lời có liên quan dựa trên các thông tin đã có.월부터 phải yêu cầu khách hàng cung cấp thêm thông tin hoặc chuyển hướng câu hỏi sang chủ đề liên quan đến kỹ thuật nếu có thể.



GENERATED RESPONSE


Câu hỏi của bạn không liên quan đến các tài liệu được cung cấp. Vui lòng cung cấp thêm thông tin hoặc hỏi về các chủ đề kỹ thuật liên quan đến VNGCloud nếu bạn cần hỗ trợ.

In [16]:
# Request: "Create a 20Gb PVC for me in vks"
# (literal restored to proper Vietnamese; it was mojibake).
query = "Tạo cho tôi PVC 20Gb trong vks"

# Retrieve supporting documents and flatten them into the prompt's {context} slot.
context = utils.from_documents_to_context(vector_store.similarity_search(query))

print("THINKING...")
answer_started = False
for chunk in chain.stream({"query": query, "context": context}):
    text = chunk.content
    # The model closes its reasoning with "</think>". Look for the tag anywhere
    # in the chunk rather than requiring the chunk to be exactly the tag, so the
    # switch still happens when the tag arrives together with neighbouring text.
    # (A tag split across two chunks is still not detected.)
    if not answer_started and "</think>" in text:
        reasoning_tail, _, answer_head = text.partition("</think>")
        print(reasoning_tail, end="", flush=True)
        print("\n\n\nGENERATED RESPONSE")
        print(answer_head, end="", flush=True)
        answer_started = True
        continue

    print(text, end="", flush=True)

THINKING...
ƒê∆∞·ª£c y√™u c·∫ßu t·∫°o PVC 20GB trong VKS. ƒê·∫ßu ti√™n xem x√©t c√°c t√†i li·ªáu√™tre cung c·∫•p.

Trong Document 1, c√≥ th√¥ng tin v·ªÅ vi·ªác n√¢ng c·∫•p PVC, cho ph√©p k√≠ch th∆∞·ªõc t·ªëi thi·ªÉu l√† 1GB. ƒêi·ªÅu n√†y ch·ª©ng t·ªè PVC 20GB l√† kh·∫£ thi.

Document 2 v√† Document 4 h∆∞·ªõng d·∫´n c√°ch t·∫°o PVC th√¥ng qua file YAML, bao g·ªìm vi·ªác th√™m tham s·ªë isPOC v√† c√°ch c·∫•u tr√∫c file YAML.

T·ª´ ƒë√≥, t√¥i bi·∫øt c·∫ßn t·∫°o file YAML v·ªõi storage class v√† k√≠ch th∆∞·ªõc 20GB, ƒë·ªìng th·ªùi th√™m tham s·ªë isPOC: "true" n·∫øu c·∫ßn s·ª≠ d·ª•ng v√≠ POC.

K·∫øt h·ª£p c√°c th√¥ng tin, so·∫°n th·∫£o file YAML ph√π h·ª£p v√† h∆∞·ªõng d·∫´n kh√°ch h√†ng √°p d·ª•ng.



GENERATED RESPONSE


ƒê·ªÉ t·∫°o Persistent Volume Claim (PVC) 20GB trong VNGCloud Kubernetes Service (VKS), b·∫°n c√≥ th·ªÉ th·ª±c hi·ªán theo c√°c b∆∞·ªõc sau ƒë√¢y:

### 1. T·∫°o file YAML cho PVC
T·∫°o m·ªôt file c√≥ t√™n `pvc-20gb.yaml` v·ªõi n·ªôi dung nh∆∞ sau:

```yaml
apiVersion: v1

In [17]:
# Question: "How do I configure the volume type when using a pvc in vks"
# (literal restored to proper Vietnamese; it was mojibake).
query = "Làm sao để cấu hình volume type khi dùng pvc trong vks"

# Retrieve supporting documents and flatten them into the prompt's {context} slot.
context = utils.from_documents_to_context(vector_store.similarity_search(query))

print("THINKING...")
answer_started = False
for chunk in chain.stream({"query": query, "context": context}):
    text = chunk.content
    # The model closes its reasoning with "</think>". Look for the tag anywhere
    # in the chunk rather than requiring the chunk to be exactly the tag, so the
    # switch still happens when the tag arrives together with neighbouring text.
    # (A tag split across two chunks is still not detected.)
    if not answer_started and "</think>" in text:
        reasoning_tail, _, answer_head = text.partition("</think>")
        print(reasoning_tail, end="", flush=True)
        print("\n\n\nGENERATED RESPONSE")
        print(answer_head, end="", flush=True)
        answer_started = True
        continue

    print(text, end="", flush=True)

THINKING...
ƒê∆∞·ª£c r·ªìi, t√¥i s·∫Øp x·ª≠ l√Ω c√¢u h·ªèi c·ªßa kh√°ch h√†ng v·ªÅ vi·ªác c·∫•u h√¨nh volume type khi d√πng PVC trong VKS. ƒê·∫ßu ti√™n, t√¥i c·∫ßn xem qua c√°c Document m√† kh√°ch h√†ng ƒë√£ cung c·∫•p ƒë·ªÉ t√¨m th√¥ng tin li√™n quan.

Trong Document 1, t√¥i th·∫•y c√≥ m·ªôt v√≠ d·ª• v·ªÅ vi·ªác t·∫°o PVC v√† Pod. ·ªû ph·∫ßn spec c·ªßa PVC, c√≥ tr∆∞·ªùng storageClassName ƒë∆∞·ª£c ch·ªâ ƒë·ªãnh l√† "my-expansion-storage-class". ƒêi·ªÅu n√†y √°m ch·ªâ r·∫±ng ƒë·ªÉ c·∫•u h√¨nh volume type, ch√∫ng ta c·∫ßn ƒë·ªãnh nghƒ©a StorageClass ph√π h·ª£p.

Document 4 nh·∫Øc ƒë·∫øn vi·ªác thay ƒë·ªïi m·∫∑c ƒë·ªãnh Storage Class t·ª´ IOPS 200 sang 3000. ƒêi·ªÅu n√†y cho th·∫•y r·∫±ng StorageClass ƒë∆∞·ª£c c·∫•u h√¨nh ƒë·ªÉÊåáÂÆö volume type d·ª±a tr√™n IOPS ho·∫∑c c√°c th√¥ng s·ªë kh√°c.

T·ª´ Document 3, t√¥i th·∫•y c√≥ m·ªôt ph·∫ßn v·ªÅ vi·ªác th√™m annotation v√†o PVC ƒë·ªÉ ch·ªâ ƒë·ªãnh volume type ID. ƒêi·ªÅu n√†y cho th·∫•y r·∫±ng ngo√†i vi·ªác ƒë·ªãnh nghƒ©a StorageClass, vi·ª