In [None]:
# Install the third-party packages this notebook imports into the kernel's environment.
# NOTE(review): versions are unpinned, and the imports below use the legacy `langchain.*` module paths —
# consider pinning. Also confirm that `sentence-transformers`, `text-generation` and `beautifulsoup4`
# (presumably required by HuggingFaceEmbeddings, HuggingFaceTextGenInference and WebBaseLoader) are
# already present in the environment, since they are not installed here.
%pip install langchain pgvector psycopg2-binary tiktoken kubernetes

In [16]:
import os, json
from langchain.llms import HuggingFacePipeline
from langchain.llms import HuggingFaceTextGenInference
from langchain.document_loaders import TextLoader
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores.pgvector import PGVector

from kubernetes import client, config

In [17]:
# Get the external address of our Mistral 7B inference service from the cluster.
# Uses the local kubeconfig, so the current kube context must point at the target cluster.
config.load_kube_config()
api_instance = client.CoreV1Api()

mistral_7b = api_instance.read_namespaced_service(name="mistral-7b-service", namespace="default")

# `ingress` is empty/None until the load balancer has been provisioned; fail with a clear
# message instead of an opaque "'NoneType' object is not subscriptable".
lb_ingress = mistral_7b.status.load_balancer.ingress
if not lb_ingress:
    raise RuntimeError(
        "Service 'mistral-7b-service' has no external load-balancer address yet; "
        "wait for it to be provisioned and re-run this cell."
    )

# Some providers publish a DNS hostname instead of an IP; fall back to it so the URL built
# from this value never becomes "http://None".
mistral_7b_ip = lb_ingress[0].ip or lb_ingress[0].hostname

In [30]:
# Smoke-test the Mistral 7B endpoint through LangChain's text-generation-inference client.
llm = HuggingFaceTextGenInference(
    inference_server_url="http://{}".format(mistral_7b_ip),
    # Generation settings: near-zero temperature keeps answers close to deterministic.
    temperature=0.01,
    max_new_tokens=400,
    repetition_penalty=1.03,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
)

# The prompt is wrapped in [INST] ... [/INST] markers, matching the template used for the QA chain later on.
sample_prompt = "[INST] You are a helpful, respectful and honest assistant who is an expert in explaining Kubernetes concepts. Always answer as helpfully as possible, while being safe and keep your responses less than 200 words. What is a deployment?[/INST]"
llm(sample_prompt)

' A deployment in Kubernetes is a way to manage the lifecycle of a set of replicas of a pod. It allows you to specify the desired state of the deployment, including the number of replicas, the desired CPU and memory resources, and the container image to use. Kubernetes will then automatically manage the deployment, scaling it up or down as needed to meet the desired state. Deployments also provide rolling updates, which means that new versions of the pods are introduced gradually, with each new version replacing one of the old ones. This ensures that the application remains available during the update process.'

In [19]:
# Load list of URLs -> kubernetes.io/docs/concepts/
# The file holds one URL per line. Strip each line so the trailing newline does not leak into
# the request URL and the `source` metadata, and skip blank lines. The `with` block closes the
# file handle once the list has been read.
with open('./data/k8s-urls.samples.txt', 'r') as file1:
    urls = [line.strip() for line in file1 if line.strip()]

# Fetch every listed page and turn it into LangChain documents.
loader = WebBaseLoader(urls)
documents = loader.load()

In [32]:
# Split the loaded Kubernetes concept pages into chunks (chunk_size=800, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=800)
docs = text_splitter.split_documents(documents)

# Report how many chunks were produced, then preview the first three.
print(f"{len(docs)} chunks in {len(documents)} pages")
docs[:3]

127 chunks in 2 pages


[Document(page_content='Workload Resources | Kubernetes\nWorkload Resources | KubernetesDocumentationKubernetes BlogTrainingPartnersCommunityCase StudiesVersionsRelease Information\nv1.29\nv1.28\nv1.27\nv1.26\nv1.25English中文 (Chinese)\n한국어 (Korean)\n日本語 (Japanese)\nFrançais (French)\nEspañol (Spanish)\nPortuguês (Portuguese)\nBahasa Indonesia\nУкраїнська (Ukrainian)Changes to the location of Linux packages for KubernetesThe legacy Linux package repositories (apt.kubernetes.io and yum.kubernetes.io AKA packages.cloud.google.com)have been frozen starting from September 13, 2023 and are going away in January 2024, users must migrate.Please read our announcement for more details.', metadata={'source': 'https://kubernetes.io/docs/concepts/workloads/controllers/\n', 'title': 'Workload Resources | Kubernetes', 'description': 'Kubernetes provides several built-in APIs for declarative management of your workloads and the components of those workloads.\nUltimately, your applications run as conta

In [33]:
# Load the sentence-transformers embedding model used to vectorise chunks and queries.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Runs on CPU by default; switch to {"device": "cuda"} to run the model on a GPU.
model_kwargs = dict(device="cpu")

embeddings = HuggingFaceEmbeddings(model_kwargs=model_kwargs, model_name=model_name)

In [34]:
# Build the Postgres connection string used by the pgvector store.
# Every setting can be overridden with the matching PGVECTOR_* environment variable;
# the second argument of each lookup is the fallback used when the variable is unset.
# NOTE(review): the password falls back to a hard-coded value — acceptable for a local demo only;
# set PGVECTOR_PASSWORD for any shared or real database.
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver=os.getenv("PGVECTOR_DRIVER", "psycopg2"),
    host=os.getenv("PGVECTOR_HOST", "localhost"),
    port=int(os.getenv("PGVECTOR_PORT", "5432")),
    database=os.getenv("PGVECTOR_DATABASE", "postgres"),
    user=os.getenv("PGVECTOR_USER", "postgres"),
    password=os.getenv("PGVECTOR_PASSWORD", "secretpassword"),
)

In [35]:
# Name of the pgvector collection that holds the Kubernetes concept chunks.
COLLECTION_NAME = "k8s_concepts"

# Build the vector store from the chunked documents using the embedding model above.
# Later cells query `db`, so this cell has to run to completion (the saved output of this
# cell shows a KeyboardInterrupt, i.e. the recorded run was stopped by hand).
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)

KeyboardInterrupt: 

In [24]:
# A better way with distributed jobs on Kubernetes - Queue Docs
# Runs the local queue-documents.sh helper: -f is the URL list file and -t the Pub/Sub topic
# (the script's recorded output echoes both, then prints a "publishing <url>" line and a
# message id for each URL in the file).
! sh ./queue-documents.sh -f './data/k8s-urls.txt' -t k8s_concepts

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


queuing documents for distributed processing...
---
File Path: ./data/k8s-urls.txt
PubSub Topic: k8s_concepts
---
publishing https://kubernetes.io/docs/concepts/
messageIds:
- '9147050936068804'
publishing https://kubernetes.io/docs/concepts/overview/
messageIds:
- '10069133404682763'
publishing https://kubernetes.io/docs/concepts/overview/working-with-objects/
messageIds:
- '10068925523279567'
publishing https://kubernetes.io/docs/concepts/overview/working-with-objects/object-management/
messageIds:
- '10069271562111716'
publishing https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
messageIds:
- '10069534771233752'
publishing https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
messageIds:
- '10069423309278963'
publishing https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
messageIds:
- '10069183186615624'
publishing https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
messageIds:
- '1006981

In [None]:
# A better way with distributed jobs on Kubernetes
# Runs the local deploy-indexer.sh script.
# NOTE(review): presumably this deploys the workers that consume the queued URLs and index them
# into the vector store — confirm against the script, which is not part of this notebook.
! sh ./deploy-indexer.sh

In [25]:
# Sanity-check retrieval: fetch the chunks most similar to the query and show where each came from.
query = "What's a deployment?" # "Should I use gateway API in my app?"

# Keep the hits under their own name so the chunk list in `docs` (the input to the indexing
# cell) is not overwritten; otherwise re-running that cell would index only these few results.
retrieved_docs = db.similarity_search(query)

print(f"Query: {query}")
print(f"Retrieved documents: {len(retrieved_docs)}")
for doc in retrieved_docs:
    # Read the Document fields directly instead of going through to_json()['kwargs'].
    print("Source: ", doc.metadata['source'])
    print("Text: ", doc.page_content, "\n")

Query: What's a deployment?
Retrieved documents: 4
Source:  https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
Text:  Create an issue
 Print entire sectionUse CaseCreating a DeploymentPod-template-hash labelUpdating a DeploymentRollover (aka multiple updates in-flight)Label selector updatesRolling Back a DeploymentChecking Rollout History of a DeploymentRolling Back to a Previous RevisionScaling a DeploymentProportional scalingPausing and Resuming a rollout of a DeploymentDeployment statusProgressing DeploymentComplete DeploymentFailed DeploymentOperating on a failed deploymentClean up PolicyCanary DeploymentWriting a Deployment SpecPod TemplateReplicasSelectorStrategyProgress Deadline SecondsMin Ready SecondsRevision History LimitPausedWhat's nextDocumentation
Blog
Training
Partners
Community 

Source:  https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
Text:  Create an issue
 Print entire sectionUse CaseCreating a DeploymentPod-template-hash labe

In [39]:
from langchain.prompts import PromptTemplate

# Instruction prompt wrapped in [INST] ... [/INST]; {context} receives the retrieved chunks
# and {question} the user's question.
prompt_template = """[INST] You are a helpful, respectful and honest assistant who is an expert in explaining Kubernetes concepts. Always answer as helpfully as possible, while being safe.
        Use the following pieces of context to answer the question.
        
        {context}

        Question: {question}
        Answer:[/INST]"""
PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

# Extra arguments handed to the QA chain when it is built.
chain_type_kwargs = dict(prompt=PROMPT)

In [41]:
# Expose the vector store as a retriever and combine it with the LLM in a RetrievalQA chain.
retriever = db.as_retriever()

# chain_type="stuff" with the custom prompt supplied through chain_type_kwargs;
# verbose=True prints the chain's enter/finish trace.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    verbose=True,
)

In [44]:
# Ask the retrieval QA chain a question and print its answer.
question = "What is a deployment"
result = qa.run(question)
print(result)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 A deployment in Kubernetes is an object that manages a set of pods to run an application workload. It provides declarative updates for pods, meaning that you define the desired state of your application and Kubernetes will take care of updating the pods to match that state.

A deployment can be used to manage a set of pods that run a single application, or it can be used to manage a set of pods that run multiple instances of the same application.

When you create a deployment, you can specify the number of replicas (instances) of the pods that should be created, as well as the label selector that should be used to identify the pods. You can also specify the strategy for updating the pods, such as rolling updates or blue-green deployments.

You can use the `kubectl` command-line tool to create, update, and delete deployments in your Kubernetes cluster. You can also use the Kubernetes dashboard to manage deployment