In [1]:
# Install LangChain into the environment used by this kernel.
# NOTE(review): the version is unpinned, while the cells below import from
# `langchain.llms`, `langchain.document_loaders`, `langchain.embeddings` and
# `langchain.vectorstores.pgvector` — consider pinning a release known to
# provide these import paths so the notebook stays reproducible.
%pip install langchain

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
from langchain.llms import HuggingFacePipeline
from langchain.llms import HuggingFaceTextGenInference
from langchain.document_loaders import TextLoader
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores.pgvector import PGVector

In [6]:
# Smoke-test the text-generation-inference endpoint through LangChain.
# NOTE(review): the endpoint host name refers to mistral-7b; the [INST] ... [/INST]
# wrapper below is the instruction format the prompt is written in.
llm = HuggingFaceTextGenInference(
    inference_server_url="http://mistral-7b.broyal.demo/",
    # Sampling controls: a near-zero temperature keeps answers close to deterministic.
    temperature=0.01,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    repetition_penalty=1.03,
    # Upper bound on the number of generated tokens per call.
    max_new_tokens=512,
)

# The adjacent string literals are concatenated into a single prompt; the call is
# the last expression of the cell so its return value is displayed as the output.
llm(
    "[INST] You are a helpful, respectful and honest assistant who is an expert "
    "in explaining Kubernetes concepts. Always answer as helpfully as possible, "
    "while being safe and keep your responses less than 200 words. "
    "What is a deployment?[/INST]"
)

' A deployment in Kubernetes is a way to manage the lifecycle of a set of replicas of a pod. It allows you to specify the desired state of the deployment, including the number of replicas, the desired CPU and memory resources, and the desired image version. Kubernetes will then automatically manage the deployment by scaling up or down the number of replicas as needed to meet the desired state. Deployments also provide rolling updates, which means that new versions of the pods are rolled out gradually to minimize downtime.'

In [19]:
# Load list of URLs -> kubernetes.io/docs/concepts/
# The file holds one URL per line. Read it inside a `with` block so the handle
# is always closed, strip the trailing newline from every line (otherwise the
# newline becomes part of the URL and of each document's `source` metadata),
# and skip blank lines.
with open('./data/k8s-urls.samples.txt', 'r') as file1:
    urls = [line.strip() for line in file1 if line.strip()]

# Fetch every page; `documents` holds the loader's result for the URL list.
loader = WebBaseLoader(urls)
documents = loader.load()

In [21]:
# Split every loaded Kubernetes concept page into smaller, slightly
# overlapping chunks that will be embedded individually.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)

# Report how many chunks were produced from how many source pages.
print("%s chunks in %s pages" % (len(docs), len(documents)))

143 chunks in 3 pages


In [22]:
# Sentence-transformer model used to turn text chunks into embedding vectors.
# The model runs on the CPU here; set "device" to "cuda" to run it on a GPU.
model_kwargs = dict(device="cpu")
model_name = "sentence-transformers/all-mpnet-base-v2"

embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [23]:
# Assemble the Postgres connection string from PGVECTOR_* environment
# variables, falling back to defaults when a variable is not set.
# NOTE(review): the fallback password is hardcoded in the notebook — prefer
# supplying PGVECTOR_PASSWORD through the environment for anything non-local.
pg_params = {
    "driver": os.environ.get("PGVECTOR_DRIVER", "psycopg2"),
    "host": os.environ.get("PGVECTOR_HOST", "localhost"),
    # The port arrives as text from the environment, so convert it to an int.
    "port": int(os.environ.get("PGVECTOR_PORT", "5432")),
    "database": os.environ.get("PGVECTOR_DATABASE", "postgres"),
    "user": os.environ.get("PGVECTOR_USER", "postgres"),
    "password": os.environ.get("PGVECTOR_PASSWORD", "secretpassword"),
}
CONNECTION_STRING = PGVector.connection_string_from_db_params(**pg_params)

In [24]:
# Name of the PGVector collection that stores the embedded chunks.
COLLECTION_NAME = "kubernetes_concepts"

# Embed every chunk in `docs` and store it in the collection.
# NOTE(review): re-running this cell presumably inserts the same chunks
# again — confirm before executing it more than once.
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)

In [None]:
# TODO: a better way to do this — with distributed jobs on Kubernetes


In [27]:
# Sanity-check retrieval: run a similarity search against the vector store and
# print the source and text of every hit.
query = "Should I use gateway API in my app?"

# Keep the hits in their own variable so the chunk list `docs` (the input of
# the ingestion cell) is not overwritten; otherwise re-running ingestion after
# this cell would ingest the search hits instead of the full chunk list.
retrieved_docs = db.similarity_search(query)

print(f"Query: {query}")
print(f"Retrieved documents: {len(retrieved_docs)}")
for doc in retrieved_docs:
    # Read the fields directly from the Document instead of going through its
    # serialized `to_json()` form.
    print("Source: ", doc.metadata['source'])
    print("Text: ", doc.page_content, "\n")

Query: Should I use gateway API in my app?
Retrieved documents: 4
Source:  https://kubernetes.io/docs/concepts/services-networking/service/

Text:  cluster. An Ingress lets you consolidate your routing rules into a single resource, so
that you can expose multiple components of your workload, running separately in your
cluster, behind a single listener.The Gateway API for Kubernetes
provides extra capabilities beyond Ingress and Service. You can add Gateway to your cluster -
it is a family of extension APIs, implemented using
CustomResourceDefinitions -
and then use these to configure access to network services that are running in your cluster.Cloud-native service discoveryIf you're able to use Kubernetes APIs for service discovery in your application,
you can query the API server
for matching EndpointSlices. Kubernetes updates the EndpointSlices for a Service
whenever the set of Pods in a Service changes.For non-native applications, Kubernetes offers ways to place a network port or loa

In [None]:
from langchain.prompts import PromptTemplate
# Instruction-style prompt for the QA chain. `{context}` and `{question}` are
# the two placeholders declared as input variables below. The template text
# (including its indentation) is sent to the model as written.
prompt_template = """[INST] You are a helpful, respectful and honest assistant who is an expert in explaining Kubernetes concepts. Always answer as helpfully as possible, while being safe.
        Use the following pieces of context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
        
        {context}

        Question: {question}
        Answer:[/INST]"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments handed to the chain so it uses the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Expose the vector store as a retriever for the QA chain.
retriever = db.as_retriever()

# Build the retrieval QA chain: the "stuff" chain type is used together with
# the custom prompt passed through `chain_type_kwargs`; verbose output is on.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    verbose=True,
)

In [None]:
# Ask the retrieval QA chain an in-domain question and print its answer.
result = qa.run(
    "When should I use the Gateway API?"
)
print(result)

In [None]:
# Ask a question unrelated to the ingested Kubernetes pages.
# NOTE(review): presumably an out-of-domain probe for the prompt's
# "say that you don't know" instruction — confirm the intent.
query = "What is the nation economic status? Summarize. Keep it under 200 words."

# `test_rag` is not defined or imported anywhere in this notebook, so call the
# chain directly, the same way the previous cell does.
print(qa.run(query))