In [None]:
# Install the third-party packages this notebook imports (via %pip, so they land
# in the running kernel's environment).
# NOTE(review): versions are unpinned — consider pinning them for reproducibility.
%pip install langchain pgvector psycopg2-binary tiktoken kubernetes

In [None]:
import os, json
from langchain.llms import HuggingFacePipeline
from langchain.llms import HuggingFaceTextGenInference
from langchain.document_loaders import TextLoader
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores.pgvector import PGVector

from kubernetes import client, config

In [None]:
# Look up the external address of our Mistral 7B inference service.
config.load_kube_config()
api_instance = client.CoreV1Api()

mistral_7b = api_instance.read_namespaced_service(name="mistral-7b-service", namespace="default")

# `ingress` stays empty until the load balancer has been provisioned; fail with
# a readable message instead of a TypeError on `None[0]`.
load_balancer = mistral_7b.status.load_balancer
ingress = load_balancer.ingress if load_balancer is not None else None
if not ingress:
    raise RuntimeError(
        "Service 'mistral-7b-service' has no external address yet; "
        "wait for the load balancer to be provisioned and re-run this cell."
    )

# Some providers publish a DNS hostname rather than an IP; either one works in
# the URL built from this value.
mistral_7b_ip = ingress[0].ip or ingress[0].hostname
if not mistral_7b_ip:
    raise RuntimeError(
        "Service 'mistral-7b-service' load balancer ingress has neither an IP nor a hostname."
    )

In [None]:
# Smoke-test the Mistral 7B endpoint through LangChain's HuggingFaceTextGenInference wrapper.
generation_settings = dict(
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)
llm = HuggingFaceTextGenInference(
    inference_server_url=f"http://{mistral_7b_ip}",
    **generation_settings,
)

# Bare last expression so the notebook displays the model's reply.
smoke_test_prompt = "[INST] You are a helpful, respectful and honest assistant who is an expert in explaining Kubernetes concepts. Always answer as helpfully as possible, while being safe and keep your responses less than 200 words. What is a deployment?[/INST]"
llm(smoke_test_prompt)

In [None]:
# Load list of URLs -> kubernetes.io/docs/concepts/ (one URL per line).
# The context manager closes the file once it has been read; each line is
# stripped so the URLs do not carry the trailing newline left by line-wise
# reading, and blank lines are skipped.
with open('./data/k8s-urls.samples.txt', 'r') as file1:
    urls = [line.strip() for line in file1 if line.strip()]

loader = WebBaseLoader(urls)
documents = loader.load()

In [None]:
# Split every loaded Kubernetes concept page into chunks
# (chunk_size=1000, chunk_overlap=20) and report how many were produced.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=1000)
docs = text_splitter.split_documents(documents)

print(f"{len(docs)} chunks in {len(documents)} pages")

In [None]:
# Sentence-transformer model used to embed the document chunks.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cpu")  # switch to dict(device="cuda") to embed on a GPU

embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [None]:
# Build the Postgres connection string from PGVECTOR_* environment variables,
# using the fallback values below for any variable that is not set.
# NOTE(review): the fallback password is hardcoded — set PGVECTOR_PASSWORD
# outside of throwaway demo environments.
pg_params = {
    "driver": os.environ.get("PGVECTOR_DRIVER", "psycopg2"),
    "host": os.environ.get("PGVECTOR_HOST", "localhost"),
    "port": int(os.environ.get("PGVECTOR_PORT", "5432")),
    "database": os.environ.get("PGVECTOR_DATABASE", "postgres"),
    "user": os.environ.get("PGVECTOR_USER", "postgres"),
    "password": os.environ.get("PGVECTOR_PASSWORD", "secretpassword"),
}
CONNECTION_STRING = PGVector.connection_string_from_db_params(**pg_params)

In [None]:
import sqlalchemy

# Show which SQLAlchemy version is installed in this kernel.
sqlalchemy_version = sqlalchemy.__version__
print(sqlalchemy_version)

In [None]:
# Name of the PGVector collection that holds the Kubernetes concept chunks.
COLLECTION_NAME = "k8s_concepts"

# Embed the chunks and store them in Postgres through PGVector.
vector_store_args = dict(
    documents=docs,
    embedding=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)
db = PGVector.from_documents(**vector_store_args)

In [None]:
# A better way with distributed jobs on Kubernetes - Queue Docs
# Runs the local queue-documents.sh script with the full URL list (-f) and the
# target name k8s_concepts (-t).
# NOTE(review): presumably this enqueues the URLs for indexing into the
# k8s_concepts collection — confirm against queue-documents.sh.
! sh ./queue-documents.sh -f './data/k8s-urls.txt' -t k8s_concepts

In [None]:
# A better way with distributed jobs on Kubernetes
# Runs the local deploy-indexer.sh script.
# NOTE(review): presumably this deploys the indexer jobs that process the queued
# documents — confirm against deploy-indexer.sh.
! sh ./deploy-indexer.sh

In [None]:
# Sanity-check retrieval: run a similarity search against the vector store and
# print the source and text of every hit.
query = "What's a deployment?" # "Should I use gateway API in my app?"

# Keep the hits in their own name: reusing `docs` would overwrite the chunk list
# produced by the splitter, so re-running the indexing cell afterwards would
# index only these few search results.
retrieved_docs = db.similarity_search(query)
print(f"Query: {query}")
print(f"Retrieved documents: {len(retrieved_docs)}")
for doc in retrieved_docs:
    # Read the Document attributes directly instead of going through to_json().
    print("Source: ", doc.metadata.get('source', '<unknown>'))
    print("Text: ", doc.page_content, "\n")

In [None]:
from langchain.prompts import PromptTemplate

# Prompt wrapped in [INST] ... [/INST] markers; {context} and {question} are the
# template's two input variables.
prompt_template = """[INST] You are a helpful, respectful and honest assistant who is an expert in explaining Kubernetes concepts. Always answer as helpfully as possible, while being safe.
        Use the following pieces of context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
        
        {context}

        Question: {question}
        Answer:[/INST]"""
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments handed to the chain so that it uses the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Expose the vector store as a retriever and wire it, the LLM and the custom
# prompt into a "stuff"-type RetrievalQA chain.
retriever = db.as_retriever()

qa_settings = dict(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    verbose=True,
)
qa = RetrievalQA.from_chain_type(**qa_settings)

In [None]:
# Ask the retrieval-augmented chain a question and print its answer.
gateway_question = "When should I use the Gateway API?"
result = qa.run(gateway_question)
print(result)