# Provision and Import Libraries

In [1]:
# Install all relevant libraries.
# Uncomment and run once per environment; the %pip magic installs into the
# environment of the running kernel.
#%pip install langchain
#%pip install unstructured
#%pip install "unstructured[pdf]"
#%pip install pypdf
#%pip install tiktoken
#%pip install chromadb
#%pip install openai
#%pip install sentence_transformers
#%pip install langkit

In [2]:
import numpy
# Display the NumPy version installed in this kernel.
numpy.__version__

'1.22.4'

In [3]:
#pip install numpy==1.23.0

In [4]:
#pip install --user chromadb==0.3.29

In [5]:
import os

# Keep an OPENAI_API_KEY that is already exported in the environment and only
# fall back to the empty placeholder when none is set. An unconditional
# assignment here would overwrite a correctly configured key with ''.
# Do not paste a real key into the notebook; export it before starting Jupyter.
os.environ.setdefault("OPENAI_API_KEY", '')

# Key handed to the OpenAI embedding and LLM clients further down.
openai_key = os.environ.get('OPENAI_API_KEY')

In [6]:
# Import relevant Libraries
import os
import openai
import tiktoken
import chromadb
import pandas as pd

from langchain.document_loaders import OnlinePDFLoader, UnstructuredPDFLoader, PyPDFLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from chromadb.utils import embedding_functions
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

# Load your Knowledge Base Documents

In [7]:
from langchain.document_loaders import DirectoryLoader

# Folder containing the knowledge-base documents; it is passed to load_docs() below.
# NOTE(review): left empty here - presumably a placeholder to fill in before running.
directory = ''

def load_docs(directory):
    """Load the documents that ``DirectoryLoader`` finds for ``directory``.

    Args:
        directory: Folder path handed straight to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader(directory).load()``.
    """
    return DirectoryLoader(directory).load()

# Load the knowledge base and display how many documents were returned.
documents = load_docs(directory)
len(documents)

2

# Chunk and Embed into a Vector DB

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split ``documents`` into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Loaded documents to split.
        chunk_size: Forwarded to the splitter as ``chunk_size`` (default 1000).
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap`` (default 20).

    Returns:
        The chunks produced by the splitter's ``split_documents``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_documents(documents)

# Split the loaded documents with the default chunk settings and print the chunk count.
docs = split_docs(documents)
print(len(docs))

108


In [9]:
# NOTE(review): SentenceTransformerEmbeddings is imported but not used in the
# cells shown; the embeddings actually used are the OpenAI ones.
from langchain.embeddings import SentenceTransformerEmbeddings
# Embedding client that receives the key read from OPENAI_API_KEY.
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)

In [10]:
# Directory handed to Chroma as its persistence location.
persist_directory = "chroma_db"

# Embed the chunks and build the Chroma vector store from them.
vectordb = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embeddings,
    documents=docs,
)

# Persist the vector store.
vectordb.persist()

# Create Conversational Agent

In [11]:
# Conversation memory: stored under "chat_history" and fed from the chain's
# "answer" output (the chain also returns source documents).
memory = ConversationBufferMemory(
    return_messages=True,
    output_key='answer',
    memory_key="chat_history",
)

# Retrieval chain over the vector store.
# NOTE(review): the "additional" search kwarg looks Weaviate-specific - confirm
# that the Chroma retriever accepts (or ignores) it.
chatQA = ConversationalRetrievalChain.from_llm(
    llm=OpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,
        openai_api_key=openai_key,
    ),
    retriever=vectordb.as_retriever(
        search_kwargs={"additional": ["vector", "certainty", "id"]}
    ),
    memory=memory,
    return_source_documents=True,
    #verbose=True,
    #callbacks=[whylabs],
)



# Set Up LLM Monitoring

In [12]:
# Set up monitoring: whylogs provides the logging session, langkit the LLM metrics schema.
from langkit import llm_metrics
import whylogs as why

# Start a whylogs session of type 'whylabs_anonymous'.
why.init(session_type='whylabs_anonymous')
# LLM metrics schema; it is passed to why.log() when the prompts/responses are profiled.
schema = llm_metrics.init()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\frede\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Initialized anonymous session with id session-mZfqt6N2 in config C:\Users\frede\AppData\Local\whylogs\whylogs\config.ini


In [13]:
#del prompt_and_response

# Transcript accumulator: one list for the prompts, one for the responses.
prompt_and_response = {column: [] for column in ("prompt", "response")}


# Start Interrogating the Knowledge Base

In [14]:
# Interactive question/answer loop over the knowledge base.
# Type 'done' (or 'exit') to stop.
chat_history = []
while True:
    qry = input('Question: ')
    command = qry.strip().lower()

    # Stop BEFORE calling the chain. The earlier check compared the string to
    # the builtin `exit` object, which is never equal, so the sentinel itself
    # was sent to the LLM and recorded as a prompt.
    if command in ('done', 'exit'):
        break
    if not command:
        # Nothing was typed; ask again instead of querying with an empty prompt.
        continue

    response = chatQA({"question": qry, "chat_history": chat_history})
    #print(response["answer"])

    # Collect provenance of the retrieved chunks; missing metadata keys yield None.
    sources = [doc.metadata.get("source") for doc in response["source_documents"]]
    chunk = [doc.metadata.get("chunk_id") for doc in response["source_documents"]]
    content = f"ANSWER: {response['answer']}, SOURCES: {set(sources)}, CHUNK: {set(chunk)}"

    print(content)

    # Persist prompt and result in the dictionary that is profiled later.
    prompt_and_response["prompt"].append(qry)
    prompt_and_response["response"].append(response["answer"])

Question: What are the dimensions and volume of the refrigerator??
ANSWER: The dimensions and volume of the refrigerator are as follows:
- Height With Hinge: 70"
- Height Without Hinge: 68 3/10"
- Width: 36"
- Width of Cabinet: 35 6/10"
- Depth of Cabinet: 28 1/2"
- Depth With Door: 33 3/10"
- Depth With Door 90° Open: 48 1/2"
- Depth With Door and Handle: 35 7/10"
- Freezer Capacity: 9 Cu. Ft.
- Fresh Food Capacity: 18.8 Cu. Ft.
- Total Capacity: 27.8 Cu. Ft., SOURCES: {'C:\\Machine Learning\\Knowledge Base\\FRFS2823A_EN.pdf'}, CHUNK: {None}
Question: How do you remove the refrigerator doors?
ANSWER: To remove the refrigerator doors, you need to follow these steps:
1. Remove the three screws from both hinge covers on the left and right top of the cabinet.
2. After removing the hinge covers, disconnect any harnesses between the cabinet and doors by grasping both sides of the connector firmly, depress the latch, and pull apart.
3. Remove the bulkhead cover at the rear of the unit to acc

# Review Monitoring Results

In [15]:
# Profile the collected prompts and responses using the LLM metrics schema.
results = why.log(
    pd.DataFrame(prompt_and_response),
    schema=schema,
    name="prompt_and_responses",
)

✅ Aggregated 3 rows into profile 'prompt_and_responses'

Visualize and explore this profile with one-click
🔍 https://hub.whylabsapp.com/resources/model-1/profiles?profile=ref-uNpnrzzzgI9fu0pj&sessionToken=session-mZfqt6N2
