# A demo chatbot for return policy

In [1]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA

from IPython.display import display, HTML
import json
import time
import pathlib
import os

### Consume information in the return policy documents

In [2]:
# Source documents: every ".txt" file found directly in the working directory.
# For this demo that means the four return-policy files "return1_en.txt",
# "return2_en.txt", "return3_en.txt" and "return4_en.txt".
loader = DirectoryLoader(
    "./",
    glob="*.txt",
    loader_cls=TextLoader,
)

# Read the files, then cut them into chunks with the splitter's defaults.
splitter = RecursiveCharacterTextSplitter()
documents = loader.load()
texts = splitter.split_documents(documents)

# Sentence-embedding model used to vectorise the chunks (runs on the CPU).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=dict(device="cpu"),
)

# Index the chunks with FAISS and persist the index in the "faiss" folder.
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")

### Prepare an LLM that knows about our return policy documents

In [3]:
# Prompt sent to the model: "{context}" receives the retrieved document text
# and "{question}" receives the user's question.
template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Local Llama-2 chat model loaded through CTransformers.
# TOKENIZERS_PARALLELISM is set to "false" before any model is loaded.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
config = dict(max_new_tokens=2048, context_length=4096, temperature=0.01)
llm = CTransformers(
    model="llama-2-7b-chat.ggmlv3.q8_0.bin",
    model_type="llama",
    config=config,
)

# Re-open the FAISS index saved in the "faiss" folder, using the same
# embedding model it was built with.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=dict(device="cpu"),
)
db = FAISS.load_local("faiss", embeddings)

# Retriever handing the 2 best-matching chunks to the chain for each query.
retriever = db.as_retriever(search_kwargs=dict(k=2))

# Question-answering chain: retrieved chunks are "stuffed" into the prompt
# above, and the source documents are returned alongside the answer.
QA_LLM = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=prompt),
)

## Ask Questions

In [4]:
def query(model, question):
    """Ask the retrieval chain one question and render the exchange as HTML.

    Args:
        model: A chain object exposing ``invoke``. It is called with
            ``{'query': question}`` and must return a mapping that has a
            ``"result"`` entry (for example the ``QA_LLM`` chain built above).
        question: The user's question as plain text.

    Returns:
        None. The question and the answer are shown with
        ``display(HTML(...))``.
    """
    # Function-scope import keeps the cell self-contained; ``html`` is stdlib.
    from html import escape

    output = model.invoke({'query': question})
    response = output["result"]

    # Both strings are free text (user input / model output). Escape them
    # before interpolating into markup so that characters such as "<" or "&"
    # are shown literally instead of being interpreted as HTML.
    display(HTML(f'<strong>Question:</strong> {escape(str(question))}'))
    display(HTML(f'<strong>Answer:</strong> {escape(str(response))}'))

In [5]:
# Opening question: `query` sends it through QA_LLM and displays both the
# question and the chain's answer.
query(QA_LLM, "i want to return a ring")

In [6]:
# Follow-up phrased as a continuation of the previous cell.
# NOTE(review): `query` passes only this string to the chain, with no chat
# history, so the earlier "ring" question is presumably not taken into
# account here - confirm this is the intended demo behaviour.
query(QA_LLM, "within 5 days?")

In [13]:
# Second follow-up, again worded as if the conversation were remembered.
# NOTE(review): only this string reaches the chain (no chat history is
# passed by `query`), so the answer presumably depends solely on what the
# retriever finds for "money back?" - confirm this is intended.
query(QA_LLM, "money back?")