#### Question Answering and building Cache/KB
This notebook shows a pattern for persisting answers from the question answering system to a cache or knowledge base (KB). This is useful when the question answering system is slow and we want to avoid calling it for every question. The cache can be built offline and then used to answer questions. This notebook shows how to build the cache and how to use it to answer questions.

##### Set Environment Variables

In [1]:
import os  
import json  
import openai
from Utilities.envVars import *

# Name of the search index that holds the source documents (from environment variables)
indexName = SearchIndex

# Configure the OpenAI SDK to talk to Azure OpenAI
openai.api_type = "azure"
openai.api_version = OpenAiVersion
openai_api_key = OpenAiKey
assert openai_api_key, "ERROR: Azure OpenAI Key is missing"
openai.api_key = openai_api_key

# Validate the service name itself: the endpoint built from it below is never an empty
# string and always contains "openai.azure.com", so checking only the built URL would
# let a missing service name through.
assert OpenAiService, "ERROR: Azure OpenAI Endpoint is missing"
openAiEndPoint = f"https://{OpenAiService}.openai.azure.com"
assert "openai.azure.com" in openAiEndPoint.lower(), "ERROR: Azure OpenAI Endpoint should be in the form: \n\n\t<your unique endpoint identifier>.openai.azure.com"
openai.api_base = openAiEndPoint
davincimodel = OpenAiDavinci

In [2]:
import logging

from IPython.display import display, HTML
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.output_parsers import RegexParser
from langchain.prompts import PromptTemplate

from Utilities.cogSearch import performCogSearch, generateEmbeddings, performCogVectorSearch, performKbCogVectorSearch, indexDocs

# Which provider backs the chat model and the embeddings: "azureopenai" or "openai"
embeddingModelType = "azureopenai"
temperature = 0.3    # sampling temperature passed to the chat model
tokenLength = 1000   # passed to the chat model as max_tokens

if embeddingModelType == 'azureopenai':
    # Point the OpenAI SDK at the Azure OpenAI resource
    openai.api_type = "azure"
    openai.api_key = OpenAiKey
    openai.api_version = OpenAiVersion
    openai.api_base = f"https://{OpenAiService}.openai.azure.com"

    llm = AzureChatOpenAI(
        openai_api_base=openai.api_base,
        openai_api_version=OpenAiVersion,
        deployment_name=OpenAiChat,
        temperature=temperature,
        openai_api_key=OpenAiKey,
        openai_api_type="azure",
        max_tokens=tokenLength,
    )
    embeddings = OpenAIEmbeddings(model=OpenAiEmbedding, chunk_size=1, openai_api_key=OpenAiKey)
elif embeddingModelType == "openai":
    # Point the OpenAI SDK at the public OpenAI API
    openai.api_type = "open_ai"
    openai.api_base = "https://api.openai.com/v1"
    openai.api_version = '2020-11-07'
    openai.api_key = OpenAiApiKey

    llm = ChatOpenAI(
        temperature=temperature,
        openai_api_key=OpenAiApiKey,
        model_name="gpt-3.5-turbo",
        max_tokens=tokenLength,
    )
    embeddings = OpenAIEmbeddings(openai_api_key=OpenAiApiKey)
else:
    # Fail here rather than later with a NameError on `llm` / `embeddings`
    raise ValueError(f"Unsupported embeddingModelType: {embeddingModelType!r}")

logging.info("LLM Setup done")

In [3]:
# We already created our index and loaded the data, so we can skip that part. Let's try to ask a question:
# Question answering involves fetching multiple documents, and then asking a question of them. 
# The LLM response will contain the answer to your question, based on the content of the documents.
# The simplest way of using Langchain and LLM is to use load_qa_chain and run it with a query and a list of documents.
# overrideChain selects which chain type the next cell builds; that cell has branches for
# "stuff", "map_rerank", "map_reduce" and "refine".
overrideChain = "stuff"

In [4]:
# Build the question-answering chain (`qaChain`) and its prompt (`qaPrompt`) for the
# chain type selected by `overrideChain`. Each branch uses the prompt variable names
# that the corresponding chain type is given: "summaries" for stuff / map_rerank,
# "context" + "summaries" for map_reduce, and "context_str" for refine.
if overrideChain == "stuff":
    template = """
    Given the following extracted parts of a long document and a question, create a final answer. 
    If you don't know the answer, just say that you don't know. Don't try to make up an answer. 
    If the answer is not contained within the text below, say \"I don't know\".

    QUESTION: {question}
    =========
    {summaries}
    =========
    """
    qaPrompt = PromptTemplate(template=template, input_variables=["summaries", "question"])
    qaChain = load_qa_with_sources_chain(llm, chain_type=overrideChain, prompt=qaPrompt)
elif overrideChain == "map_rerank":
    # Splits each per-document response into the answer text and its numeric score
    outputParser = RegexParser(
        regex=r"(.*?)\nScore: (.*)",
        output_keys=["answer", "score"],
    )

    promptTemplate = """
    
    Use the following pieces of context to answer the question. If you don't know the answer, just say that you don't know, don't try to make up an answer.

    In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:

    Question: [question here]
    [answer here]
    Score: [score between 0 and 100]

    Begin!

    Context:
    ---------
    {summaries}
    ---------
    Question: {question}

    """
    qaPrompt = PromptTemplate(
        template=promptTemplate,
        input_variables=["summaries", "question"],
        output_parser=outputParser,
    )
    # The prompt names its document slot "summaries", so the chain is told to fill that
    # variable explicitly instead of relying on the loader's default name.
    qaChain = load_qa_with_sources_chain(
        llm,
        chain_type=overrideChain,
        prompt=qaPrompt,
        document_variable_name="summaries",
    )
elif overrideChain == "map_reduce":
    # Map step: pull the relevant text out of each document
    qaTemplate = """Use the following portion of a long document to see if any of the text is relevant to answer the question.
    Return any relevant text.
    {context}
    Question: {question}
    Relevant text, if any :"""
    qaPrompt = PromptTemplate(
        template=qaTemplate, input_variables=["context", "question"]
    )

    # Reduce step: combine the extracted text into the final answer
    combinePromptTemplate = """
        Given the following extracted parts of a long document and a question, create a final answer. 
        If you don't know the answer, just say that you don't know. Don't try to make up an answer. 
        If the answer is not contained within the text below, say \"I don't know\".

        QUESTION: {question}
        =========
        {summaries}
        =========
        """
    combinePrompt = PromptTemplate(
        template=combinePromptTemplate, input_variables=["summaries", "question"]
    )

    qaChain = load_qa_with_sources_chain(
        llm,
        chain_type=overrideChain,
        question_prompt=qaPrompt,
        combine_prompt=combinePrompt,
    )
elif overrideChain == "refine":
    # Prompt used on every document after the first to refine the running answer
    refineTemplate = (
        "The original question is as follows: {question}\n"
        "We have provided an existing answer, including sources: {existing_answer}\n"
        "We have the opportunity to refine the existing answer "
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{context_str}\n"
        "------------\n"
        "Given the new context, refine the original answer to better "
        "answer the question. "
        "If you do update it, please update the sources as well. "
        "If the context isn't useful, return the original answer."
    )
    refinePrompt = PromptTemplate(
        input_variables=["question", "existing_answer", "context_str"],
        template=refineTemplate,
    )

    # Prompt used on the first document to produce the initial answer
    qaTemplate = """
        Given the following extracted parts of a long document and a question, create a final answer. 
        If you don't know the answer, just say that you don't know. Don't try to make up an answer. 
        If the answer is not contained within the text below, say \"I don't know\".

        QUESTION: {question}
        =========
        {context_str}
        =========
        """
    qaPrompt = PromptTemplate(
        input_variables=["context_str", "question"], template=qaTemplate
    )
    qaChain = load_qa_with_sources_chain(
        llm,
        chain_type=overrideChain,
        question_prompt=qaPrompt,
        refine_prompt=refinePrompt,
    )
else:
    raise ValueError(f"Unsupported chain type: {overrideChain!r}")

# The follow-up-question chain is always a "stuff" chain over the same documents,
# whichever chain type answers the question, so it is built once for every branch.
# NOTE(review): the prompt asks for "double angle brackets" but its example shows "<>";
# the example text may have been lost at some point - confirm the intended wording.
followupTemplate = """
    Generate three very brief follow-up questions that the user would likely ask next.
    Use double angle brackets to reference the questions, e.g. <>.
    Try not to repeat questions that have already been asked.

    Return the questions in the following format:
    <>
    <>
    <>

    ALWAYS return a "NEXT QUESTIONS" part in your answer.

    =========
    {context}
    =========

    """
followupPrompt = PromptTemplate(template=followupTemplate, input_variables=["context"])
followupChain = load_qa_chain(llm, chain_type='stuff', prompt=followupPrompt)

In [17]:
# The documents are already indexed, so answering a question starts with a search.
question = "What is ADF"
topK = 3                     # number of source documents to retrieve for the LLM
indexType = 'cogsearchvs'
kbIndexName = 'aoaikb'       # index that stores previously answered questions (the KB)

# Embed the question once; the same vector is used for the KB lookup and the document search
vectorQuestion = generateEmbeddings(
    OpenAiService, OpenAiKey, OpenAiVersion, OpenAiApiKey, embeddingModelType, question
)

# Look in the KB first, before asking the model.
# NOTE(review): the literal 1 is presumably the number of KB hits to return, and the list
# presumably names the fields to return for each hit - confirm against performKbCogVectorSearch.
kbSearch = performKbCogVectorSearch(
    vectorQuestion,
    'vectorQuestion',
    SearchService,
    SearchKey,
    indexType,
    indexName,
    kbIndexName,
    1,
    ["id", "question", "indexType", "indexName", "answer"],
)

Search index aoaikb already exists


In [18]:
# Number of hits the KB search reports for this question; the final cell falls back to
# calling the LLM when this is 0.
print(kbSearch.get_count())

1


In [19]:
def callLlm():
    """Answer the current question with the QA chain and save the result to the KB.

    Works entirely on notebook-level variables: reads `question`, `vectorQuestion`,
    `topK`, `indexName`, `indexType`, `kbIndexName`, `overrideChain`, `qaPrompt`,
    `qaChain` and `followupChain`. It retrieves documents for the question, runs the
    QA and follow-up chains, passes the result to `indexDocs` for `kbIndexName`, and
    prints the result, the question vector and the KB record.

    Returns:
        None.

    Raises:
        ValueError: if `overrideChain` is not one of the chain types handled here.
    """
    import uuid

    # Retrieve the documents the LLM will answer from
    r = performCogVectorSearch(vectorQuestion, 'contentVector', SearchService, SearchKey, indexName, topK)
    if r.get_count() == 0:
        # Give the placeholder the same metadata keys as real hits so that code reading
        # metadata['source'] below does not fail when the search returns nothing.
        docs = [Document(page_content="No results found", metadata={"id": "", "source": ""})]
    else:
        docs = [
            Document(page_content=doc['content'], metadata={"id": doc['id'], "source": doc['sourcefile']})
            for doc in r
        ]
    rawDocs = [doc.page_content for doc in docs]

    # Render the prompt for the "thoughts" field. This is done before any LLM call so an
    # unsupported chain type fails immediately instead of leaving thoughtPrompt unbound.
    if overrideChain == "stuff" or overrideChain == "map_rerank":
        thoughtPrompt = qaPrompt.format(question=question, summaries=rawDocs)
    elif overrideChain == "map_reduce":
        thoughtPrompt = qaPrompt.format(question=question, context=rawDocs)
    elif overrideChain == "refine":
        thoughtPrompt = qaPrompt.format(question=question, context_str=rawDocs)
    else:
        raise ValueError(f"Unsupported chain type: {overrideChain!r}")

    # Ask the question
    answer = qaChain({"input_documents": docs, "question": question}, return_only_outputs=True)
    modifiedAnswer = answer['output_text'].replace("Answer: ", '').replace("Sources:", 'SOURCES:').replace("Next Questions:", 'NEXT QUESTIONS:')

    # Followup questions
    followupAnswer = followupChain({"input_documents": docs, "question": question}, return_only_outputs=True)
    nextQuestions = followupAnswer['output_text'].replace("Answer: ", '').replace("Sources:", 'SOURCES:').replace("Next Questions:", 'NEXT QUESTIONS:').replace('NEXT QUESTIONS:', '').replace('NEXT QUESTIONS', '')

    if modifiedAnswer.find("I don't know") >= 0:
        # No answer was found, so there is nothing to cite or follow up on
        sources = ''
        nextQuestions = ''
    else:
        source = docs[0].metadata.get('source')
        sources = "\n" + source if source else ''

    # Build the result once; it is both printed and stored (as a string) in the KB record
    result = {"data_points": rawDocs, "answer": modifiedAnswer,
              "thoughts": f"<br><br>Prompt:<br>" + thoughtPrompt.replace('\n', '<br>'),
              "sources": sources, "nextQuestions": nextQuestions, "error": ""}
    outputData = [result]

    # Now that we got the answer, let's save it to the KB
    kbId = str(uuid.uuid4())
    kbData = [{
        "id": kbId,
        "question": question,
        "indexType": indexType,
        "indexName": indexName,
        "vectorQuestion": vectorQuestion,
        "answer": str(result),
    }]

    indexDocs(SearchService, SearchKey, kbIndexName, kbData)
    print(outputData)
    print(vectorQuestion)
    print(kbData)

In [20]:
# Minimum search score a KB hit needs for its stored answer to be reused
kbScoreThreshold = 0.95

# Serve the first sufficiently similar KB answer if there is one; otherwise ask the LLM
# exactly once, so that several low-scoring hits cannot trigger repeated LLM calls and
# duplicate KB entries.
cachedAnswer = None
if kbSearch.get_count() > 0:
    for s in kbSearch:
        if s['@search.score'] >= kbScoreThreshold:
            cachedAnswer = s['answer']
            break

if cachedAnswer is not None:
    print(cachedAnswer)
else:
    callLlm()

