# QnA evaluation

In [1]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Indexing os.environ directly raises KeyError right here if the key is missing.
openai.api_key = os.environ["OPENAI_API_KEY"]

## QnA

In [2]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [3]:
# Supporting data: load the product catalog CSV into a list of documents
file = "OutdoorClothingCatalog_1000.csv"
loader = CSVLoader(file_path=file)
docs = loader.load()

# Index and retriever
# Build the in-memory vector index from the documents loaded above rather than
# handing the loader over a second time, so the CSV is read and parsed only once.
index = VectorstoreIndexCreator(
    embedding=OpenAIEmbeddings(),
    vectorstore_cls=DocArrayInMemorySearch,
).from_documents(docs)
retriever = index.vectorstore.as_retriever()

# QnA
# temperature=0 makes sampling as deterministic as the API allows. This same LLM
# answers the questions, generates extra examples and grades the predictions, so
# run-to-run variation in its output would make the evaluation irreproducible.
llm = ChatOpenAI(temperature=0)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        # String placed between the retrieved documents when they are combined
        "document_separator": "<<<<>>>>>"
    },
)



## Evaluation

In [4]:
# Hard-coded examples: hand-written questions paired with their reference answers
examples = [
    dict(
        query="Do the Cozy Comfort Pullover Set have side pockets?",
        answer="Yes",
    ),
    dict(
        query="What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?",
        answer="The DownTek collection",
    ),
]

# LLM-generated examples
from langchain.evaluation.qa import QAGenerateChain, QAEvalChain

def get_query_and_answers(generated_qa):
    """Output parser: extract the generated question/answer data from a chain result.

    Args:
        generated_qa: Mapping produced by the generation chain; it must contain
            a ``"qa_pairs"`` entry.

    Returns:
        The value stored under ``"qa_pairs"``, unchanged.

    Raises:
        KeyError: If ``generated_qa`` has no ``"qa_pairs"`` entry.
    """
    qa_pairs = generated_qa["qa_pairs"]
    return qa_pairs

# Pipe the QA generator into the output parser so each result is just the
# content of its "qa_pairs" entry.
chain = QAGenerateChain.from_llm(llm) | get_query_and_answers

# Generate one example for each of the first five loaded documents.
generation_inputs = [{"doc": doc} for doc in docs[:5]]
new_examples = chain.batch(generation_inputs)

# Append the generated examples to the hand-written ones (in place).
examples.extend(new_examples)

In [5]:
# Manual evaluation
# Tip: to print detailed information about every step of the chain while it
# runs, enable LangChain's debug mode before invoking it:
# import langchain
# langchain.debug = True

first_example = examples[0]
ai_answer = qa.invoke(first_example["query"])["result"]
print("AI:", ai_answer)
print("Me:", first_example["answer"])

AI: Yes, the Cozy Comfort Pullover Set does have side pockets.
Me: Yes


In [6]:
# LLM-assisted evaluation
# Run the QA chain on every example. Each prediction is read below through its
# "query", "answer" and "result" keys.
predictions = []
for example in examples:
    predictions.append(qa.invoke(example))

# Have the LLM grade each prediction against the corresponding example.
eval_chain = QAEvalChain.from_llm(llm)
graded_outputs = eval_chain.evaluate(examples, predictions)

# Side-by-side report: question, reference answer, predicted answer and grade.
for i, prediction in enumerate(predictions):
    grade = graded_outputs[i]
    print(f"Example {i}:")
    print(f"{'Question ':-<20} {prediction['query']}")
    print(f"{'Real Answer ':-<20} {prediction['answer']}")
    print(f"{'Predicted Answer ':-<20} {prediction['result']}")
    print(f"{'Predicted Grade ':-<20} {grade['results']}")
    print()

Example 0:
Question ----------- Do the Cozy Comfort Pullover Set have side pockets?
Real Answer -------- Yes
Predicted Answer --- The Cozy Comfort Pullover Set does not mention having side pockets in the provided description.
Predicted Grade ---- INCORRECT

Example 1:
Question ----------- What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?
Real Answer -------- The DownTek collection
Predicted Answer --- The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade ---- CORRECT

Example 2:
Question ----------- What are the key features of the Women's Campside Oxfords as described in the document?
Real Answer -------- The key features of the Women's Campside Oxfords include a super-soft canvas material for a broken-in feel and look, a comfortable EVA innersole with Cleansport NXT® antimicrobial odor control, a vintage hunt, fish, and camping motif on the innersole, a moderate arch contour of the innersole, an EVA foam midsole for cushi