# Langchain: Evaluation

- Example QA generation chain
- Manual evaluation
- LLM-assisted evaluation
- Langchain evaluation platform

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [2]:
# account for deprecation of LLM model
import datetime
# Get the current date
current_date = datetime.datetime.now().date()

# Define the date after which the model should be set to "gpt-3.5-turbo"
target_date = datetime.date(2024, 6, 12)

# Set the model variable based on the current date
if current_date > target_date:
    llm_model = "gpt-3.5-turbo"
else:
    llm_model = "gpt-3.5-turbo-0301"

## QnA Application

In [12]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch

In [10]:
fpath = "../data/OutdoorClothingCatalog_1000.csv"
loader = CSVLoader(fpath)
docs = loader.load()

In [13]:
embedding = OpenAIEmbeddings()

In [15]:
db = DocArrayInMemorySearch.from_documents(
    documents=docs,
    embedding=embedding
)

In [17]:
retriever = db.as_retriever()

In [18]:
llm = ChatOpenAI(temperature=0.0, model=llm_model)

In [19]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        "document_separator": "<<<<>>>>"
    }
)

### Hardcoded examples

In [20]:
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set\
        have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty \
        850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

### LLM-Generated examples

In [22]:
from langchain.evaluation.qa import QAGenerateChain

In [23]:
llm = ChatOpenAI(model=llm_model)
example_gen_chain = QAGenerateChain.from_llm(llm)

In [25]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in docs[:5]]
)

In [26]:
new_examples

[{'query': "What is the weight of each pair of Women's Campside Oxfords?",
  'answer': "The approximate weight of each pair of Women's Campside Oxfords is 1 lb. 1 oz."},
 {'query': 'What is the construction material used to make the Recycled Waterhog dog mat?',
  'answer': 'The Recycled Waterhog dog mat is made of 24 oz. polyester fabric, which is constructed from 94% recycled materials, and has a rubber backing.'},
 {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?",
  'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. The four-way-stretch and chlorine-resistant fabric keeps its shape and resists snags. The UPF 50+ rated fabric provides the highest rated sun protection possible, blocking 98% of the sun's harmful rays. The crossover no-slip straps and fully lined bottom ensure a secure fit and maximum coverage. It can be machine washed and line dried for best results."},
 {'query': 'What is the fabric composition

In [27]:
examples += new_examples

## Manual Evaluation

In [28]:
import langchain
langchain.debug = True

In [29]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set        have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "context": ": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\

'The Cozy Comfort Pullover Set, Stripe does have side pockets.'

In [30]:
langchain.debug = False

## LLM Assisted Evaluation

In [31]:
predictions = qa.apply(examples)

In [32]:
predictions[0]

{'query': 'Do the Cozy Comfort Pullover Set        have side pockets?',
 'answer': 'Yes',
 'result': 'The Cozy Comfort Pullover Set, Stripe does have side pockets.'}

In [33]:
from langchain.evaluation.qa import QAEvalChain

In [35]:
llm = ChatOpenAI(temperature=0, model=llm_model)
eval_chain = QAEvalChain.from_llm(llm)

In [36]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [37]:
graded_outputs

[{'text': 'CORRECT'},
 {'text': 'CORRECT'},
 {'text': 'CORRECT'},
 {'text': 'CORRECT'},
 {'text': 'CORRECT'},
 {'text': 'CORRECT'},
 {'text': 'CORRECT'}]

In [38]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['text'])
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set        have side pockets?
Real Answer: Yes
Predicted Answer: The Cozy Comfort Pullover Set, Stripe does have side pockets.
Predicted Grade: CORRECT

Example 1:
Question: What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade: CORRECT

Example 2:
Question: What is the weight of each pair of Women's Campside Oxfords?
Real Answer: The approximate weight of each pair of Women's Campside Oxfords is 1 lb. 1 oz.
Predicted Answer: The weight of each pair of Women's Campside Oxfords is approximately 1 lb. 1 oz.
Predicted Grade: CORRECT

Example 3:
Question: What is the construction material used to make the Recycled Waterhog dog mat?
Real Answer: The Recycled Waterhog dog mat is made of 24 oz. polyester fabric, which is constructed from 94% recycled materials, and has a 

## Langchain evaluation platform

The LangChain evaluation platform, LangChain Plus, can be accessed here https://www.langchain.plus/.  
Use the invite code `lang_learners_2023`