# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

In [None]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

## Create our QandA application

In [1]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [2]:
file = 'myntra_products_catalog.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [3]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [4]:
llm = ChatOpenAI(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

In [5]:
data[10]

Document(page_content='ProductID: 10000911\nProductName: Kenneth Cole Women Navy Blue Solid Backpack\nProductBrand: Kenneth Cole\nGender: Women\nPrice (INR): 2463\nNumImages: 5\nDescription: Navy Blue backpackNon-Padded haul loop1 main compartment with zip closurePadded backZip PocketPadded shoulder strap: PaddedWater-resistance: No\nPrimaryColor: Blue', metadata={'source': 'myntra_products_catalog.csv', 'row': 10})

In [6]:
data[11]

Document(page_content='ProductID: 10000245\nProductName: Parx Men Green Printed Polo Collar T-shirt\nProductBrand: Parx\nGender: Men\nPrice (INR): 629\nNumImages: 5\nDescription: Green printed T-shirt, has a polo collar, and short sleeves\nPrimaryColor: Green', metadata={'source': 'myntra_products_catalog.csv', 'row': 11})

### Hard-coded examples

In [20]:
examples = [
    {
        "query": "Does the CKenneth Cole Women Navy Blue Solid Backpack\
        have pockets?",
        "answer": "Yes"
    },
    {
        "query": "Does the Parx Men Green Printed Polo Collar T-shirt \
        have a collar?",
        "answer": "Yes"
    }
]

### LLM-Generated examples

In [8]:
from langchain.evaluation.qa import QAGenerateChain


In [9]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

In [10]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)



In [11]:
new_examples[0]

{'query': 'What is the product ID of the DKNY Unisex Black & Grey Printed Medium Trolley Bag?',
 'answer': 'The product ID of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is 10017413.'}

In [12]:
data[0]

Document(page_content='ProductID: 10017413\nProductName: DKNY Unisex Black & Grey Printed Medium Trolley Bag\nProductBrand: DKNY\nGender: Unisex\nPrice (INR): 11745\nNumImages: 7\nDescription: Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer\nPrimaryColor: Black', metadata={'source': 'myntra_products_catalog.csv', 'row': 0})

In [13]:
len(new_examples)

5

In [14]:
new_examples[4]

{'query': 'What is the price of the Parx Men Brown & Off-White Slim Fit Printed Casual Shirt?',
 'answer': 'The price of the Parx Men Brown & Off-White Slim Fit Printed Casual Shirt is INR 759.'}

In [15]:
data[4]

Document(page_content='ProductID: 10017833\nProductName: Parx Men Brown & Off-White Slim Fit Printed Casual Shirt\nProductBrand: Parx\nGender: Men\nPrice (INR): 759\nNumImages: 5\nDescription: Brown and off-white printed casual shirt, has a spread collar, long sleeves, button placket,  curved hem, one patch pocket\nPrimaryColor: White', metadata={'source': 'myntra_products_catalog.csv', 'row': 4})

### Combine examples

In [21]:
examples += new_examples

In [26]:
qa.run(examples[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'Based on the provided information, the Kenneth Cole Women Navy Blue Solid Backpack has a zip pocket.'

## Manual Evaluation

In [27]:
import langchain
langchain.debug = True

In [24]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Does the CKenneth Cole Women Navy Blue Solid Backpack        have pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Does the CKenneth Cole Women Navy Blue Solid Backpack        have pockets?",
  "context": "ProductID: 10000911\nProductName: Kenneth Cole Women Navy Blue Solid Backpack\nProductBrand: Kenneth Cole\nGender: Women\nPrice (INR): 2463\nNumImages: 5\nDescription: Navy Blue backpackNon-Padded haul loop1 main compartment with zip closurePadded backZip PocketPadded shoulder strap: PaddedWater-resistance: No\nPrimaryColor: Blue<<<<>>>>>ProductID: 10000915\nProductName: Kenneth Cole Women Brown Solid Backpack\nProductBrand: Kenneth

'Based on the provided information, the Kenneth Cole Women Navy Blue Solid Backpack has a zip pocket.'

In [28]:
# Turn off the debug mode
langchain.debug = False

## LLM assisted evaluation


In [29]:
predictions = qa.apply(examples)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [30]:
from langchain.evaluation.qa import QAEvalChain

In [31]:
llm = ChatOpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

In [32]:
graded_outputs = eval_chain.evaluate(examples, predictions)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Tue, 18 Jul 2023 18:59:01 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7e8ced527a493ae4-IAD', 'alt-svc': 'h3=":443"; ma=86400'}.


In [37]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['results'])
    print()

Example 0:
Question: Does the CKenneth Cole Women Navy Blue Solid Backpack        have pockets?
Real Answer: Yes
Predicted Answer: Based on the provided information, the Kenneth Cole Women Navy Blue Solid Backpack has a zip pocket.
Predicted Grade: CORRECT

Example 1:
Question: Does the Parx Men Green Printed Polo Collar T-shirt         have a collar?
Real Answer: Yes
Predicted Answer: Yes, the Parx Men Green Printed Polo Collar T-shirt does have a collar.
Predicted Grade: CORRECT

Example 2:
Question: What is the product ID of the DKNY Unisex Black & Grey Printed Medium Trolley Bag?
Real Answer: The product ID of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is 10017413.
Predicted Answer: The product ID of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is 10017413.
Predicted Grade: CORRECT

Example 3:
Question: What is the product name and brand of the kurta set described in the document?
Real Answer: The product name is "EthnoVogue Women Beige & Grey Made to Measur