# LangChain: Evaluation
Outline:
* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

In [2]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

## Create our QandA application

In [3]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [4]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [5]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [6]:
llm = ChatOpenAI(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

Coming up with test datapoints

In [6]:
data[10]

Document(page_content='ProductID: 10000911\nProductName: Kenneth Cole Women Navy Blue Solid Backpack\nProductBrand: Kenneth Cole\nGender: Women\nPrice (INR): 2463\nNumImages: 5\nDescription: Navy Blue backpackNon-Padded haul loop1 main compartment with zip closurePadded backZip PocketPadded shoulder strap: PaddedWater-resistance: No\nPrimaryColor: Blue', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 10})

In [65]:
data[11]

Document(page_content='ProductID: 10000245\nProductName: Parx Men Green Printed Polo Collar T-shirt\nProductBrand: Parx\nGender: Men\nPrice (INR): 629\nNumImages: 5\nDescription: Green printed T-shirt, has a polo collar, and short sleeves\nPrimaryColor: Green', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 11})

Hard-coded examples

In [41]:
examples = [
    {
        "query": "What color is the trolley bag?",
        "answer": "Black"
    },
    {
        "query": "What is the price of the EthnoVogue?",
        "answer": "5810"
    }
]
# this doesn't scale well. WE have to build these individually. A better way is to use a LLM model

LLM-Generated examples

In [7]:
from langchain.evaluation.qa import QAGenerateChain # takes a document and generates a question and answer pair for each document

In [8]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

In [67]:
example_gen_chain

QAGenerateChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=PromptTemplate(input_variables=['doc'], output_parser=None, partial_variables={}, template='You are a teacher coming up with questions to ask on a quiz. \nGiven the following document, please generate a question and answer based on that document.\n\nExample Format:\n<Begin Document>\n...\n<End Document>\nQUESTION: question here\nANSWER: answer here\n\nThese questions should be detailed and be based explicitly on information in the document. Begin!\n\n<Begin Document>\n{doc}\n<End Document>', template_format='f-string', validate_template=True), llm=ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.7, model_kwargs={}, openai_api_key='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', openai_api_base='', opena

In [9]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)



In [10]:
new_examples

[{'qa_pairs': {'query': 'What is the product name and brand of the trolley bag mentioned in the document?',
   'answer': 'The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".'}},
 {'qa_pairs': {'query': 'What is the product name of the kurta set with jacket in the document?',
   'answer': 'The product name of the kurta set with jacket in the document is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket".'}},
 {'qa_pairs': {'query': 'What is the primary color of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?',
   'answer': 'The primary color of the jeans is pink.'}},
 {'qa_pairs': {'query': 'What is the product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit?',
   'answer': 'The product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit is 10015921.'}},
 {'qa_pairs': {'query': 'Wha

In [46]:

{'query': "What is the color of the trolley bag?",
 'answer': "The color of the trolley bag is black."}

{'query': 'What is the color of the trolley bag?',
 'answer': 'The color of the trolley bag is black.'}

In [47]:
data[0]

Document(page_content='ProductID: 10017413\nProductName: DKNY Unisex Black & Grey Printed Medium Trolley Bag\nProductBrand: DKNY\nGender: Unisex\nPrice (INR): 11745\nNumImages: 7\nDescription: Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer\nPrimaryColor: Black', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

Combine examples

In [12]:
examples = []

In [13]:
examples += new_examples
examples

[{'qa_pairs': {'query': 'What is the product name and brand of the trolley bag mentioned in the document?',
   'answer': 'The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".'}},
 {'qa_pairs': {'query': 'What is the product name of the kurta set with jacket in the document?',
   'answer': 'The product name of the kurta set with jacket in the document is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket".'}},
 {'qa_pairs': {'query': 'What is the primary color of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?',
   'answer': 'The primary color of the jeans is pink.'}},
 {'qa_pairs': {'query': 'What is the product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit?',
   'answer': 'The product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit is 10015921.'}},
 {'qa_pairs': {'query': 'Wha

In [14]:
qa.run(examples[0]["qa_pairs"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'The product name is "Calvin Klein Unisex Black Textured Medium Trolley Bag" and the brand is "Calvin Klein".'

### Manual Evaluation

In [15]:
import langchain
langchain.debug = True

In [16]:
qa.run(examples[0]["qa_pairs"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is the product name and brand of the trolley bag mentioned in the document?",
  "answer": "The product name of the trolley bag mentioned in the document is \"DKNY Unisex Black & Grey Printed Medium Trolley Bag\" and the brand is \"DKNY\"."
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the product name and brand of the trolley bag mentioned in the document?",
  "context": "ProductID: 10126767\nProductName: Calvin Klein Unisex Black Textured Medium Trolley Bag\nProductBrand: Calvin Klein\nGender: Unisex\nPrice (INR): 11880\nNumImages: 7\nDescription: Black textured medium trolley bag secured with zip closureOne handle on the top a

'The product name is "Calvin Klein Unisex Black Textured Medium Trolley Bag" and the brand is "Calvin Klein".'

In [17]:
# Turn off the debug mode
langchain.debug = False

### LLM assisted evaluation


In [18]:
examples

[{'qa_pairs': {'query': 'What is the product name and brand of the trolley bag mentioned in the document?',
   'answer': 'The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".'}},
 {'qa_pairs': {'query': 'What is the product name of the kurta set with jacket in the document?',
   'answer': 'The product name of the kurta set with jacket in the document is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket".'}},
 {'qa_pairs': {'query': 'What is the primary color of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?',
   'answer': 'The primary color of the jeans is pink.'}},
 {'qa_pairs': {'query': 'What is the product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit?',
   'answer': 'The product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit is 10015921.'}},
 {'qa_pairs': {'query': 'Wha

In [19]:
examples[0]['qa_pairs']

{'query': 'What is the product name and brand of the trolley bag mentioned in the document?',
 'answer': 'The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".'}

In [20]:
# the below qa.apply function requires a dictionary with "query" as the key
# convert the qa_pairs to a list of dictionaries with "query" as the key
#

example_data = []
for example in examples:
    example_data.append(example["qa_pairs"])
example_data

[{'query': 'What is the product name and brand of the trolley bag mentioned in the document?',
  'answer': 'The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".'},
 {'query': 'What is the product name of the kurta set with jacket in the document?',
  'answer': 'The product name of the kurta set with jacket in the document is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket".'},
 {'query': 'What is the primary color of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?',
  'answer': 'The primary color of the jeans is pink.'},
 {'query': 'What is the product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit?',
  'answer': 'The product ID of the Raymond Men Blue Self-Design Single-Breasted Bandhgala Suit is 10015921.'},
 {'query': 'What is the primary color of the Parx Men Brown & Off-White Slim Fit Printed

In [21]:
predictions = qa.apply(example_data) # create predictions for a list of examples




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [28]:
predictions

[{'query': 'What is the product name and brand of the trolley bag mentioned in the document?',
  'answer': 'The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".',
  'result': 'The product name is "Calvin Klein Unisex Black Textured Medium Trolley Bag" and the brand is "Calvin Klein".'},
 {'query': 'What is the product name of the kurta set with jacket in the document?',
  'answer': 'The product name of the kurta set with jacket in the document is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket".',
  'result': 'The product name of the kurta set with jacket in the document is "W Women Green & Golden Printed Kurta & Trousers With Ethnic Jacket".'},
 {'query': 'What is the primary color of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?',
  'answer': 'The primary color of the jeans is pink.',
  'result': 'The primary color 

In [22]:
from langchain.evaluation.qa import QAEvalChain

In [23]:
llm = ChatOpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

In [30]:
graded_outputs = eval_chain.evaluate(example_data, predictions)
graded_outputs

[{'results': 'INCORRECT'},
 {'results': 'INCORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'INCORRECT'}]

In [32]:
for i, eg in enumerate(example_data):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['results'])
    print()

Example 0:
Question: What is the product name and brand of the trolley bag mentioned in the document?
Real Answer: The product name of the trolley bag mentioned in the document is "DKNY Unisex Black & Grey Printed Medium Trolley Bag" and the brand is "DKNY".
Predicted Answer: The product name is "Calvin Klein Unisex Black Textured Medium Trolley Bag" and the brand is "Calvin Klein".
Predicted Grade: INCORRECT

Example 1:
Question: What is the product name of the kurta set with jacket in the document?
Real Answer: The product name of the kurta set with jacket in the document is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket".
Predicted Answer: The product name of the kurta set with jacket in the document is "W Women Green & Golden Printed Kurta & Trousers With Ethnic Jacket".
Predicted Grade: INCORRECT

Example 2:
Question: What is the primary color of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?
Real Answ