# LangChain: Evaluation


* Sample Data Generation
* Manual Evaluation
* LLM-assisted Evaluation of Groundtruth and Generated outputs.

In [None]:
!pip3 install docarray

### Create Q&A application ✅

In [1]:
from utils import SaladChatOllama,SaladOllamaEmbeddings

from langchain.chains import RetrievalQA
# from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import UnstructuredExcelLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [2]:
# Load your CSV data using CSVLoader method of document_loaders

file = 'data/earphones_description.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [3]:
MODEL = "llama3"

In [4]:
from langchain.indexes import VectorstoreIndexCreator

embedding = SaladOllamaEmbeddings(model=MODEL)
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding = embedding
).from_loaders([loader])



In [6]:
llm = SaladChatOllama(temperature = 0.0,model=MODEL)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

### Generating Test Data Points ✅

In [8]:
data[1]

Document(page_content='\ufeffProduct Name: SonicPro E40\nBrand: SoniTech\nDescription: In-Ear Wired Earphones with Enhanced Bass, Tangle-Free Cable, and Built-in Microphone\nConnectivity: Wired\nBattery Life: N/A\nPrice (USD): $29.99', metadata={'source': 'data/earphones_description.csv', 'row': 1})

In [9]:
data[9]

Document(page_content='\ufeffProduct Name: AirPods Max\nBrand: Apple\nDescription: Over-Ear Noise-Canceling Headphones with Hi-Res Audio, Adaptive EQ, and 20-Hour Battery Life\nConnectivity: Bluetooth\nBattery Life: 20 hours\nPrice (USD): $549.00', metadata={'source': 'data/earphones_description.csv', 'row': 9})

### Creating Manually typed examples ✅

In [10]:
examples = [
    {
        "query": "What is the Price of the AirPods Pro ?",
        "answer": "$249.00"
    },
    {
        "query": "What is the Brand of BassMasters 500 ?",
        "answer": "JBL"
    }
]

### LLM-Generated examples ✅

In [11]:
from langchain.evaluation.qa import QAGenerateChain

In [12]:
example_gen_chain = QAGenerateChain.from_llm(SaladChatOllama(model=MODEL))

In [13]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:2]]
)



In [40]:
new_examples

[{'query': 'What is the brand of the earphones described in the document?',
  'answer': 'Acoustic'},
 {'query': 'What is the name of the earphones product described in this document?',
  'answer': 'SonicPro E40'}]

In [18]:
def convert_qa_pairs(data_list):
    converted_data = []
    for item in data_list:
        qa_pairs = item.get('qa_pairs', {})
        converted_item = {
            'query': qa_pairs.get('query', ''),
            'answer': qa_pairs.get('answer', '')
        }
        converted_data.append(converted_item)
    return converted_data

new_examples = convert_qa_pairs(new_examples)

In [19]:
data[0]

Document(page_content='\ufeffProduct Name: SoundBeats X100\nBrand: Acoustic\nDescription: Wireless Bluetooth Earphones with Noise Cancelling, High-Resolution Audio, and 10-Hour Battery\nConnectivity: Bluetooth\nBattery Life: 10 hours\nPrice (USD): $79.99', metadata={'source': 'data/earphones_description.csv', 'row': 0})

### Combine examples into one list

In [20]:
examples += new_examples

In [21]:
qa.run(examples[0]["query"])

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'According to the provided context, the price of the AirPods Pro is $249.00 USD.'

### Manual Evaluation ✅

In [22]:
import langchain
langchain.debug = True

In [23]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is the Price of the AirPods Pro ?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the Price of the AirPods Pro ?",
  "context": "﻿Product Name: PowerBeats Pro\nBrand: Beats\nDescription: High-Performance Wireless Earbuds with Apple H1 Chip, Sweat and Water Resistance, 9-Hour Battery\nConnectivity: Bluetooth\nBattery Life: 9 hours\nPrice (USD): $199.95<<<<>>>>>﻿Product Name: AirPods Max\nBrand: Apple\nDescription: Over-Ear Noise-Canceling Headphones with Hi-Res Audio, Adaptive EQ, and 20-Hour Battery Life\nConnectivity: Bluetooth\nBattery Life: 20 hours\nPrice (USD): $549.00<<<<>>>>>﻿Product Name: AirPods Pro\nBrand: Apple\nDescri

'According to the provided context, the price of the AirPods Pro is $249.00 USD.'

In [24]:
# Turn off the debug mode
langchain.debug = False

## Evaluation using LLMs ✅

In [25]:
predictions = qa.apply(examples)

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [26]:
from langchain.evaluation.qa import QAEvalChain

In [27]:
llm = SaladChatOllama(temperature=0,model=MODEL)
eval_chain = QAEvalChain.from_llm(llm)

In [28]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [29]:
graded_outputs

[{'results': 'QUESTION: What is the Price of the AirPods Pro ?\nSTUDENT ANSWER: According to the provided context, the price of the AirPods Pro is $249.00 USD.\nTRUE ANSWER: $249.00\nGRADE: CORRECT'},
 {'results': 'QUESTION: What is the Brand of BassMasters 500 ?\nSTUDENT ANSWER: The brand of BassMasters 500 is JBL.\nTRUE ANSWER: JBL\nGRADE: CORRECT'},
 {'results': 'QUESTION: What is the brand of the earphones described in the document?\nSTUDENT ANSWER: The brands mentioned in the document are:\n\n* Beats (PowerBeats Pro)\n* Apple (AirPods Pro, AirPods Max)\n* Jabra (Elite Active 75t)\n\nTRUE ANSWER: Acoustic\n\nGRADE: INCORRECT'},
 {'results': 'QUESTION: What is the name of the earphones product described in this document?\nSTUDENT ANSWER: The product described in this document is not a specific pair of earphones, but rather three different products from Apple and Beats:\n1. PowerBeats Pro: High-Performance Wireless Earbuds with Apple H1 Chip, Sweat and Water Resistance, 9-Hour Batter

In [36]:
print(graded_outputs[0]['results'])

QUESTION: What is the Price of the AirPods Pro ?
STUDENT ANSWER: According to the provided context, the price of the AirPods Pro is $249.00 USD.
TRUE ANSWER: $249.00
GRADE: CORRECT


In [39]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print(graded_outputs[i]['results'])
    print()

Example 0:
QUESTION: What is the Price of the AirPods Pro ?
STUDENT ANSWER: According to the provided context, the price of the AirPods Pro is $249.00 USD.
TRUE ANSWER: $249.00
GRADE: CORRECT

Example 1:
QUESTION: What is the Brand of BassMasters 500 ?
STUDENT ANSWER: The brand of BassMasters 500 is JBL.
TRUE ANSWER: JBL
GRADE: CORRECT

Example 2:
QUESTION: What is the brand of the earphones described in the document?
STUDENT ANSWER: The brands mentioned in the document are:

* Beats (PowerBeats Pro)
* Apple (AirPods Pro, AirPods Max)
* Jabra (Elite Active 75t)

TRUE ANSWER: Acoustic

GRADE: INCORRECT

Example 3:
QUESTION: What is the name of the earphones product described in this document?
STUDENT ANSWER: The product described in this document is not a specific pair of earphones, but rather three different products from Apple and Beats:
1. PowerBeats Pro: High-Performance Wireless Earbuds with Apple H1 Chip, Sweat and Water Resistance, 9-Hour Battery
2. AirPods Pro: True Wireless Noi