# LangChain: Evaluation


* Sample Data Generation
* Manual Evaluation
* LLM-assisted Evaluation of Groundtruth and Generated outputs.

In [None]:
!pip3 install docarray

### Create Q&A application ✅

In [6]:
from utils import SaladChatOllama,SaladOllamaEmbeddings

from langchain.chains import RetrievalQA
# from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import UnstructuredExcelLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [3]:
# Load your CSV data using CSVLoader method of document_loaders

file = 'data/earphones_description.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [7]:
from langchain.indexes import VectorstoreIndexCreator

embedding = SaladOllamaEmbeddings()
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding = embedding
).from_loaders([loader])

In [8]:
llm = SaladChatOllama(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

### Generating Test Data Points ✅

In [9]:
data[1]

Document(page_content='\ufeffProduct Name: SonicPro E40\nBrand: SoniTech\nDescription: In-Ear Wired Earphones with Enhanced Bass, Tangle-Free Cable, and Built-in Microphone\nConnectivity: Wired\nBattery Life: N/A\nPrice (USD): $29.99', metadata={'source': 'data/earphones_description.csv', 'row': 1})

In [10]:
data[9]

Document(page_content='\ufeffProduct Name: AirPods Max\nBrand: Apple\nDescription: Over-Ear Noise-Canceling Headphones with Hi-Res Audio, Adaptive EQ, and 20-Hour Battery Life\nConnectivity: Bluetooth\nBattery Life: 20 hours\nPrice (USD): $549.00', metadata={'source': 'data/earphones_description.csv', 'row': 9})

### Creating Manually typed examples ✅

In [11]:
examples = [
    {
        "query": "What is the Price of the AirPods Pro ?",
        "answer": "$249.00"
    },
    {
        "query": "What is the Brand of BassMasters 500 ?",
        "answer": "JBL"
    }
]

### LLM-Generated examples ✅

In [12]:
from langchain.evaluation.qa import QAGenerateChain

In [13]:
example_gen_chain = QAGenerateChain.from_llm(SaladChatOllama())

In [14]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:2]]
)



In [15]:
new_examples[0]

{'qa_pairs': {'query': 'What is the brand of the wireless Bluetooth earphones described in the document?',
  'answer': 'The brand of the wireless Bluetooth earphones described in the document is Acoustic.'}}

In [16]:
def convert_qa_pairs(data_list):
    converted_data = []
    for item in data_list:
        qa_pairs = item.get('qa_pairs', {})
        converted_item = {
            'query': qa_pairs.get('query', ''),
            'answer': qa_pairs.get('answer', '')
        }
        converted_data.append(converted_item)
    return converted_data

new_examples = convert_qa_pairs(new_examples)

In [17]:
data[0]

Document(page_content='\ufeffProduct Name: SoundBeats X100\nBrand: Acoustic\nDescription: Wireless Bluetooth Earphones with Noise Cancelling, High-Resolution Audio, and 10-Hour Battery\nConnectivity: Bluetooth\nBattery Life: 10 hours\nPrice (USD): $79.99', metadata={'source': 'data/earphones_description.csv', 'row': 0})

### Combine examples into one list

In [18]:
examples += new_examples

In [19]:
qa.run(examples[0]["query"])

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'According to the context provided, the price of the AirPods Pro is $249.00.'

### Manual Evaluation ✅

In [20]:
import langchain
langchain.debug = True

In [21]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is the Price of the AirPods Pro ?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the Price of the AirPods Pro ?",
  "context": "﻿Product Name: BassMasters 500\nBrand: JBL\nDescription: Wireless In-Ear Earphones with Deep Bass, IPX7 Water Resistance, and 8-Hour Battery\nConnectivity: Bluetooth\nBattery Life: 8 hours\nPrice (USD): $69.99<<<<>>>>>﻿Product Name: AirPods Max\nBrand: Apple\nDescription: Over-Ear Noise-Canceling Headphones with Hi-Res Audio, Adaptive EQ, and 20-Hour Battery Life\nConnectivity: Bluetooth\nBattery Life: 20 hours\nPrice (USD): $549.00<<<<>>>>>﻿Product Name: AirPods Pro\nBrand: Apple\nDescription: True Wir

'According to the context provided, the price of the AirPods Pro is $249.00.'

In [22]:
# Turn off the debug mode
langchain.debug = False

## Evaluation using LLMs ✅

In [23]:
predictions = qa.apply(examples)

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [24]:
from langchain.evaluation.qa import QAEvalChain

In [25]:
llm = SaladChatOllama(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

In [26]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [27]:
graded_outputs

[{'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'}]

In [28]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['results'])
    print()

Example 0:
Question: What is the Price of the AirPods Pro ?
Real Answer: $249.00
Predicted Answer: According to the context provided, the price of the AirPods Pro is $249.00.
Predicted Grade: CORRECT

Example 1:
Question: What is the Brand of BassMasters 500 ?
Real Answer: JBL
Predicted Answer: Based on the context provided, the brand of BassMasters 500 is JBL.
Predicted Grade: CORRECT

Example 2:
Question: What is the brand of the wireless Bluetooth earphones described in the document?
Real Answer: The brand of the wireless Bluetooth earphones described in the document is Acoustic.
Predicted Answer: Based on the information provided in the document, the brand of the wireless Bluetooth earphones is:

* BassMasters 500: JBL
* SoundBeats X100: Acoustic
* SoundWave E200: AudioTech
* AirPods Pro: Apple
Predicted Grade: CORRECT

Example 3:
Question: What is the brand of the earphones according to the document?
Real Answer: The brand of the earphones is SoniTech.
Predicted Answer: According 