# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

## Create our QandA application

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [5]:
file = 'OutdoorClothingCatalog_3.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [7]:
from langchain.embeddings import AzureOpenAIEmbeddings
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002-felix",
    openai_api_version="2023-12-01-preview",
)
embeddings

  warn_deprecated(


AzureOpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x137f5ecc0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x137f3d550>, model='text-embedding-ada-002', deployment='text-embedding-ada-002-felix', openai_api_version='2023-12-01-preview', openai_api_base=None, openai_api_type='azure', openai_proxy='', embedding_ctx_length=8191, openai_api_key='f49b792a6e60455f9237e607c9902d5b', openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=16, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, azure_endpoint='https://openai-felix.openai.azure.com', azure_ad_token=None, azure_ad_token_provider=None, validate_base_url=True)

In [8]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

In [10]:
llm = AzureChatOpenAI(temperature=0.0, openai_api_version="2023-12-01-preview", azure_deployment="gpt-35-turbo-felix", openai_api_key=os.getenv("AZURE_OPENAI_KEY"))

qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

### Coming up with test datapoints

In [15]:
data[2]

Document(page_content=": 2\nname: Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece\ndescription: She'll love the bright colors, ruffles and exclusive whimsical prints of this toddler's two-piece swimsuit! Our four-way-stretch and chlorine-resistant fabric keeps its shape and resists snags. The UPF 50+ rated fabric provides the highest rated sun protection possible, blocking 98% of the sun's harmful rays. The crossover no-slip straps and fully lined bottom ensure a secure fit and maximum coverage. Machine wash and line dry for best results. Imported.", metadata={'source': 'OutdoorClothingCatalog_3.csv', 'row': 2})

In [16]:
data[3]

Document(page_content=": 319\nname: Men's Tropical Breeze Pants\ndescription: These Men's cargo hiking pants are adventure-ready in a cool, moisture-wicking fabric that has 50+ UPF sunblock woven right in. Size & Fit - Classic Fit: Sits at the natural waist and trim, with a traditional straight leg. Fabric & Care - 100% nylon wicks moisture and resists wrinkles. Built-in SunSmart™ UPF 50+ rated – the highest rated sun protection possible. Additional Features - Button front with zip fly. Adjustable nylon belt. Two front slash pockets, two cargo pockets, two rear pockets. Zippered security pocket. Elastic inserts in waist. Imported.Sun Protection That Won't Wear Off - Our high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun's harmful rays.", metadata={'source': 'OutdoorClothingCatalog_3.csv', 'row': 3})

### Hard-coded examples

In [57]:
examples = [
    {
        "query": "Please suggest a pant with sunblock",
        "answer": "The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage."
    }
]

### LLM-Generated examples

In [18]:
from langchain.evaluation.qa import QAGenerateChain


In [20]:
example_gen_chain = QAGenerateChain.from_llm(AzureChatOpenAI(temperature=0.0, openai_api_version="2023-12-01-preview", azure_deployment="gpt-35-turbo-felix", openai_api_key=os.getenv("AZURE_OPENAI_KEY")))

In [21]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)



In [33]:
new_examples

[{'qa_pairs': {'query': "What is the weight of one pair of Women's Campside Oxfords?",
   'answer': "The approximate weight of one pair of Women's Campside Oxfords is 1 lb. 1 oz."}},
 {'qa_pairs': {'query': 'What are the dimensions of the small and medium Recycled Waterhog dog mats?',
   'answer': 'The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium has dimensions of 22.5" x 34.5".'}},
 {'qa_pairs': {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?",
   'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. It is made of four-way-stretch and chlorine-resistant fabric that keeps its shape and resists snags. The swimsuit is also UPF 50+ rated, providing the highest rated sun protection possible, blocking 98% of the sun's harmful rays. The crossover no-slip straps and fully lined bottom ensure a secure fit and maximum coverage. The swimsuit can be machine washed and line dried for best results

In [26]:
new_examples[0]

{'qa_pairs': {'query': "What is the weight of one pair of Women's Campside Oxfords?",
  'answer': "The approximate weight of one pair of Women's Campside Oxfords is 1 lb. 1 oz."}}

In [23]:
data[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_3.csv', 'row': 0})

### Combine examples

In [58]:
for ex in new_examples:
    examples.append(ex["qa_pairs"])

In [59]:
examples

[{'query': 'Please suggest a pant with sunblock',
  'answer': "The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage."},
 {'query': "What is the weight of one pair of Women's Campside Oxfords?",
  'answer': "The approximate weight of one pair of Women's Campside Oxfords is 1 lb. 1 oz."},
 {'query': 'What are the dimensions of the small and medium Recycled Waterhog dog mats?',
  'answer': 'The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium has dimensions of 22.5" x 34.5".'},
 {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?",
  'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. It is made of four-way-stretch and chlorine-resistant fabric that keeps its shape and resists snags. The swimsuit is also UPF 50+ rated, providing the highest rated 

In [36]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Please suggest a pant with sunblock"
}


[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Please suggest a pant with sunblock",
  "context": ": 319\nname: Men's Tropical Breeze Pants\ndescription: These Men's cargo hiking pants are adventure-ready in a cool, moisture-wicking fabric that has 50+ UPF sunblock woven right in. Size & Fit - Classic Fit: Sits at the natural waist and trim, with a traditional straight leg. Fabric & Care - 100% nylon wicks moisture and resists wrinkles. Built-in SunSmart™ UPF 50+ rated – the highest rated sun protection possible. Additional Features - Button front with zip fly. Adjustable nylon belt. Two front slash pockets, two cargo pockets, two rear pockets. Zippered security pocket. Elastic inserts in waist. Imported.Sun Protection That Won't Wear Off - Our hig

"The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage."

In [37]:
qa.run(examples[2]["qa_pairs"]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What are the dimensions of the small and medium Recycled Waterhog dog mats?"
}


[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What are the dimensions of the small and medium Recycled Waterhog dog mats?",
  "context": ": 1\nname: Recycled Waterhog Dog Mat, Chevron Weave\ndescription: Protect your floors from spills and splashing with our ultradurable recycled Waterhog dog mat made right here in the USA. \n\nSpecs\nSmall - Dimensions: 18\" x 28\". \nMedium - Dimensions: 22.5\" x 34.5\".\n\nWhy We Love It\nMother nature, wet shoes and muddy paws have met their match with our Recycled Waterhog mats. Ruggedly constructed from recycled plastic materials, these ultratough mats help keep dirt and water off your floors and plastic out of landfills, trails and oceans. Now, that's a win-win for everyone.\n\nFabric & Care\nVacuum or hose

'The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium size has dimensions of 22.5" x 34.5".'

## Manual Evaluation

In [38]:
import langchain
langchain.debug = True

In [39]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Please suggest a pant with sunblock"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Please suggest a pant with sunblock",
  "context": ": 319\nname: Men's Tropical Breeze Pants\ndescription: These Men's cargo hiking pants are adventure-ready in a cool, moisture-wicking fabric that has 50+ UPF sunblock woven right in. Size & Fit - Classic Fit: Sits at the natural waist and trim, with a traditional straight leg. Fabric & Care - 100% nylon wicks moisture and resists wrinkles. Built-in SunSmart™ UPF 50+ rated – the highest rated sun protection possible. Additional Features - Button front with zip fly. Adjustable nylon belt. Two front slash pocket

"The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage."

In [40]:
# Turn off the debug mode
langchain.debug = False

## LLM assisted evaluation

In [51]:
predictions = qa.apply(examples)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [53]:
from langchain.evaluation.qa import QAEvalChain

In [54]:
llm = AzureChatOpenAI(temperature=0.0, openai_api_version="2023-12-01-preview", azure_deployment="gpt-35-turbo-felix", openai_api_key=os.getenv("AZURE_OPENAI_KEY"))
eval_chain = QAEvalChain.from_llm(llm)

In [60]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [64]:
predictions

[{'query': 'Please suggest a pant with sunblock',
  'answer': '319...',
  'result': "The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage."},
 {'query': "What is the weight of one pair of Women's Campside Oxfords?",
  'answer': "The approximate weight of one pair of Women's Campside Oxfords is 1 lb. 1 oz.",
  'result': "The weight of one pair of Women's Campside Oxfords is approximately 1 lb. 1 oz."},
 {'query': 'What are the dimensions of the small and medium Recycled Waterhog dog mats?',
  'answer': 'The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium has dimensions of 22.5" x 34.5".',
  'result': 'The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium size has dimensions of 22.5" x 34.5".'},
 {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?",
  

In [65]:
graded_outputs

[{'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'}]

In [66]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['results'])
    print()

Example 0:
Question: Please suggest a pant with sunblock
Real Answer: 319...
Predicted Answer: The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage.
Predicted Grade: CORRECT

Example 1:
Question: What is the weight of one pair of Women's Campside Oxfords?
Real Answer: The approximate weight of one pair of Women's Campside Oxfords is 1 lb. 1 oz.
Predicted Answer: The weight of one pair of Women's Campside Oxfords is approximately 1 lb. 1 oz.
Predicted Grade: CORRECT

Example 2:
Question: What are the dimensions of the small and medium Recycled Waterhog dog mats?
Real Answer: The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium has dimensions of 22.5" x 34.5".
Predicted Answer: The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium size has dimensions of 22.5" x 34.5".
Predicted Grade: CORREC

In [63]:
examples

[{'query': 'Please suggest a pant with sunblock',
  'answer': "The Men's Tropical Breeze Pants have 50+ UPF sunblock woven right in. They are made of 100% nylon that wicks moisture and resists wrinkles. They also have a classic fit and multiple pockets for storage."},
 {'query': "What is the weight of one pair of Women's Campside Oxfords?",
  'answer': "The approximate weight of one pair of Women's Campside Oxfords is 1 lb. 1 oz."},
 {'query': 'What are the dimensions of the small and medium Recycled Waterhog dog mats?',
  'answer': 'The small Recycled Waterhog dog mat has dimensions of 18" x 28" and the medium has dimensions of 22.5" x 34.5".'},
 {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?",
  'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. It is made of four-way-stretch and chlorine-resistant fabric that keeps its shape and resists snags. The swimsuit is also UPF 50+ rated, providing the highest rated 