# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [2]:
import semantic_kernel as sk
import os
import logging
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('__name__')
kernel=sk.Kernel(log=logger)

api_key = os.environ['OPENAI_API_KEY']
kernel.add_chat_service(
        "chat-gpt", OpenAIChatCompletion("gpt-3.5-turbo", api_key)
)

<semantic_kernel.kernel.Kernel at 0x7f96c811b670>

In [3]:
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
kernel.add_text_embedding_generation_service(
        "ada", OpenAITextEmbedding("text-embedding-ada-002", api_key)
    )

<semantic_kernel.kernel.Kernel at 0x7f96c811b670>

In [4]:
from semantic_kernel.connectors.memory.chroma import ChromaMemoryStore
memstore=ChromaMemoryStore(persist_directory="catalog")
kernel.register_memory_store(memory_store=memstore)

INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
INFO:clickhouse_connect.driver.ctypes:Successfully imported ClickHouse Connect C data optimizations
DEBUG:clickhouse_connect.driver.ctypes:Successfully import ClickHouse Connect C/Numpy optimizations
INFO:clickhouse_connect.json_impl:Using orjson library for writing JSON byte strings
INFO:chromadb.db.duckdb:loaded in 1000 embeddings
INFO:chromadb.db.duckdb:loaded in 1 collections


## Create our QandA application

In [5]:
import pandas as pd
df = pd.read_csv('OutdoorClothingCatalog_1000.csv')

### Coming up with test datapoints

In [6]:
print("name:", df.iloc[10]["name"], "\ndescription:", df.iloc[10].description)

name: Cozy Comfort Pullover Set, Stripe 
description: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.

Size & Fit
- Pants are Favorite Fit: Sits lower on the waist.
- Relaxed Fit: Our most generous fit sits farthest from the body.

Fabric & Care
- In the softest blend of 63% polyester, 35% rayon and 2% spandex.

Additional Features
- Relaxed fit top with raglan sleeves and rounded hem.
- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.

Imported.


In [7]:
print("name:", df.iloc[11]["name"], "\ndescription:", df.iloc[11].description)

name: Ultra-Lofty 850 Stretch Down Hooded Jacket 
description: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this jacket is suitable for light activity up to 20° and moderate activity up to -30°. The soft and durable 100% polyester shell offers complete windproof protection and is insulated with warm, lofty goose down. Other features include welded baffles for a no-stitch construction and excellent stretch, an adjustable hood, an interior media port and mesh stash pocket and a hem drawcord. Machine wash and dry. Imported.


### Hard-coded examples

In [8]:
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set\
        have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty \
        850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

### LLM-Generated examples

#### Pick a few record to generate question from the LLM

In [9]:
docs = await memstore.get_batch_async(collection_name="outdoordb", keys=["0", "1", "2", "3"], with_embeddings=False)


In [10]:
docs[0]._text

"Women's Campside Oxfords :  This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries."

In [11]:
qdocs = "\n```\n".join([docs[i]._text for i in range(len(docs))])
   
    

In [12]:
async def example_gen_from_llm(qdocs) -> str :

   
    prompt = """{{ $qdocs}} 
    
    Question: Please generate one question and answer from each of above documents delimited by triple backticks  
    and return results in a JSON list with fields named as query and answer.
    """
    
    questgen = kernel.create_semantic_function(prompt, temperature=0.0)
    context_variables = sk.ContextVariables(variables={
        "qdocs": qdocs
    })
    response = questgen(variables=context_variables)
    return response

In [None]:
new_examples = await example_gen_from_llm(qdocs)


In [14]:
new_examples["input"]

'[{\n    "query": "What is the weight of Women\'s Campside Oxfords?",\n    "answer": "The approximate weight of Women\'s Campside Oxfords is 1 lb.1 oz. per pair."\n},\n{\n    "query": "What is the material used to make Recycled Waterhog Dog Mat?",\n    "answer": "Recycled Waterhog Dog Mat is made from 24 oz. polyester fabric made from 94% recycled materials with rubber backing."\n},\n{\n    "query": "What is the sun protection rating of Infant and Toddler Girls\' Coastal Chill Swimsuit?",\n    "answer": "Infant and Toddler Girls\' Coastal Chill Swimsuit has a UPF 50+ rated fabric which provides the highest rated sun protection possible, blocking 98% of the sun\'s harmful rays."\n},\n{\n    "query": "What is the fabric composition of Refresh Swimwear, V-Neck Tankini Contrasts?",\n    "answer": "Refresh Swimwear, V-Neck Tankini Contrasts is made from 82% recycled nylon with 18% Lycra® spandex for the body and 90% recycled nylon with 10% Lycra® spandex for lining."\n}]'

In [15]:
import json
jlist = json.loads(new_examples["input"])


### Combine examples

In [16]:
examples += jlist

In [17]:
async def ragqna(kernel, query, limit) -> str:
    docs = await kernel.memory.search_async(collection="outdoordb", limit=limit, min_relevance_score=0.3, query=query)
    qdocs = "\n```\n".join([docs[i].text for i in range(len(docs))])
    
    prompt = """{{ $qdocs}} 
    
    Use the above documents delimited by triple backticks and answer the following question: {{ $query }}
    
    
    """
    
    qna = kernel.create_semantic_function(prompt, temperature=0.0)
    context_variables = sk.ContextVariables(variables={
        "qdocs": qdocs,
        "query": query
    })
    response = qna(variables=context_variables)
    return response

## Manual Evaluation

In [None]:
response = await ragqna(kernel, examples[0]["query"], 3)

In [20]:
response["input"]

'Yes, the Cozy Comfort Pullover Set has side pockets.'

In [None]:
for example in examples:
    response = await ragqna(kernel, example["query"], 3)
    example["Predicted"] = response["input"]

In [22]:
examples

[{'query': 'Do the Cozy Comfort Pullover Set        have side pockets?',
  'answer': 'Yes',
  'Predicted': 'Yes, the Cozy Comfort Pullover Set has side pockets.'},
 {'query': 'What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?',
  'answer': 'The DownTek collection',
  'Predicted': 'The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.'},
 {'query': "What is the weight of Women's Campside Oxfords?",
  'answer': "The approximate weight of Women's Campside Oxfords is 1 lb.1 oz. per pair.",
  'Predicted': "The weight of Women's Campside Oxfords is approximately 1 lb. 1 oz. per pair."},
 {'query': 'What is the material used to make Recycled Waterhog Dog Mat?',
  'answer': 'Recycled Waterhog Dog Mat is made from 24 oz. polyester fabric made from 94% recycled materials with rubber backing.',
  'Predicted': 'The material used to make Recycled Waterhog Dog Mat is 24 oz. polyester fabric made from 94% recycled materials with a rubber backing.

## LLM assisted evaluation

In [None]:
#TODO