# Import Modules

In [1]:
# Check LangChain Version

# !pip install --upgrade langchain
!pip show langchain --version

Name: langchain
Version: 0.0.271
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /Users/daveebbelaar/opt/anaconda3/envs/ai-experiments/lib/python3.10/site-packages
Requires: aiohttp, async-timeout, dataclasses-json, langsmith, numexpr, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 


In [2]:
import os
import nest_asyncio
import pandas as pd
from dotenv import find_dotenv, load_dotenv
from langsmith import Client
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.smith import RunEvalConfig, run_on_dataset

# Inside a Jupyter notebook, async code started from a cell can fail with
# "RuntimeError: This event loop is already running".
# Patch asyncio to allow nested event loops and avoid that error.

nest_asyncio.apply()

# Load API Keys From the .env File

In [3]:
# Load the variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

# Fail fast when the key is missing. Wrapping os.getenv() in str() would store the
# literal string "None" in the environment and hide the misconfiguration.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if not langchain_api_key:
    raise EnvironmentError(
        "LANGCHAIN_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

# LangSmith settings used by the rest of the notebook.
os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "langsmith-tutorial"

# LangSmith Quick Start

In [5]:
# Load the LangSmith Client and Test Run

# Client() is created without arguments; presumably it picks up the LANGCHAIN_*
# environment variables configured earlier in this notebook — confirm against the
# LangSmith documentation.
client = Client()

# Chat model with default settings, reused by every evaluation cell below.
# The predict() call is a smoke test; its return value is displayed as the cell output.
llm = ChatOpenAI()
llm.predict("Hello, world!")

'Hello! How can I assist you today?'

# Evaluation Quick Start

In [5]:
# 1. Create a Dataset (Inputs Only, No Reference Outputs)

dataset_name = "Rap Battle Dataset"

example_inputs = [
    "a rap battle between Atticus Finch and Cicero",
    "a rap battle between Barbie and Oppenheimer",
    "a Pythonic rap battle between two swallows: one European and one African",
    "a rap battle between Aubrey Plaza and Stephen Colbert",
]

# A dataset stores the prompts so that chains and LLMs
# can be run over the same shared set of examples.
# NOTE(review): the dataset is created unconditionally; re-running this cell when the
# name already exists presumably fails or duplicates data — confirm and clean up first.
dataset = client.create_dataset(
    description="Rap battle prompts.",
    dataset_name=dataset_name,
)

# One example per prompt. Inputs are required; outputs are optional,
# so they are left as None for this unlabeled dataset.
for rap_prompt in example_inputs:
    example_payload = {"question": rap_prompt}
    client.create_example(
        inputs=example_payload,
        outputs=None,
        dataset_id=dataset.id,
    )

In [6]:
# 2. Evaluate Datasets with LLM

# Custom criterion: criterion name mapped to the question the grader answers.
cliche_criterion = {
    "cliche": "Are the lyrics cliche? "
    "Respond Y if they are, N if they're entirely unique."
}

criteria_evaluators = [
    # An evaluator can be referenced by name/enum.
    # For "criteria", the default criterion is "helpfulness".
    "criteria",
    # Or the evaluator can be configured with a named criterion...
    RunEvalConfig.Criteria("harmfulness"),
    RunEvalConfig.Criteria("misogyny"),
    # ...or with a custom {name: description} criterion.
    RunEvalConfig.Criteria(cliche_criterion),
]

eval_config = RunEvalConfig(evaluators=criteria_evaluators)

# Run the LLM over every example of the dataset and grade the outputs.
run_on_dataset(
    client=client,
    dataset_name=dataset_name,
    llm_or_chain_factory=llm,
    evaluation=eval_config,
)

View the evaluation results for project 'ddc0c03809e34bbca82f57dc7b0121f1-ChatOpenAI' at:
https://smith.langchain.com/projects/p/f0cb46ba-5525-43d0-aaa5-a0831c7e6550?eval=true


{'project_name': 'ddc0c03809e34bbca82f57dc7b0121f1-ChatOpenAI',
 'results': {'05eb0539-aa8d-4b0a-b3f9-d8bf6bd1042c': ["Barbie:\nYo, I'm Barbie, the true fashion queen,\nWith my dream house and convertible machine,\nI'm all about style, grace, and fame,\nOppenheimer, you're about to feel the flames!\n\nOppenheimer:\nI'm Oppenheimer, the father of the atomic bomb,\nMy intellect and innovation, so strong,\nI changed the world with my scientific might,\nBarbie, prepare to be blown away tonight!\n\nBarbie:\nOppenheimer, you may have made a big boom,\nBut I'm the icon that makes boys swoon,\nI rock the runway, run the world with my charm,\nBarbie dolls, accessories, I'm causing alarms!\n\nOppenheimer:\nBarbie, your beauty is just skin deep,\nWhile my discoveries made history leap,\nI split the atom, unleashed its power,\nYour plastic world can't match my brightest hour!\n\nBarbie:\nOppenheimer, you're stuck in the past,\nI'm living in the now, my popularity will last,\nI inspire girls to dre

# Different Ways of Creating Datasets in LangSmith

In [7]:
# 1. Create a Dataset From a List of Examples (Key-Value Pairs)

dataset_name = "Elementary Animal Questions"

# Each entry is a (question, reference answer) pair.
example_inputs = [
    ("What is the largest mammal?", "The blue whale"),
    ("What do mammals and birds have in common?", "They are both warm-blooded"),
    ("What are reptiles known for?", "Having scales"),
    (
        "What's the main characteristic of amphibians?",
        "They live both in water and on land",
    ),
]

dataset = client.create_dataset(
    description="Questions and answers about animal phylogenetics.",
    dataset_name=dataset_name,
)

# Store every pair as one labeled example: the question is the input,
# the answer is the reference output.
for question_text, reference_answer in example_inputs:
    example_inputs_payload = {"question": question_text}
    example_outputs_payload = {"answer": reference_answer}
    client.create_example(
        inputs=example_inputs_payload,
        outputs=example_outputs_payload,
        dataset_id=dataset.id,
    )

In [8]:
# 2. Create a Dataset From Existing Runs

dataset_name = "Example Dataset"

# Select the runs that should become examples.
# NOTE(review): relies on a project named "evaluators" already existing in the
# LangSmith account; it is not created anywhere in this notebook — confirm.
run_filter = {
    "project_name": "evaluators",
    "execution_order": 1,
    "error": False,
}
runs = client.list_runs(**run_filter)

dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="An example dataset",
)

# Copy the inputs and outputs of each selected run into the new dataset.
for past_run in runs:
    client.create_example(
        inputs=past_run.inputs,
        outputs=past_run.outputs,
        dataset_id=dataset.id,
    )

In [9]:
# 3. Create a Dataset From a Dataframe

# Build the dataframe from (question, answer) records.

example_inputs = [
    ("What is the largest mammal?", "The blue whale"),
    ("What do mammals and birds have in common?", "They are both warm-blooded"),
    ("What are reptiles known for?", "Having scales"),
    (
        "What's the main characteristic of amphibians?",
        "They live both in water and on land",
    ),
]

qa_columns = ["Question", "Answer"]
df_dataset = pd.DataFrame.from_records(example_inputs, columns=qa_columns)

# Preview the first rows as the cell output.
df_dataset.head()

Unnamed: 0,Question,Answer
0,What is the largest mammal?,The blue whale
1,What do mammals and birds have in common?,They are both warm-blooded
2,What are reptiles known for?,Having scales
3,What's the main characteristic of amphibians?,They live both in water and on land


In [10]:
# Columns of df_dataset used as example inputs and as reference outputs.
# Both lists are reused by the CSV upload cell further down.
input_keys = ["Question"]
output_keys = ["Answer"]

# Create Dataset

# Upload the dataframe built in the previous cell as a new dataset.
dataset = client.upload_dataframe(
    df=df_dataset,
    input_keys=input_keys,
    output_keys=output_keys,
    name="My Dataframe Dataset",
    description="Dataset created from a dataframe",
    data_type="kv",  # The default
)

In [11]:
# 4. Create a Dataset From a CSV File

# Save the dataframe as a CSV file.
# The target directory is created first so that to_csv() does not fail
# when ../data does not exist yet (e.g. on a fresh checkout).

csv_path = "../data/dataset.csv"
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df_dataset.to_csv(csv_path, index=False)

# Create the dataset from the CSV file, using the same input/output
# column lists (input_keys / output_keys) as the dataframe upload.

dataset = client.upload_csv(
    csv_file=csv_path,
    input_keys=input_keys,
    output_keys=output_keys,
    name="My CSV Dataset",
    description="Dataset created from a CSV file",
    data_type="kv",
)

# Correctness: LangSmith Question-Answer Evaluation

In [12]:
# 1. Evaluate Datasets That Contain Labels

# Question-answering evaluators, referenced by name.
qa_evaluators = [
    "qa",  # correctness: right or wrong
    "context_qa",  # refer to example outputs
    "cot_qa",  # context_qa + reasoning
]

evaluation_config = RunEvalConfig(evaluators=qa_evaluators)

# Run the LLM over the labeled animal questions and grade the answers.
run_on_dataset(
    client=client,
    dataset_name="Elementary Animal Questions",
    llm_or_chain_factory=llm,
    evaluation=evaluation_config,
)

View the evaluation results for project '6fd126a7d9ea4d80b41d14047cb28b12-ChatOpenAI' at:
https://smith.langchain.com/projects/p/097c73a8-0808-4743-be60-ee95ce408109?eval=true


{'project_name': '6fd126a7d9ea4d80b41d14047cb28b12-ChatOpenAI',
 'results': {'03fc6975-64f6-41fb-af74-d1dddd6cead0': ['The blue whale (Balaenoptera musculus) holds the title for being the largest mammal. It is also the largest animal to have ever existed on Earth. Blue whales can reach lengths of up to 98 feet (30 meters) and can weigh over 200 tons.'],
  'f48df2c8-7d57-46b7-a5fc-c787362cc9f7': ['The main characteristic of amphibians is their ability to live both on land and in water. They have a dual life cycle, starting as aquatic larvae (such as tadpoles) and then transforming into terrestrial adults. Amphibians also have permeable skin that allows them to breathe through their skin, as well as lungs for breathing air. They typically lay eggs in water and undergo metamorphosis during their life cycle. Additionally, amphibians are cold-blooded and have a unique reproductive behavior, often involving courtship rituals and vocalizations.'],
  'f0444496-d769-4629-8254-3790fc420da8': ['M

In [13]:
# 2. Evaluate Datasets With Custom Criteria

# An arbitrary criterion is defined as a key: value pair in the criteria dict:
# the key names the criterion, the value is the question the grader answers.
helpfulness_criterion = {
    "helpfulness": (
        "Is this submission helpful to the user,"
        " taking into account the correct reference answer?"
    )
}

labeled_evaluators = [RunEvalConfig.LabeledCriteria(helpfulness_criterion)]
evaluation_config = RunEvalConfig(evaluators=labeled_evaluators)

# Run the LLM over the labeled animal questions and grade the answers.
run_on_dataset(
    client=client,
    dataset_name="Elementary Animal Questions",
    llm_or_chain_factory=llm,
    evaluation=evaluation_config,
)

View the evaluation results for project '91c557f9ae6e4f06be87c029c09cc871-ChatOpenAI' at:
https://smith.langchain.com/projects/p/f2921fd9-ba78-4862-aa39-943021acd7b3?eval=true


{'project_name': '91c557f9ae6e4f06be87c029c09cc871-ChatOpenAI',
 'results': {'03fc6975-64f6-41fb-af74-d1dddd6cead0': ['The largest mammal is the blue whale (Balaenoptera musculus). It can reach lengths of up to 98 feet (30 meters) and weigh up to 200 tons.'],
  'f48df2c8-7d57-46b7-a5fc-c787362cc9f7': ['The main characteristic of amphibians is their ability to live both in water and on land. They typically have moist, thin skin that allows them to respire through their skin, as well as lungs or gills for breathing. Amphibians also undergo metamorphosis during their life cycle, starting as aquatic larvae and transforming into terrestrial adults.'],
  'b967d419-1e24-4336-b3f3-9196551db52e': ["Reptiles are known for several characteristics and features:\n\n1. Cold-blooded: Reptiles are ectothermic, which means they rely on external sources of heat to regulate their body temperature.\n\n2. Scales: They have scaly skin that helps to protect them from the environment and retain moisture.\n\n3

In [14]:
# 3. Evaluate Datasets Without Labels

# An arbitrary criterion is defined as a key: value pair in the criteria dict.
creativity_criterion = {
    "creativity": "Is this submission creative, imaginative, or novel?"
}

unlabeled_evaluators = [
    RunEvalConfig.Criteria(creativity_criterion),
    # Simple default criteria such as "conciseness" can be selected by name.
    RunEvalConfig.Criteria("conciseness"),
]

evaluation_config = RunEvalConfig(evaluators=unlabeled_evaluators)

# Run the LLM over the unlabeled rap battle prompts and grade the outputs.
run_on_dataset(
    client=client,
    dataset_name="Rap Battle Dataset",
    llm_or_chain_factory=llm,
    evaluation=evaluation_config,
)

View the evaluation results for project 'e6a96e63db3c4e72a5f78a3b8b877073-ChatOpenAI' at:
https://smith.langchain.com/projects/p/2a570081-c7ac-456c-af66-b5b5bc05b25c?eval=true


{'project_name': 'e6a96e63db3c4e72a5f78a3b8b877073-ChatOpenAI',
 'results': {'05eb0539-aa8d-4b0a-b3f9-d8bf6bd1042c': ["Verse 1 (Barbie):\nYo, I'm Barbie, the queen of the dolls,\nI'll leave you in awe, watch as I break your walls.\nWith my pink Corvette and dreamhouse so fly,\nI'll show you a world you can't even imply.\n\nVerse 2 (Oppenheimer):\nI'm Oppenheimer, the father of the atomic bomb,\nI'll drop knowledge so heavy, you'll feel the calm.\nI split the atom and unleashed destruction,\nYour plastic world can't handle this eruption.\n\nVerse 3 (Barbie):\nOh, Oppenheimer, you may have made a bang,\nBut I'm the icon that forever will hang.\nFrom fashion to careers, I've done it all,\nWhile you're stuck with a bomb that made nations fall.\n\nVerse 4 (Oppenheimer):\nBarbie, you may have style, but I've got brains,\nI paved the way for a world that still remains.\nYour superficial existence won't last long,\nWhile my legacy in science is eternally strong.\n\nVerse 5 (Barbie):\nOppenheim

In [15]:
# 4. Evaluate Datasets Based on Cosine Distance Criteria
# Cosine distance: lower means more similar (0 = most similar).
# NOTE(review): scores are presumably between 0 and 1 for these embeddings — confirm.

distance_evaluators = [
    # Embedding-distance evaluator with its default settings, referenced by name.
    "embedding_distance",
    # Or to customize the embeddings:
    # Requires 'pip install sentence_transformers'
    # RunEvalConfig.EmbeddingDistance(embeddings=HuggingFaceEmbeddings(), distance_metric="cosine"),
]

evaluation_config = RunEvalConfig(evaluators=distance_evaluators)

# Run the LLM over the labeled animal questions and score the answers.
run_on_dataset(
    client=client,
    dataset_name="Elementary Animal Questions",
    llm_or_chain_factory=llm,
    evaluation=evaluation_config,
)

View the evaluation results for project '2deda3cc7c644a109ba18b3ccdd9815c-ChatOpenAI' at:
https://smith.langchain.com/projects/p/9b5d57b1-41d3-4a37-be6d-6a46dc634c9e?eval=true


{'project_name': '2deda3cc7c644a109ba18b3ccdd9815c-ChatOpenAI',
 'results': {'03fc6975-64f6-41fb-af74-d1dddd6cead0': ['The blue whale (Balaenoptera musculus) is the largest mammal, and also the largest animal to have ever existed. They can reach lengths of up to 98 feet (30 meters) and can weigh up to 200 tons.'],
  'f48df2c8-7d57-46b7-a5fc-c787362cc9f7': ['The main characteristic of amphibians is their ability to live both in water and on land. They have a dual life cycle, starting as aquatic larvae and later undergoing metamorphosis to develop lungs and limbs for living on land as adults. Amphibians also have moist, permeable skin that allows them to breathe through their skin, as well as unique respiratory organs such as gills and lungs.'],
  'b967d419-1e24-4336-b3f3-9196551db52e': ['Reptiles are known for several distinguishing characteristics:\n\n1. Cold-bloodedness: Reptiles are ectothermic, which means they rely on external sources of heat to regulate their body temperature.\n\n

In [16]:
# 5. Evaluate Datasets Based on String Distance Criteria
# Jaro-Winkler Similarity Distance: 0 = Exact Match, 1 = No Similarity

string_evaluators = [
    # String-distance evaluator with its default settings, referenced by name.
    "string_distance",
    # Or to customize the distance metric:
    # RunEvalConfig.StringDistance(distance="levenshtein", normalize_score=True),
]

evaluation_config = RunEvalConfig(evaluators=string_evaluators)

# Run the LLM over the labeled animal questions and score the answers.
run_on_dataset(
    client=client,
    dataset_name="Elementary Animal Questions",
    llm_or_chain_factory=llm,
    evaluation=evaluation_config,
)

View the evaluation results for project '08faf61409cc4b988bdaff28abfa2bf3-ChatOpenAI' at:
https://smith.langchain.com/projects/p/640eee06-bb9c-406e-8470-4c6754a9e389?eval=true


{'project_name': '08faf61409cc4b988bdaff28abfa2bf3-ChatOpenAI',
 'results': {'03fc6975-64f6-41fb-af74-d1dddd6cead0': ['The largest mammal is the blue whale (Balaenoptera musculus). It can reach lengths of up to 98 feet (30 meters) and weigh up to 200 tons.'],
  'f48df2c8-7d57-46b7-a5fc-c787362cc9f7': ['The main characteristic of amphibians is their ability to live both on land and in water. They typically undergo metamorphosis, transitioning from an aquatic larval stage (such as tadpoles) to a terrestrial adult stage. Amphibians also have moist skin that allows them to breathe through their skin, as well as the ability to lay eggs in water.'],
  'f0444496-d769-4629-8254-3790fc420da8': ["Mammals and birds have several common characteristics, including:\n\n1. Endothermic metabolism: Both mammals and birds are warm-blooded, meaning they can regulate their body temperature internally.\n\n2. Vertebrate anatomy: Both mammals and birds have a backbone or vertebral column, which provides struc