In [2]:
%pip install --upgrade --quiet  langchain langsmith langchainhub --quiet
%pip install --upgrade --quiet  langchain-openai tiktoken pandas duckduckgo-search --quiet

In [3]:
import os
from uuid import uuid4

unique_id = uuid4().hex[0:8]
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"LangSmith_WalkThrough"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = "<Your LangSmith API Key>"  # Update to your API key


# Used by the agent in this tutorial
os.environ["OPENAI_API_KEY"] = "<Your OpenAI API Key>"

In [4]:
from langsmith import Client

client = Client()

In [5]:
from langchain import hub
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.tools import DuckDuckGoSearchResults
from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
from langchain_openai import ChatOpenAI

# Fetches the latest version of this prompt
prompt = hub.pull("wfh/langsmith-agent-prompt:5d466cbc")

llm = ChatOpenAI(
    model="gpt-3.5-turbo-16k",
    temperature=0,
)

tools = [
    DuckDuckGoSearchResults(
        name="duck_duck_go"
    ),  # General internet search using DuckDuckGo
]

llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])

runnable_agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

agent_executor = AgentExecutor(
    agent=runnable_agent, tools=tools, handle_parsing_errors=True
)

In [23]:
print (prompt)

input_variables=['agent_scratchpad', 'input'] input_types={'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert senior software engineer. You are responsible for answering questions about LangChain. Use functions to consult the documentation before answering.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(variable_name='agent_scratchpad')]


In [6]:
inputs = [
    "What is LangChain?",
    "What's LangSmith?",
    "When was Llama-v2 released?",
    "What is the langsmith cookbook?",
    "When did langchain first announce the hub?",
]

results = agent_executor.batch([{"input": x} for x in inputs], return_exceptions=True)

In [19]:
results

[{'input': 'What is LangChain?',
  'output': 'I\'m sorry, but I couldn\'t find any information about "LangChain". Could you please provide more context or clarify your question?'},
 {'input': "What's LangSmith?",
  'output': 'I\'m sorry, but I couldn\'t find any information about "LangSmith". It could be a company, a product, or a person. Can you provide more context or details about what you are referring to?'},
 {'input': 'When was Llama-v2 released?',
  'output': 'Llama-v2 was released on July 18, 2023.'},
 {'input': 'What is the langsmith cookbook?',
  'output': 'The Langsmith Cookbook is a collection of recipes and cooking techniques created by Langsmith, a fictional character. It is a comprehensive guide that covers a wide range of cuisines and dishes. The cookbook includes step-by-step instructions, ingredient lists, and tips for successful cooking. Whether you are a beginner or an experienced cook, the Langsmith Cookbook can help you enhance your culinary skills and create deli

In [8]:
outputs = [
    "LangChain is an open-source framework for building applications using large language models. It is also the name of the company building LangSmith.",
    "LangSmith is a unified platform for debugging, testing, and monitoring language model applications and agents powered by LangChain",
    "July 18, 2023",
    "The langsmith cookbook is a github repository containing detailed examples of how to use LangSmith to debug, evaluate, and monitor large language model-powered applications.",
    "September 5, 2023",
]

In [10]:
dataset_name = f"LangSmith Walkthrough"

dataset = client.create_dataset(
    dataset_name,
    description="An example dataset of questions over the LangSmith documentation.",
)

client.create_examples(
    inputs=[{"input": query} for query in inputs],
    outputs=[{"output": answer} for answer in outputs],
    dataset_id=dataset.id,
)

In [22]:
from langchain import hub
from langchain.agents import AgentExecutor, AgentType, initialize_agent, load_tools
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
from langchain_openai import ChatOpenAI


# Since chains can be stateful (e.g. they can have memory), we provide
# a way to initialize a new chain for each row in the dataset. This is done
# by passing in a factory function that returns a new chain for each row.
def create_agent(prompt, llm_with_tools):
    runnable_agent = (
        {
            "input": lambda x: x["input"],
            "agent_scratchpad": lambda x: format_to_openai_function_messages(
                x["intermediate_steps"]
            ),
        }
        | prompt
        | llm_with_tools
        | OpenAIFunctionsAgentOutputParser()
    )
    return AgentExecutor(agent=runnable_agent, tools=tools, handle_parsing_errors=True)

In [12]:
from langsmith.evaluation import EvaluationResult, run_evaluator
from langsmith.schemas import Example, Run


@run_evaluator
def check_not_idk(run: Run, example: Example):
    """Illustration of a custom evaluator."""
    agent_response = run.outputs["output"]
    if "don't know" in agent_response or "not sure" in agent_response:
        score = 0
    else:
        score = 1
    # You can access the dataset labels in example.outputs[key]
    # You can also access the model inputs in run.inputs[key]
    return EvaluationResult(
        key="not_uncertain",
        score=score,
    )

In [13]:
from langchain.evaluation import EvaluatorType
from langchain.smith import RunEvalConfig

evaluation_config = RunEvalConfig(
    # Evaluators can either be an evaluator type (e.g., "qa", "criteria", "embedding_distance", etc.) or a configuration for that evaluator
    evaluators=[
        # Measures whether a QA response is "Correct", based on a reference answer
        # You can also select via the raw string "qa"
        EvaluatorType.QA,
        # Measure the embedding distance between the output and the reference answer
        # Equivalent to: EvalConfig.EmbeddingDistance(embeddings=OpenAIEmbeddings())
        EvaluatorType.EMBEDDING_DISTANCE,
        # Grade whether the output satisfies the stated criteria.
        # You can select a default one such as "helpfulness" or provide your own.
        RunEvalConfig.LabeledCriteria("helpfulness"),
        # The LabeledScoreString evaluator outputs a score on a scale from 1-10.
        # You can use default criteria or write our own rubric
        RunEvalConfig.LabeledScoreString(
            {
                "accuracy": """
Score 1: The answer is completely unrelated to the reference.
Score 3: The answer has minor relevance but does not align with the reference.
Score 5: The answer has moderate relevance but contains inaccuracies.
Score 7: The answer aligns with the reference but has minor errors or omissions.
Score 10: The answer is completely accurate and aligns perfectly with the reference."""
            },
            normalize_by=10,
        ),
    ],
    # You can add custom StringEvaluator or RunEvaluator objects here as well, which will automatically be
    # applied to each prediction. Check out the docs for examples.
    custom_evaluators=[check_not_idk],
)

In [16]:
from langchain import hub

# We will test this version of the prompt
prompt = hub.pull("wfh/langsmith-agent-prompt:798e7324")
print (prompt)

input_variables=['agent_scratchpad', 'input'] input_types={'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert senior software engineer. You are responsible for answering questions about LangChain. Use functions to consult the documentation before answering.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(variable_name='agent_scratchpad')]


In [15]:
import functools

from langchain.smith import arun_on_dataset, run_on_dataset

chain_results = run_on_dataset(
    dataset_name=dataset_name,
    llm_or_chain_factory=functools.partial(
        create_agent, prompt=prompt, llm_with_tools=llm_with_tools
    ),
    evaluation=evaluation_config,
    verbose=True,
    client=client,
    project_name=f"runnable-agent-test-5d466cbc-{unique_id}",
    # Project metadata communicates the experiment parameters,
    # Useful for reviewing the test results
    project_metadata={
        "env": "testing-notebook",
        "model": "gpt-3.5-turbo",
        "prompt": "5d466cbc",
    },
)

# Sometimes, the agent will error due to parsing issues, incompatible tool inputs, etc.
# These are logged as warnings here and captured as errors in the tracing UI.

View the evaluation results for project 'runnable-agent-test-5d466cbc-8d12025e' at:
https://smith.langchain.com/o/6427e792-08d3-562b-9f1d-a5e4094e5768/datasets/2d116ed2-af22-429a-b1f8-3f2f6518f295/compare?selectedSessions=aa968a26-eef5-41cc-a375-75db4a609a17

View all tests for Dataset LangSmith Walkthrough at:
https://smith.langchain.com/o/6427e792-08d3-562b-9f1d-a5e4094e5768/datasets/2d116ed2-af22-429a-b1f8-3f2f6518f295
[------------------------------------------------->] 5/5
 Experiment Results:
        feedback.correctness  feedback.embedding_cosine_distance  feedback.helpfulness  feedback.score_string:accuracy  feedback.not_uncertain error  execution_time                                run_id
count                   5.00                                5.00                  5.00                            5.00                    5.00     0            5.00                                     5
unique                   NaN                                 NaN                   NaN    

In [17]:
chain_results.to_dataframe()

Unnamed: 0,inputs.input,outputs.input,outputs.output,reference.output,feedback.correctness,feedback.embedding_cosine_distance,feedback.helpfulness,feedback.score_string:accuracy,feedback.not_uncertain,error,execution_time,run_id
2c28f665-0339-41bf-a321-c64d5955fba3,When did langchain first announce the hub?,When did langchain first announce the hub?,LangChain first announced the LangChain Hub on...,"September 5, 2023",1,0.153003,1,1.0,1,,3.616676,f49cd719-b16a-423f-a9d2-a8d8e80cc6b6
a5c4c002-3d65-4a7e-a5d2-8fbd2e999aea,What is the langsmith cookbook?,What is the langsmith cookbook?,The LangSmith Cookbook is a collection of code...,The langsmith cookbook is a github repository ...,1,0.04259,1,0.9,1,,4.373461,808a30d5-6a46-4167-9505-cc789387f835
49b521ed-ac39-4621-b7f5-3a1c71c32f2b,When was Llama-v2 released?,When was Llama-v2 released?,"Llama-v2 was released on July 18, 2023.","July 18, 2023",1,0.138944,1,1.0,1,,3.313871,bba5d3a9-38e1-4865-a586-d1018b5e8f10
80d51f0b-6a74-4fc7-a668-5b04798cc5d0,What's LangSmith?,What's LangSmith?,LangSmith is a platform that helps developers ...,"LangSmith is a unified platform for debugging,...",1,0.061878,1,0.7,1,,4.353315,7389d09c-9a5a-44ac-9c40-983aaba7ee0e
090f1511-b2e2-4615-9348-86f4a7ef186a,What is LangChain?,What is LangChain?,LangChain is a decentralized blockchain platfo...,LangChain is an open-source framework for buil...,0,0.092256,0,0.1,1,,2.366538,d057870e-8ffc-4e98-a933-3d69a0d4c729



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

