In [4]:
import os
from langsmith import Client

from dotenv import load_dotenv
# Load settings from the .env file that sits next to this notebook.
# NOTE(review): presumably this is where the LangSmith / OpenAI API keys come
# from -- confirm against the actual .env contents.
load_dotenv('./.env')

# Set the LANGCHAIN_PROJECT environment variable for this process.
# NOTE(review): presumably LangSmith groups the runs of this demo under this
# project name -- confirm.
os.environ["LANGCHAIN_PROJECT"] = "Demo LangSmith 03"

In [5]:
# LangSmith client built with no explicit arguments; it is reused below to
# create the dataset and its examples.
# NOTE(review): presumably it reads its API key / endpoint from the
# environment -- confirm.
client = Client()

In [6]:
# Define dataset
#
# This cell is written to be safe under "Restart Kernel -> Run All": the
# dataset is created only when no dataset with this name exists yet, and the
# examples are uploaded only when the dataset is still empty. Re-running the
# cell therefore neither fails on the duplicate name nor duplicates examples.

dataset_name = "QA Example Dataset"

# (question, reference answer) pairs; kept together so an input can never be
# paired with the wrong output by accident.
qa_pairs = [
    ("O que é LangChain?", "Um framework para desenvolver aplicações LLM"),
    ("O que é LangSmith?", "Uma plataforma de observabilidade para aplicações LLM"),
    ("O que é OpenAI?", "Uma empresa que cria LLMs"),
    ("O que é Google?", "Uma empresa de tecnologia conhecida por dominar buscas"),
    ("O que é Mistral?", "Uma empresa que cria LLMs"),
]

if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
else:
    dataset = client.create_dataset(dataset_name)

# Upload the examples only if the dataset has none (fresh dataset, or a
# previous run that created the dataset but stopped before uploading).
dataset_is_empty = next(iter(client.list_examples(dataset_id=dataset.id)), None) is None
if dataset_is_empty:
    client.create_examples(
        inputs=[{"question": question} for question, _ in qa_pairs],
        outputs=[{"answer": answer} for _, answer in qa_pairs],
        dataset_id=dataset.id,
    )

In [26]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts.prompt import PromptTemplate
from langsmith.evaluation import LangChainStringEvaluator

# Grading prompt for the LLM-as-judge "qa" evaluator. The placeholders
# {query}, {answer} and {result} are the three variables declared in PROMPT.
#
# The verdict words are intentionally the English tokens CORRECT / INCORRECT
# even though the rest of the prompt is in Portuguese: LangChain's QA eval
# chain derives the numeric score by looking for exactly those words in the
# model's reply. With translated verdicts (CORRETO / INCORRETO) the reply is
# kept as text but no score is produced.
_PROMPT_TEMPLATE = """Você é um professor especialista em avaliar respostas de alunos.
Você está avaliando a seguinte pergunta:
{query}
Aqui está a resposta real:
{answer}
Você está avaliando a seguinte resposta prevista:
{result}
Responda apenas com a palavra CORRECT ou INCORRECT (em inglês, em maiúsculas):
Avaliação:
"""

PROMPT = PromptTemplate(
    input_variables=["query", "answer", "result"], template=_PROMPT_TEMPLATE
)

# Judge model; temperature 0.0 as in the application calls below.
eval_llm = ChatOpenAI(temperature=0.0, model_name="gpt-4o-mini")

# Off-the-shelf "qa" string evaluator, configured with the custom judge model
# and the custom grading prompt defined above.
qa_evaluator = LangChainStringEvaluator("qa", config={"llm": eval_llm, "prompt": PROMPT})

In [27]:
from langsmith.schemas import Run, Example

def evaluate_length(run: Run, example: Example) -> dict:
    """Score whether the prediction is shorter than twice the reference answer.

    Args:
        run: The run whose ``outputs["output"]`` holds the predicted text.
        example: The dataset example whose ``outputs["answer"]`` holds the
            reference text.

    Returns:
        ``{"key": "length", "score": s}`` where ``s`` is 1 when
        ``len(prediction) < 2 * len(reference)`` and 0 otherwise.

    A missing or ``None`` prediction / reference is treated as the empty
    string, so the function never raises on incomplete runs or examples.
    """
    # Both `outputs` attributes are Optional in the LangSmith schemas, so guard
    # against None before calling `.get` on them.
    run_outputs = run.outputs or {}
    example_outputs = example.outputs or {}

    prediction = run_outputs.get("output") or ""
    required = example_outputs.get("answer") or ""

    score = int(len(prediction) < 2 * len(required))
    return {"key": "length", "score": score}

In [28]:
import openai

# OpenAI client shared by the application function(s) in this notebook.
openai_client = openai.Client()

def my_app(question, model="gpt-4o-mini"):
    """Answer ``question`` with a single chat-completion call.

    Args:
        question: The user question, sent as the ``user`` message.
        model: Name of the chat model to query. Defaults to ``"gpt-4o-mini"``,
            so existing ``my_app(question)`` calls behave as before; pass a
            different name to compare models without copying this function.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = openai_client.chat.completions.create(
        model=model,
        temperature=0,
        messages=[
            {
                # System prompt: asks for short, concise one-sentence answers.
                "role": "system",
                "content": "Responda às perguntas dos usuários de forma curta e concisa (uma frase curta)."
            },
            {
                "role": "user",
                "content": question,
            }
        ],
    )
    return response.choices[0].message.content

In [29]:
def langsmith_app(inputs):
    """Adapt ``my_app`` to the dict-in / dict-out shape used for evaluation.

    Reads the question from ``inputs["question"]`` and returns the answer
    under the ``"output"`` key.
    """
    question = inputs["question"]
    return {"output": my_app(question)}

In [30]:
from langsmith.evaluation import evaluate

# Run `langsmith_app` against the dataset referenced by `dataset_name`,
# scoring each result with the length check and the LLM-based QA evaluator.
# The experiment name is prefixed with "openai-4o-mini".
experiment_results = evaluate(
    langsmith_app,
    data=dataset_name,
    evaluators=[evaluate_length, qa_evaluator],
    experiment_prefix="openai-4o-mini",
)

View the evaluation results for experiment: 'openai-4o-mini-05271edc' at:
https://smith.langchain.com/o/f8237160-33a4-53f2-b52e-75bb63c1854e/datasets/1de0865d-d403-4018-9035-f34a74c11209/compare?selectedSessions=a4223239-f3f5-4c7a-aece-301bd7075774




0it [00:00, ?it/s]

In [31]:
import openai

# Rebinds the module-level OpenAI client (an equivalent client is already
# created in the earlier `my_app` cell).
openai_client = openai.Client()

def my_app_2(question):
    """Answer ``question`` with gpt-3.5-turbo.

    Sends the same system prompt and request settings as ``my_app``; only the
    model name differs. Returns the ``content`` of the first choice's message.
    """
    system_message = {
        "role": "system",
        "content": "Responda às perguntas dos usuários de forma curta e concisa (uma frase curta)."
    }
    user_message = {"role": "user", "content": question}

    completion = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def langsmith_app_2(inputs):
    """Adapt ``my_app_2`` to the dict-in / dict-out shape used for evaluation.

    Reads the question from ``inputs["question"]`` and returns the answer
    under the ``"output"`` key.
    """
    question = inputs["question"]
    return {"output": my_app_2(question)}

from langsmith.evaluation import evaluate

# Same evaluation as the gpt-4o-mini experiment, but targeting
# `langsmith_app_2` and using the "openai-gpt-3.5-turbo" experiment prefix.
# Note: this rebinds `experiment_results`, replacing the value assigned by the
# earlier evaluation cell.
experiment_results = evaluate(
    langsmith_app_2, 
    data=dataset_name, 
    evaluators=[evaluate_length, qa_evaluator], 
    experiment_prefix="openai-gpt-3.5-turbo",
)

View the evaluation results for experiment: 'openai-gpt-3.5-turbo-1097f19a' at:
https://smith.langchain.com/o/f8237160-33a4-53f2-b52e-75bb63c1854e/datasets/1de0865d-d403-4018-9035-f34a74c11209/compare?selectedSessions=23acec67-9e0a-4230-b3ac-0fab4a6aa4b4




0it [00:00, ?it/s]