In [1]:
!pip install langfuse openai langchain --upgrade

Collecting langfuse
  Obtaining dependency information for langfuse from https://files.pythonhosted.org/packages/35/b6/0a4a5a3914f2648176bf4af4959d27c79389eada49c2111946d30e9a5d5f/langfuse-1.1.9-py3-none-any.whl.metadata
  Downloading langfuse-1.1.9-py3-none-any.whl.metadata (3.1 kB)
Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/1e/9f/385c25502f437686e4aa715969e5eaf5c2cb5e5ffa7c5cdd52f3c6ae967a/openai-0.28.1-py3-none-any.whl.metadata
  Using cached openai-0.28.1-py3-none-any.whl.metadata (11 kB)
Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/42/4e/86204994aeb2e4ac367a7fade896b13532eae2430299052eb2c80ca35d2c/langchain-0.0.329-py3-none-any.whl.metadata
  Downloading langchain-0.0.329-py3-none-any.whl.metadata (16 kB)
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/f8/f9/66b03ceac5e7ac45392d3ab207e029ec4d13296cff5b

In [2]:
from dotenv import load_dotenv
load_dotenv()

from langfuse import Langfuse
 
langfuse = Langfuse()

In [3]:
from langfuse.model import CreateDatasetRequest
 
langfuse.create_dataset(CreateDatasetRequest(name="capital_cities"))

Dataset(id='clohk78do0001u03a7dvu5e5a', name='capital_cities', status=<DatasetStatus.ACTIVE: 'ACTIVE'>, project_id='clohajzh20013ns3b9s3mh9wk', created_at=datetime.datetime(2023, 11, 2, 19, 7, 56, 844000, tzinfo=datetime.timezone.utc), updated_at=datetime.datetime(2023, 11, 2, 19, 7, 56, 844000, tzinfo=datetime.timezone.utc), items=[], runs=[])

In [4]:
# example items, could also be json instead of strings
local_items = [
    {"input": {"country": "Italy"}, "expected_output": "Rome"},
    {"input": {"country": "Spain"}, "expected_output": "Madrid"},
    {"input": {"country": "Brazil"}, "expected_output": "Brasília"},
    {"input": {"country": "Japan"}, "expected_output": "Tokyo"},
    {"input": {"country": "India"}, "expected_output": "New Delhi"},
    {"input": {"country": "Canada"}, "expected_output": "Ottawa"},
    {"input": {"country": "South Korea"}, "expected_output": "Seoul"},
    {"input": {"country": "Argentina"}, "expected_output": "Buenos Aires"},
    {"input": {"country": "South Africa"}, "expected_output": "Pretoria"},
    {"input": {"country": "Egypt"}, "expected_output": "Cairo"},
]

In [5]:
from langfuse.model import CreateDatasetItemRequest
 
# Upload to Langfuse
for item in local_items:
  langfuse.create_dataset_item(
    CreateDatasetItemRequest(
        dataset_name="capital_cities",
        # any python object or value
        input=item["input"],
        # any python object or value, optional
        expected_output=item["expected_output"]
    )
)

In [6]:
# we use a very simple eval here, you can use any eval library
def simple_evaluation(output, expected_output):
  return output == expected_output

In [13]:
from datetime import datetime
from langfuse.client import InitialGeneration
 
def run_my_custom_llm_app(input, system_prompt):
  messages = [
      {"role":"system", "content": system_prompt},
      {"role":"user", "content": input["country"]}
  ]
 
  generationStartTime = datetime.now()
 
  openai_completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=messages
  ).choices[0].message.content
 
  langfuse_generation = langfuse.generation(InitialGeneration(
      name="guess-countries",
      prompt=messages,
      completion=openai_completion,
      model="gpt-3.5-turbo",
      startTime=generationStartTime,
      endTime=datetime.now()
  ))
 
  return openai_completion, langfuse_generation

In [14]:
from langfuse.client import CreateScore
 
def run_experiment(experiment_name, system_prompt):
  dataset = langfuse.get_dataset("capital_cities")
 
  for item in dataset.items:
    completion, langfuse_generation = run_my_custom_llm_app(item.input, system_prompt)
 
    item.link(langfuse_generation, experiment_name) # pas the observation/generation object or the id
 
    langfuse_generation.score(CreateScore(
      name="exact_match",
      value=simple_evaluation(completion, item.expected_output)
    ))

In [15]:
run_experiment(
    "famous_city",
    "The user will input countries, respond with the mst famous city in this country"
)
run_experiment(
    "directly_ask",
    "What is the capital of the following country?"
)
run_experiment(
    "asking_specifically",
    "The user will input countries, respond with only the name of the capital"
)
run_experiment(
    "asking_specifically_2nd_try",
    "The user will input countries, respond with only the name of the capital. State only the name of the city."
)

RateLimitError: Rate limit reached for gpt-3.5-turbo in organization org-hlc7lWO9D9K29sQNyGlPfR55 on requests per min. Limit: 3 / min. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.