In [1]:
from __future__ import annotations

from dotenv import load_dotenv
from langsmith import Client

import pandas as pd

In [2]:
# Load variables from a local .env file into the process environment before any
# client is created (presumably the LangSmith credentials that Client() relies on
# -- confirm against your .env). The call is the cell's last expression, so its
# boolean result is displayed as the cell output.
load_dotenv()

True

In [3]:
def extract_latency(run) -> float:
    """Return the wall-clock duration of a run in seconds.

    Args:
        run: Any object exposing ``start_time`` and ``end_time`` datetime
            attributes (the notebook passes LangSmith run objects).

    Returns:
        ``(end_time - start_time)`` expressed in seconds, or ``nan`` when the
        run has no ``end_time``.
    """
    # A run without an end time cannot be timed. Returning NaN instead of
    # raising TypeError keeps a single such run from aborting the whole
    # latency list; pandas statistics ignore NaN values.
    if run.end_time is None:
        return float("nan")
    return (run.end_time - run.start_time).total_seconds()

In [19]:
# A single LangSmith client is shared by both project downloads below.
client = Client()

# Fetch every run recorded under the "latency-openai-summarymem" project and
# materialise the result so later cells can iterate over it repeatedly.
langchain_project_runs = client.list_runs(
    project_name="latency-openai-summarymem",
)
langchain_runs = [*langchain_project_runs]

# Same download for the "latency-exploration" project (the Zep runs).
zep_project_runs = client.list_runs(
    project_name="latency-exploration",
)
zep_runs = [*zep_project_runs]

In [28]:
def _is_summarizer_root_run(candidate):
    """True for a parentless LLMChain chain run whose inputs contain "summary"."""
    return (
        candidate.run_type == "chain"
        and "LLMChain" in candidate.name
        and candidate.parent_run_id is None
        and "summary" in candidate.inputs
    )


# Select the matching runs from the LangChain project, then time each of them.
summarizer_mem_runs = list(filter(_is_summarizer_root_run, langchain_runs))
summarizer_mem_runs_latency = list(map(extract_latency, summarizer_mem_runs))

# Latency distribution in seconds, including the tail percentiles.
summarizer_mem_runs_latency_s = pd.Series(summarizer_mem_runs_latency)
summarizer_mem_runs_latency_s.describe(percentiles=[0.5, 0.9, 0.95, 0.99])

count    50.000000
mean      5.822905
std       3.010595
min       1.472577
50%       5.520660
90%      10.278092
95%      11.394816
99%      12.723813
max      13.681910
dtype: float64

In [27]:
# Retriever runs from the Zep project that carry a "zep" tag.
# Every tag is checked case-insensitively (the same way the OpenAI/Chroma
# retriever cell matches "openai"), and a run with missing or empty tags is
# treated as "no match". Indexing tags[0] would raise on such runs and would
# miss a "zep" tag that is not listed first.
zep_retriever_runs = [
    run
    for run in zep_runs
    if run.run_type == "retriever"
    and any("zep" in tag.lower() for tag in (run.tags or ()))
]
# Per-run latency in seconds, then its summary statistics with tail percentiles.
zep_retriever_latency = [extract_latency(run) for run in zep_retriever_runs]
zep_retriever_latency_s = pd.Series(zep_retriever_latency)
zep_retriever_latency_s.describe(percentiles=[0.5, 0.9, 0.95, 0.99])

count    50.000000
mean      0.072708
std       0.042126
min       0.039604
50%       0.058675
90%       0.119549
95%       0.171659
99%       0.211197
max       0.231698
dtype: float64

In [29]:
# Retriever runs from the LangChain project that carry an "openai" tag
# (case-insensitive). `run.tags or ()` makes a retriever run without any tags
# count as "no match" instead of raising TypeError when iterating over None.
open_ai_chroma_retriever_runs = [
    run
    for run in langchain_runs
    if run.run_type == "retriever"
    and any("openai" in tag.lower() for tag in (run.tags or ()))
]
# Per-run latency in seconds.
open_ai_chroma_retriever_latency = [
    extract_latency(run) for run in open_ai_chroma_retriever_runs
]

# Summary statistics including the tail percentiles.
open_ai_chroma_retriever_latency_s = pd.Series(open_ai_chroma_retriever_latency)
open_ai_chroma_retriever_latency_s.describe(percentiles=[0.5, 0.9, 0.95, 0.99])

count    50.000000
mean      0.187475
std       0.302954
min       0.090668
50%       0.121353
90%       0.214180
95%       0.337083
99%       1.388666
max       2.216810
dtype: float64

In [31]:
# Parentless chain runs from the LangChain project, excluding LLMChain runs.
# NOTE: the "lanchain" spelling is kept on purpose -- the token-count cell
# further down reads `lanchain_chain_runs` by this exact name.
lanchain_chain_runs = []
for candidate in langchain_runs:
    if candidate.run_type != "chain":
        continue
    if "LLMChain" in candidate.name:
        continue
    if candidate.parent_run_id is not None:
        continue
    lanchain_chain_runs.append(candidate)

# Duration of each selected run, in seconds.
lanchain_chain_runs_latency = list(map(extract_latency, lanchain_chain_runs))

# Summary statistics including the tail percentiles.
lanchain_chain_runs_latency_s = pd.Series(lanchain_chain_runs_latency)
lanchain_chain_runs_latency_s.describe(percentiles=[0.5, 0.9, 0.95, 0.99])

count      5.000000
mean     152.300911
std      142.875848
min       72.070114
50%       79.294726
90%      295.768399
95%      349.722451
99%      392.885693
max      403.676503
dtype: float64

In [39]:
# Read the "total_tokens" entry from each selected chain run's `extra` mapping
# (None when the key is absent), then summarise the values.
lanchain_chain_runs_tokens = list(
    map(lambda chain_run: chain_run.extra.get("total_tokens"), lanchain_chain_runs)
)

lanchain_chain_runs_tokens_s = pd.Series(lanchain_chain_runs_tokens)
lanchain_chain_runs_tokens_s.describe()

count       5.000000
mean     6931.600000
std       239.272439
min      6569.000000
25%      6914.000000
50%      6951.000000
75%      6986.000000
max      7238.000000
dtype: float64

In [36]:
def _is_zep_root_chain(candidate):
    """True for a parentless chain run whose name does not contain "LLMChain"."""
    return (
        candidate.run_type == "chain"
        and "LLMChain" not in candidate.name
        and candidate.parent_run_id is None
    )


# Select the matching runs from the Zep project and time each one in seconds.
zep_chain_runs = list(filter(_is_zep_root_chain, zep_runs))
zep_chain_runs_latency = list(map(extract_latency, zep_chain_runs))

# Summary statistics including the tail percentiles.
zep_chain_runs_latency_s = pd.Series(zep_chain_runs_latency)
zep_chain_runs_latency_s.describe(percentiles=[0.5, 0.9, 0.95, 0.99])

count     5.000000
mean     22.754092
std      13.678106
min      15.453398
50%      17.104494
90%      35.487205
95%      41.324212
99%      45.993818
max      47.161219
dtype: float64

In [40]:
# Collect the "total_tokens" entry from each Zep chain run's `extra` mapping
# (None when the key is absent), then summarise the values.
zep_chain_runs_tokens = []
for chain_run in zep_chain_runs:
    zep_chain_runs_tokens.append(chain_run.extra.get("total_tokens"))

zep_chain_runs_tokens_s = pd.Series(zep_chain_runs_tokens)
zep_chain_runs_tokens_s.describe()

count       5.000000
mean     6439.600000
std       337.502296
min      5891.000000
25%      6347.000000
50%      6581.000000
75%      6665.000000
max      6714.000000
dtype: float64