In [1]:
import pandas as pd
from collections import defaultdict
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import datetime, timedelta

from langsmith import Client
from tqdm.auto import tqdm

[API Docs](https://docs.smith.langchain.com/how_to_guides/tracing/export_traces)

In [4]:
langsmith_api = " " # <- key here
project_name = "Agent One | 4o" # <- project name here
num_days = 10 # <- days 

In [None]:
client = Client(api_key=langsmith_api)

llm_runs = client.list_runs(
    project_name=project_name,
    start_time=datetime.now() - timedelta(days=num_days),
    run_type="llm",
    select=["trace_id", "name", "run_type", "start_time", "end_time"], 
)

data = []
futures: list[Future] = []
trace_cursor = 0
trace_batch_size = 50

llm_runs_by_parent = defaultdict(lambda: defaultdict(set))

def process_trace_batch(trace_ids):
    return client.list_runs(
        project_name=project_name,
        run_ids=trace_ids,
        select=["name", "inputs", "outputs", "run_type", "start_time", "end_time"],  
    )

with ThreadPoolExecutor(max_workers=2) as executor:
    for run in tqdm(llm_runs, desc="Grouping LLM Runs by Trace ID"):
        llm_runs_by_parent[run.trace_id]["llms_involved"].add(run.name)

        if len(llm_runs_by_parent) % trace_batch_size == 0:
            current_traces = list(llm_runs_by_parent.keys())
            if this_batch := current_traces[trace_cursor: trace_cursor + trace_batch_size]:
                trace_cursor += trace_batch_size
                futures.append(
                    executor.submit(process_trace_batch, this_batch)
                )

    remaining_traces = list(llm_runs_by_parent.keys())[trace_cursor:]
    if remaining_traces:
        futures.append(
            executor.submit(process_trace_batch, remaining_traces)
        )

for future in tqdm(futures, desc="Processing Batches"):
    try:
        root_runs = future.result()
        for root_run in root_runs:
            print(dir(root_run))  # Add this line to inspect root_run
            root_data = llm_runs_by_parent[root_run.id]
            print(root_data)
            data.append(
                {
                    "run_id": root_run.id,
                    "run_name": root_run.name,
                    "run_type": root_run.run_type,
                    "start_time": root_run.start_time,  
                    "end_time": root_run.end_time, 
                    "inputs": root_run.inputs,
                    "outputs": root_run.outputs,
                    "llms_involved": list(root_data["llms_involved"]),
                }
            )
    except Exception as e:
        print(f"Error processing batch: {e}")

df = pd.DataFrame(data)

df['run_id'] = df['run_id'].astype(str)
df['start_time'] = pd.to_datetime(df['start_time'])
df['end_time'] = pd.to_datetime(df['end_time'])

display(df.head())

Grouping LLM Runs by Trace ID: 0it [00:00, ?it/s]

Processing Batches:   0%|          | 0/2 [00:00<?, ?it/s]

defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultdict(<class 'set'>, {'llms_involved': {'ChatOpenAI'}})
defaultd

Unnamed: 0,run_id,run_name,run_type,start_time,end_time,inputs,outputs,llms_involved
0,90ed7ff1-6762-450a-8aaf-2bb7e50fd56f,LangGraph,chain,2024-11-10 13:39:02.915854,2024-11-10 13:39:08.751190,"{'messages': [['user', 'im done of r tdoay. gi...","{'messages': [['user', 'Hello! I am Edward'], ...",[ChatOpenAI]
1,fa22b898-b4b7-42d0-af82-6673b798975c,LangGraph,chain,2024-11-10 13:38:32.702147,2024-11-10 13:38:35.793551,"{'messages': [['user', 'so whet we ahvent lear...","{'messages': [['user', 'Hello! I am Edward'], ...",[ChatOpenAI]
2,b2b846af-9998-4b17-a7d7-3fa7e7507753,LangGraph,chain,2024-11-10 13:38:16.337007,2024-11-10 13:38:22.022200,"{'messages': [['user', 'lets se full scope if ...","{'messages': [['user', 'Hello! I am Edward'], ...",[ChatOpenAI]
3,9f4cdd18-a04c-4c56-aa55-cde596074985,LangGraph,chain,2024-11-10 13:37:33.244703,2024-11-10 13:37:40.215952,"{'messages': [['user', 'ok']]}","{'messages': [['user', 'Hello! I am Edward'], ...",[ChatOpenAI]
4,23abb208-4cfa-48e1-a0bb-5c1ec2f1409b,LangGraph,chain,2024-11-10 13:37:22.501437,2024-11-10 13:37:26.085656,"{'messages': [['user', 'so what then?']]}","{'messages': [['user', 'Hello! I am Edward'], ...",[ChatOpenAI]


In [34]:
df.to_csv("output.csv", index=False)