# Upload NAT Calculator Traces to DBNL

This notebook uploads OTEL traces from the NAT calculator agent to DBNL for analysis.

**Prerequisites:**
- DBNL sandbox running (`dbnl sandbox start`)
- Trace files `traces_v0.jsonl` and `traces_v1.jsonl`, generated via `run_batch.py` or downloaded from S3

In [None]:
import dbnl
import pandas as pd
import json
from datetime import UTC, datetime, timedelta
import numpy as np
import random
from nat_otel_converter import dbnl_df_from_otel_file
from data_augment_helpers import compute_expected_answer, compute_feedback, compute_abs_error, est_cost_from_llm_tokens

# Show which dbnl SDK version this notebook is running against.
print(f"dbnl version: {dbnl.__version__}")

In [None]:
# Authenticate against DBNL (default Sandbox URLs).
# app_url is kept as a global: upload_to_dbnl uses it to print dashboard links.
app_url = "http://localhost:8080/"
dbnl.login(
    api_token="<DBNL_API_KEY>",  # found at http://localhost:8080/tokens
    api_url=f"{app_url}api",
)

In [None]:
# Fetch the project, creating it on first run.
project_settings = {
    "name": "NAT Calculator HPO Example",
    "schedule": "daily",
    "default_llm_model_name": "quickstart_model",  # From step (2) in quickstart
}
project = dbnl.get_or_create_project(**project_settings)

In [None]:
# Helper functions

def break_into_days(df, start_day, num_days, variation=0.10):
    """
    Split dataframe into daily chunks with ±variation in size.
    Ensures all rows are used exactly once.

    Rows keep their existing order: the first chunk is assigned to the date
    of ``start_day``, the next chunk to the following day, and so on. The
    ``timestamp`` column of every row is overwritten with a random time of
    day on its assigned date.

    Args:
        df: DataFrame to distribute; it is not modified (chunks are copies).
        start_day: datetime whose date is used for the first chunk.
        num_days: Number of consecutive days, and of returned frames.
        variation: Maximum relative deviation applied to each day's even
            share before rounding (0.10 means up to ±10%).

    Returns:
        A list of ``num_days`` DataFrames, each with a fresh 0..n-1 index.
        Frames can be empty when there are fewer rows than days.

    Raises:
        ValueError: If ``num_days`` is less than 1.
    """
    if num_days < 1:
        raise ValueError(f"num_days must be at least 1, got {num_days}")

    total_rows = len(df)
    base = total_rows / num_days
    sizes = [
        int(base * (1 + np.random.uniform(-variation, variation)))
        for _ in range(num_days)
    ]

    if sum(sizes) > 0:
        # Rescale so the sizes add up to (at most) total_rows, then let the
        # last day absorb whatever the integer truncation left over.
        scale = total_rows / sum(sizes)
        sizes = [int(s * scale) for s in sizes]
        sizes[-1] = total_rows - sum(sizes[:-1])
    else:
        # Every raw size truncated to 0 (empty frame, or fewer rows than
        # days): rescaling would divide by zero, so split evenly instead.
        per_day, extra = divmod(total_rows, num_days)
        sizes = [per_day + (1 if i < extra else 0) for i in range(num_days)]

    day_dfs = []
    idx = 0
    for day_idx, size in enumerate(sizes):
        chunk = df.iloc[idx : idx + size].copy()
        idx += size

        # Seed every row with noon on the chunk's day; only the date part
        # matters, the time of day is randomized below.
        chunk["timestamp"] = start_day.replace(hour=12) + timedelta(days=day_idx)

        if size:
            # Assign by position in one step instead of per-row `.at` writes,
            # which address rows by index label and so give rows that share
            # a label the same timestamp.
            chunk["timestamp"] = [
                randomize_timestamps_within_day(row)
                for row in chunk[["timestamp"]].itertuples(index=False)
            ]

        day_dfs.append(chunk.reset_index(drop=True))

    return day_dfs

def randomize_timestamps_within_day(row):
    """Return ``row.timestamp`` moved to a random time on the same date.

    Hour, minute and second are drawn uniformly with the ``random`` module;
    every other field of the timestamp is left as it was.

    Args:
        row: Any object with a ``timestamp`` attribute that supports
            ``.replace(hour=..., minute=..., second=...)``.

    Returns:
        The timestamp with its hour, minute and second replaced.
    """
    # Drawn in hour, minute, second order so seeded runs stay reproducible.
    time_of_day = {
        "hour": random.randint(0, 23),
        "minute": random.randint(0, 59),
        "second": random.randint(0, 59),
    }
    return row.timestamp.replace(**time_of_day)

def upload_to_dbnl(day_dfs):
    """Upload daily dataframes to DBNL.

    Each frame is logged to the notebook-level ``project`` with a one-day
    window that starts at midnight of the frame's earliest ``timestamp``.
    Links to the project are printed using the notebook-level ``app_url``.

    Args:
        day_dfs: Sequence of DataFrames, one per day, each with a
            ``timestamp`` column.

    Raises:
        Exception: Any error from ``dbnl.log`` whose message does not
            contain "Data already exists" is re-raised unchanged.
    """
    # app_url is defined with a trailing slash; strip it so the printed
    # links do not contain "//ns/...".
    project_url = f"{app_url.rstrip('/')}/ns/{project.namespace_id}/projects/{project.id}"
    print(f"Status: {project_url}/status")

    total = len(day_dfs)
    for idx, day_df in enumerate(day_dfs):
        if day_df.empty:
            # There is no timestamp to derive the day's window from.
            print(f"{idx + 1}/{total} no traces for this day, skipping...")
            continue

        first_ts = day_df["timestamp"].min()
        print(f"{idx + 1}/{total} uploading {first_ts.date()} : {len(day_df)} traces.")
        data_start = first_ts.replace(hour=0, minute=0, second=0, microsecond=0)
        data_end = data_start + timedelta(days=1)
        try:
            dbnl.log(
                project_id=project.id,
                data_start_time=data_start,
                data_end_time=data_end,
                data=day_df,
            )
        except Exception as e:
            # Makes re-running the notebook safe: days already uploaded are
            # skipped, anything else is a real failure.
            if "Data already exists" in str(e):
                print("  Data already exists, skipping...")
                continue
            raise
    print(f"\nExplore: {project_url}")

## Upload v0 Traces (Pre-Optimization)

These traces have `hyper_error_term=1.0`, which causes errors on calculations with larger numbers.

In [None]:
# Load the v0 traces
df_v0 = dbnl_df_from_otel_file("traces_v0.jsonl")
print(f"Loaded {len(df_v0)} v0 traces")

# Augment with derived columns; output_expected is added before the
# row-wise helpers run, matching the original statement order.
expected_v0 = df_v0["input"].apply(compute_expected_answer)
df_v0["output_expected"] = expected_v0
feedback_v0 = df_v0.apply(compute_feedback, axis=1)
df_v0[["feedback_score", "feedback_text"]] = feedback_v0
abs_error_v0 = df_v0.apply(compute_abs_error, axis=1)
df_v0["absolute_error"] = abs_error_v0
cost_v0 = df_v0["traces_data"].apply(est_cost_from_llm_tokens)
df_v0["total_cost"] = cost_v0
df_v0["agent_version"] = json.dumps("v0")  # stored as a JSON-encoded string

# Upload as 8 days of data, starting 16 days ago and ending 8 days ago
v0_start_day = datetime.now(tz=UTC) - timedelta(days=16)
day_dfs_v0 = break_into_days(df_v0, start_day=v0_start_day, num_days=8)
upload_to_dbnl(day_dfs_v0)

## Upload v1 Traces (Post-Optimization)

These traces have `hyper_error_term=0.0` (the optimized value), which fixes the calculation errors.

In [None]:
# Load, augment, and upload v1 traces
df_v1 = dbnl_df_from_otel_file("traces_v1.jsonl")
print(f"Loaded {len(df_v1)} v1 traces")

df_v1["output_expected"] = df_v1["input"].apply(compute_expected_answer)
df_v1[["feedback_score", "feedback_text"]] = df_v1.apply(compute_feedback, axis=1)
df_v1["absolute_error"] = df_v1.apply(compute_abs_error, axis=1)
df_v1["total_cost"] = df_v1["traces_data"].apply(est_cost_from_llm_tokens)
# Tag the v1 frame itself. Writing to df_v0 here would leave the uploaded v1
# data without an agent_version column to segment on.
df_v1["agent_version"] = json.dumps("v1")

# Upload as 8 days of data, starting 8 days ago (the 8 days after the v0 window)
day_dfs_v1 = break_into_days(df_v1, start_day=datetime.now(tz=UTC) - timedelta(days=8), num_days=8)
upload_to_dbnl(day_dfs_v1)

## View Results in DBNL

In the DBNL dashboard, you should see:
- **v0 period**: Higher `absolute_error` values, especially for calculations with larger numbers
- **v1 period**: Near-zero `absolute_error` across all calculations

The `agent_version` field lets you segment by version to compare performance.