In [133]:
from analysis.read_logs import load_benchmark_dir
import duckdb
import plotly.express as px
import polars as pl

In [134]:
# Directory holding the benchmark run analysed in this notebook (machine-specific absolute path).
BENCHMARK_DIR = "/Users/arc/iavl-bench-data/run-mixed-large2"
data = load_benchmark_dir(BENCHMARK_DIR)

In [135]:
def calculate_batch_ops_per_sec(versions_df, batch_size=100):
    """Aggregate per-version throughput into fixed-size batches of versions.

    Rows are bucketed by ``CEIL(version / batch_size)`` and each bucket is
    labelled with its upper bound (with ``batch_size=100`` versions 1..100 are
    reported as ``100``, 101..200 as ``200``, and so on). For every bucket the
    summed ``count`` is divided by the summed ``duration`` scaled by 1e9
    (presumably nanoseconds -> seconds; confirm against the log schema).

    Args:
        versions_df: Table exposing ``version``, ``count`` and ``duration``
            columns. It is referenced *by name* inside the SQL text, so this
            parameter must keep the name ``versions_df``.
        batch_size: Number of versions per bucket; must be a positive whole
            number.

    Returns:
        The query result converted with ``.pl()``, holding the columns
        ``version`` (bucket label) and ``ops_per_sec``, ordered by ``version``.

    Raises:
        ValueError: If ``batch_size`` is not a positive whole number.
    """
    # Validate before interpolating into SQL: zero divides by zero inside the
    # bucket expression, and a float such as 100.0 would render as "100.0.0"
    # and surface only as an opaque SQL parse error.
    try:
        size = int(batch_size)
    except (TypeError, ValueError):
        size = None
    if size is None or size != batch_size or size <= 0:
        raise ValueError(
            f"batch_size must be a positive whole number, got {batch_size!r}"
        )

    return duckdb.sql(f"""
        SELECT 
            CEIL(version / {size}.0) * {size} as version,
            SUM(count) / (SUM(duration) / 1000000000.0) as ops_per_sec
        FROM versions_df
        GROUP BY CEIL(version / {size}.0)
        ORDER BY version
    """).pl()

In [136]:
# Compute batched throughput for every loaded benchmark run, keyed by run name
batch_size = 100
batch_results = {
    run.name: calculate_batch_ops_per_sec(run.versions_df, batch_size=batch_size)
    for run in data
}

In [137]:
# Tag each batched result with its dataset name, then stack them into one
# long-format frame so plotly can colour lines by dataset
plot_data = [
    batched.with_columns(pl.lit(dataset_name).alias('dataset'))
    for dataset_name, batched in batch_results.items()
]
combined_df = pl.concat(plot_data)

# Interactive throughput chart: one line per dataset
fig = px.line(
    combined_df,
    x='version',
    y='ops_per_sec',
    color='dataset',
    title=f'Batch Operations per Second (Every {batch_size} Versions)',
    labels={'version': 'Version Batch', 'ops_per_sec': 'Operations per Second'},
)

# X axis: a tick every 1000 versions, spanning 0 .. largest batch label
max_version = combined_df['version'].max()
fig.update_xaxes(dtick=1000, range=[0, max_version])

# Taller figure for readability
fig.update_layout(height=600)

fig.show()

In [138]:
# Stack every run's per-version frame into one long-format frame,
# labelling the rows with the run name
mem_plot_data = [
    run.versions_df.with_columns(pl.lit(run.name).alias('dataset'))
    for run in data
]
mem_combined_df = pl.concat(mem_plot_data)

# Interactive memory chart: one marker-less line per dataset
fig = px.line(
    mem_combined_df,
    x='version',
    y='mem_gb',
    color='dataset',
    markers=False,
    title='Memory Usage',
    labels={'version': 'Version', 'mem_gb': 'Mem (GB)'},
)

# Thin lines keep overlapping series legible; taller figure for readability
fig.update_traces(line=dict(width=1))
fig.update_layout(height=600)

fig.show()

In [139]:
def get_gc_pauses(versions):
    """Return the per-version increase of the ``PauseTotalNs`` counter, in seconds.

    Each element of ``versions`` must expose ``full_stats.mem_stats["PauseTotalNs"]``.
    The counter is treated as cumulative: every entry of the result is the
    difference to the previous element's value divided by 1e9 (nanoseconds ->
    seconds, going by the key name). The first entry is always ``0.0`` because
    the first element serves as its own baseline.

    Args:
        versions: Iterable of version records; may be empty and does not need
            to support indexing.

    Returns:
        A list of floats with one entry per element of ``versions``
        (``[]`` for empty input).
    """
    pauses = []
    previous_total = None
    for record in versions:
        current_total = record.full_stats.mem_stats["PauseTotalNs"]
        if previous_total is None:
            # Seed the baseline from the first record instead of indexing
            # versions[0], so empty input and plain iterables are handled.
            previous_total = current_total
        pauses.append((current_total - previous_total) / 1_000_000_000)
        previous_total = current_total
    return pauses

In [140]:
# Index the loaded runs by name so a single run can be picked out directly
data_dict = dict((run.name, run) for run in data)
iavl_evict_20 = data_dict["iavl-v2-alpha6-evict-20"]

In [141]:
# Batched throughput for the evict-20 run only (default batch size)
iavl_evict_20_ops = calculate_batch_ops_per_sec(iavl_evict_20.versions_df)
px.line(iavl_evict_20_ops, x='version', y='ops_per_sec')

In [142]:
# Per-version PauseTotalNs deltas (scaled by 1e9) for the evict-20 run
iavl_evict_20_gc_pauses = get_gc_pauses(iavl_evict_20.versions)
px.line(iavl_evict_20_gc_pauses)