In [47]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from IPython.display import display

# Use a dark Plotly template across all charts: this sets Plotly's default
# template, so figures created in later cells need no per-figure template argument.
pio.templates.default = "plotly_dark"


In [48]:
# Load the benchmark result CSVs, one frame per (scenario, framework) run.
# Later cells read the 'response-time' and 'offset' columns from these frames.
# nhv_*       -> plotted as the "Heavy network" scenario
# cpu_*       -> plotted as the "CPU bound" scenario
# cpu_*w_sse* -> plotted as the "CPU bound with SSE" scenario
nhv_flash = pd.read_csv("benchmarks/nhv_flash.csv")
nhv_dash = pd.read_csv("benchmarks/nhv_dash.csv")
cpu_flash = pd.read_csv("benchmarks/cpu_bound_flash.csv")
cpu_dash = pd.read_csv("benchmarks/cpu_bound_dash.csv")
cpu_w_sse_flash = pd.read_csv("benchmarks/cpu_bound_flash_w_sse.csv")
cpu_w_sse_dash = pd.read_csv("benchmarks/cpu_bound_dash_w_sse.csv")
# Single run that the later "Flash-only" cells plot on its own.
super_heavy_net = pd.read_csv("benchmarks/super_heavy_net.csv")

In [None]:
# Overlayed distributions by framework for Heavy network and CPU bound scenarios
import pandas as pd
import numpy as np
import plotly.express as px

# Shared colors across all charts, keyed by the framework labels ('Dash', 'Flash')
# that the plotting helpers put in their 'framework' column.
COLOR_MAP = {'Dash': '#636EFA', 'Flash': '#EF553B'}  # blue/red

# Helper to build an overlay histogram with per-framework medians

def overlay_hist(df_dash, df_flash, scenario_title: str, nbins: int = 50):
    """Overlay the Dash and Flash response-time histograms for one scenario.

    Parameters
    ----------
    df_dash, df_flash : pandas.DataFrame
        Benchmark frames; each must contain a 'response-time' column.
    scenario_title : str
        Prefix used in the figure title.
    nbins : int, default 50
        Forwarded to ``px.histogram``.

    Returns
    -------
    The Plotly figure: two semi-transparent, density-normalised histograms
    coloured via ``COLOR_MAP``, plus one dashed vertical line per framework at
    the median of its non-null response times (no line when there are none).
    """
    source_col = 'response-time'

    def _tidy(frame, label):
        # One-column frame of non-null response times, tagged with its framework.
        tidy = frame[[source_col]].rename(columns={source_col: 'response_time'}).dropna().copy()
        tidy['framework'] = label
        return tidy

    dash_rt = _tidy(df_dash, 'Dash')
    flash_rt = _tidy(df_flash, 'Flash')
    combined = pd.concat([dash_rt, flash_rt], ignore_index=True)

    fig = px.histogram(
        combined,
        x='response_time',
        color='framework',
        color_discrete_map=COLOR_MAP,
        barmode='overlay',
        opacity=0.55,
        histnorm='probability density',
        nbins=nbins,
        title=f"{scenario_title}: Response time distribution",
        labels={'response_time': 'Response time (s)', 'framework': 'Framework'},
    )

    # Median reference lines; annotations sit on opposite sides so they do not collide.
    median_specs = (
        ('Dash', dash_rt, 'top right'),
        ('Flash', flash_rt, 'top left'),
    )
    for label, frame, where in median_specs:
        median = frame['response_time'].median() if len(frame) else np.nan
        if pd.isna(median):
            continue
        fig.add_vline(
            x=median,
            line_dash='dash',
            line_color=COLOR_MAP[label],
            annotation_text=f"{label} median {median:.2f}s",
            annotation_position=where,
        )

    fig.update_layout(legend_title_text='')
    return fig

# Heavy network scenario (nhv_* data)
fig_nhv = overlay_hist(nhv_dash, nhv_flash, 'Heavy network')
fig_nhv.show()

# CPU bound scenario (cpu_* data)
fig_cpu = overlay_hist(cpu_dash, cpu_flash, 'CPU bound')
fig_cpu.show()

# CPU bound with SSE scenario (cpu_w_sse_* data)
fig_cpu_w_sse = overlay_hist(cpu_w_sse_dash, cpu_w_sse_flash, 'CPU bound with SSE')
fig_cpu_w_sse.show()


In [50]:
# Requests/sec per framework and per scenario (no combined rows) — nicer presentation
import pandas as pd
import numpy as np
import plotly.express as px
from IPython.display import display

# Reuse COLOR_MAP if defined earlier; else define here.
# Evaluating the bare name raises NameError when it is undefined, so this cell
# still runs if the cell that defines COLOR_MAP has not been executed.
try:
    COLOR_MAP
except NameError:
    COLOR_MAP = {'Dash': '#636EFA', 'Flash': '#EF553B'}


def calc_rps(df: pd.DataFrame) -> tuple[int, float, float]:
    """Summarise a benchmark frame as (request count, elapsed span, requests/sec).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per request. The span is taken from the 'offset' column.

    Returns
    -------
    (n, elapsed, rps)
        ``n`` is ``len(df)``. ``elapsed`` is ``max(offset) - min(offset)``, or
        NaN when the column is missing or there are fewer than two rows.
        ``rps`` is ``n / elapsed``, or NaN whenever ``elapsed`` is not a
        positive number.
    """
    n = len(df)
    elapsed = float('nan')
    rps = float('nan')
    if 'offset' in df.columns and n > 1:
        offsets = df['offset']
        elapsed = float(offsets.max()) - float(offsets.min())
        # A zero span (all requests share one offset) or a NaN span (no usable
        # offsets) has no meaningful rate. Report NaN instead of dividing by a
        # tiny epsilon, which would show up as billions of requests per second.
        if elapsed > 0:
            rps = n / elapsed
    return n, elapsed, rps

# Collect one summary record per (scenario, framework) pair, Dash before Flash.
rows = []
for scenario, (fw1, df1), (fw2, df2) in (
    ("Heavy network", ("Dash", nhv_dash), ("Flash", nhv_flash)),
    ("CPU bound", ("Dash", cpu_dash), ("Flash", cpu_flash)),
    ("CPU bound with SSE", ("Dash", cpu_w_sse_dash), ("Flash", cpu_w_sse_flash)),
):
    for fw, df in ((fw1, df1), (fw2, df2)):
        n, elapsed, rps = calc_rps(df)
        rows.append(dict(
            scenario=scenario,
            framework=fw,
            total_requests=n,
            elapsed_seconds=elapsed,
            requests_per_sec=rps,
        ))

# Build the frame once from the collected records.
summary_rps = pd.DataFrame(rows)

# Pretty tables per scenario (pandas-version compatible index hiding)

def pretty_table(df: pd.DataFrame, title: str):
    """Display a styled totals table for one scenario.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'framework', 'total_requests', 'elapsed_seconds' and
        'requests_per_sec'. The input is copied, not modified.
    title : str
        Caption shown with the table.

    The elapsed time is rounded to 2 decimals and the rate to 4, the columns
    are renamed to 'requests', 'elapsed (s)' and 'req/s', the rate column gets
    a 'Blues' background gradient, and the row index is hidden. The styled
    table is rendered with ``display``; nothing is returned.
    """
    d = df.copy()
    d['elapsed_seconds'] = d['elapsed_seconds'].round(2)
    d['requests_per_sec'] = d['requests_per_sec'].round(4)
    styler = (
        d[['framework', 'total_requests', 'elapsed_seconds', 'requests_per_sec']]
        .rename(columns={
            'total_requests': 'requests',
            'elapsed_seconds': 'elapsed (s)',
            'requests_per_sec': 'req/s',
        })
        .style
        .set_caption(title)
        .background_gradient(subset=['req/s'], cmap='Blues')
        .set_properties(**{'font-size': '12pt'})
    )
    # Hide the row index. Probe the current API (Styler.hide) first: on pandas
    # versions that ship both methods, hide_index is deprecated and warns.
    # Older versions only have hide_index. If neither call works, fall back to
    # hiding the index header cells with CSS.
    try:
        if hasattr(styler, 'hide'):
            styler = styler.hide(axis='index')
        else:
            styler = styler.hide_index()
    except (AttributeError, TypeError):
        styler = styler.set_table_styles(
            [
                {'selector': 'th.row_heading', 'props': [('display', 'none')]},
                {'selector': 'th.blank', 'props': [('display', 'none')]},
            ],
            overwrite=False,
        )
    display(styler)

# Render one styled totals table per scenario, selecting its rows from summary_rps.
pretty_table(summary_rps.query("scenario == 'Heavy network'"), "Heavy network — totals")
pretty_table(summary_rps.query("scenario == 'CPU bound'"), "CPU bound — totals")
pretty_table(summary_rps.query("scenario == 'CPU bound with SSE'"), "CPU bound with SSE — totals")

# Bar charts per scenario with big labels

def rps_bar(df: pd.DataFrame, scenario_title: str):
    """Show a requests/sec bar chart with one bar per framework.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary rows with 'framework' and 'requests_per_sec' columns. The
        input is copied, not modified.
    scenario_title : str
        Prefix used in the figure title.

    Each bar is coloured via ``COLOR_MAP`` and labelled above with its value
    formatted to three decimals. The figure is shown; nothing is returned.
    """
    plot_df = df.copy()
    plot_df['requests_per_sec'] = plot_df['requests_per_sec'].astype(float)
    bar_labels = plot_df['requests_per_sec'].map(lambda value: f"{value:.3f}")

    fig = px.bar(
        plot_df,
        x='framework',
        y='requests_per_sec',
        color='framework',
        color_discrete_map=COLOR_MAP,
        text=bar_labels,
        title=f"{scenario_title}: Requests/sec (total)",
        labels={'framework': 'Framework', 'requests_per_sec': 'Requests/sec'},
    )
    # Labels sit above the bars; disable axis clipping so the tallest label is not cut off.
    fig.update_traces(textposition='outside', cliponaxis=False)
    fig.update_layout(
        yaxis=dict(tickformat='.2f'),
        uniformtext_minsize=10,
        uniformtext_mode='hide',
        showlegend=False,
    )
    fig.show()

# Draw one requests/sec bar chart per scenario from the summary_rps rows.
rps_bar(summary_rps.query("scenario == 'Heavy network'"), "Heavy network")
rps_bar(summary_rps.query("scenario == 'CPU bound'"), "CPU bound")
rps_bar(summary_rps.query("scenario == 'CPU bound with SSE'"), "CPU bound with SSE")

framework,requests,elapsed (s),req/s
Dash,100,115.36,0.8669
Flash,100,26.25,3.8088


framework,requests,elapsed (s),req/s
Dash,200,48.21,4.1489
Flash,200,42.55,4.7002


framework,requests,elapsed (s),req/s
Dash,200,62.97,3.176
Flash,200,43.45,4.603


In [51]:
# Requests per second (RPS) over time with consistent colors
import pandas as pd
import numpy as np
import plotly.express as px

# Reuse COLOR_MAP when an earlier cell already defined it; evaluating the bare
# name raises NameError otherwise, in which case the palette is defined here.
try:
    COLOR_MAP
except NameError:
    COLOR_MAP = {'Dash': '#636EFA', 'Flash': '#EF553B'}

# We treat 'offset' (s) as the event time; group by floor-second to compute RPS

def compute_rps(df: pd.DataFrame) -> pd.DataFrame:
    """Count requests per whole second of the 'offset' column.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per request, with a numeric 'offset' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'sec' (the floored offset) and 'rps' (number of rows falling
        in that second), sorted by 'sec'. Seconds with no requests are absent.
        Rows whose offset is missing are ignored.

    Raises
    ------
    ValueError
        If ``df`` has no 'offset' column.
    """
    if 'offset' not in df.columns:
        raise ValueError('CSV missing required column: offset')
    # Drop missing offsets before flooring: casting floor(NaN) to int yields a
    # meaningless sentinel second that would corrupt the counts and blow up
    # any caller that builds a continuous range from min to max second.
    offsets = df['offset'].astype(float).dropna().to_numpy()
    sec = np.floor(offsets).astype(int)
    # np.unique returns the distinct seconds in ascending order with their counts.
    seconds, counts = np.unique(sec, return_counts=True)
    return pd.DataFrame({'sec': seconds, 'rps': counts})


def overlay_rps(df_dash: pd.DataFrame, df_flash: pd.DataFrame, scenario_title: str):
    """Plot requests per second over time for Dash and Flash on one chart.

    Parameters
    ----------
    df_dash, df_flash : pandas.DataFrame
        Benchmark frames accepted by ``compute_rps``.
    scenario_title : str
        Prefix used in the figure title.

    Returns
    -------
    The Plotly line figure. Both series cover every second between the
    smallest and largest second seen in either frame; seconds with no
    requests are plotted as 0.
    """
    stacked = pd.concat(
        [
            compute_rps(df_dash).assign(framework='Dash'),
            compute_rps(df_flash).assign(framework='Flash'),
        ],
        ignore_index=True,
    )

    # Shared timeline so both frameworks are drawn over the same seconds.
    first_sec = stacked['sec'].min()
    last_sec = stacked['sec'].max()
    timeline = pd.DataFrame({'sec': np.arange(first_sec, last_sec + 1)})

    padded_parts = []
    for label, group in stacked.groupby('framework'):
        padded = timeline.merge(group, on='sec', how='left')
        # Seconds missing from this framework come back as NaN from the left merge.
        padded['framework'] = label
        padded['rps'] = padded['rps'].fillna(0)
        padded_parts.append(padded)
    rps_all = pd.concat(padded_parts, ignore_index=True)

    fig = px.line(
        rps_all,
        x='sec',
        y='rps',
        color='framework',
        color_discrete_map=COLOR_MAP,
        markers=True,
        title=f"{scenario_title}: Requests per second over time",
        labels={'sec': 'Time (s)', 'rps': 'Requests per second', 'framework': ''},
    )
    fig.update_traces(mode='lines+markers')
    return fig

# One overlaid requests-per-second chart per scenario.
# Heavy network (nhv_*)
fig_rps_nhv = overlay_rps(nhv_dash, nhv_flash, 'Heavy network')
fig_rps_nhv.show()

# CPU bound (cpu_*)
fig_rps_cpu = overlay_rps(cpu_dash, cpu_flash, 'CPU bound')
fig_rps_cpu.show()

# CPU bound with SSE (cpu_w_sse_*)
fig_rps_cpu_w_sse = overlay_rps(cpu_w_sse_dash, cpu_w_sse_flash, 'CPU bound with SSE')
fig_rps_cpu_w_sse.show()

In [52]:
# Flash-only response-time density plot
# Use `super_heavy_net` when it is already a DataFrame. Otherwise (name undefined,
# or left as None by a failed earlier run) fall back to the first already-defined
# frame that looks like a per-request log.
if not isinstance(globals().get('super_heavy_net'), pd.DataFrame):
    super_heavy_net = None
    for name in ['flash_only', 'nhv_flash', 'cpu_flash', 'cpu_w_sse_flash']:
        # Plain namespace lookup: a missing name yields None, so no eval and no
        # blanket exception handler are needed.
        candidate = globals().get(name)
        # Heuristic: prefer tables that look like per-request logs (have an 'offset' or 'response-time' column)
        if isinstance(candidate, pd.DataFrame) and any(c in candidate.columns for c in ['offset', 'response-time', 'response_time']):
            super_heavy_net = candidate
            print(f"Using dataframe '{name}' as super_heavy_net")
            break
    if super_heavy_net is None:
        raise NameError("No flash dataframe found. Create one named 'flash_only' or 'super_heavy_net', or ensure 'nhv_flash' exists in this notebook.")

# Choose the response-time column name: the hyphenated spelling wins when both exist
rt_col = next(
    (col_name for col_name in ('response-time', 'response_time') if col_name in super_heavy_net.columns),
    None,
)
if rt_col is None:
    raise ValueError("Flash dataframe missing expected column 'response-time' or 'response_time'.")

# Plot density (histogram normalized to density)
import plotly.express as px
if 'COLOR_MAP' in globals():
    flash_color = COLOR_MAP.get('Flash', '#EF553B')
else:
    flash_color = '#EF553B'
fig_flash_density = px.histogram(
    super_heavy_net.dropna(subset=[rt_col]),
    x=rt_col,
    histnorm='probability density',
    nbins=60,
    color_discrete_sequence=[flash_color],
    opacity=0.75,
    title='Flash: Response time density (Flash-only)',
)
fig_flash_density.update_layout(
    xaxis_title='Response time (s)',
    yaxis_title='Density',
    showlegend=False,
)
fig_flash_density.show()

# Print quick summary stats (the row count is the full frame length)
med = super_heavy_net[rt_col].median()
mean = super_heavy_net[rt_col].mean()
print(f"Flash {rt_col} — median: {med:.3f}s, mean: {mean:.3f}s, rows: {len(super_heavy_net)}")

Flash response-time — median: 4.450s, mean: 4.465s, rows: 4800


In [56]:
# Flash-only Requests Per Second (RPS) over time
# Reuse super_heavy_net when it is already a DataFrame. Otherwise (name undefined,
# or left as None by a failed earlier run) pick the first already-defined frame
# that has an 'offset' column.
if not isinstance(globals().get('super_heavy_net'), pd.DataFrame):
    super_heavy_net = None
    for name in ['flash_only', 'nhv_flash', 'cpu_flash', 'cpu_w_sse_flash']:
        # Plain namespace lookup: a missing name yields None, so no eval and no
        # blanket exception handler are needed.
        candidate = globals().get(name)
        if isinstance(candidate, pd.DataFrame) and 'offset' in candidate.columns:
            super_heavy_net = candidate
            print(f"Using dataframe '{name}' as super_heavy_net for RPS")
            break
    if super_heavy_net is None:
        raise NameError("No flash dataframe with an 'offset' column found for RPS plot.")

# Compute RPS (floor-second grouping)
if 'offset' not in super_heavy_net.columns:
    raise ValueError("Flash dataframe missing required column: offset")

# Drop missing offsets before flooring: casting floor(NaN) to int yields a
# meaningless sentinel second, which would corrupt the counts and make the
# filled range below span from that sentinel to the real maximum.
sec = np.floor(super_heavy_net['offset'].astype(float).dropna().values).astype(int)
rps = (
    pd.DataFrame({'sec': sec})
    .value_counts()
    .rename('count')
    .reset_index()
    .sort_values('sec')
)
rps['rps'] = rps['count']
rps = rps[['sec', 'rps']]

# Fill missing seconds for smooth line: every second between the first and the
# last observed one appears, with 0 where no request was recorded.
full_index = pd.DataFrame({'sec': np.arange(rps['sec'].min(), rps['sec'].max() + 1)})
m = full_index.merge(rps, on='sec', how='left')
m['rps'] = m['rps'].fillna(0)

# Take the Flash colour from the shared palette when it exists; otherwise use the same red
if 'COLOR_MAP' in globals():
    flash_color = COLOR_MAP.get('Flash', '#EF553B')
else:
    flash_color = '#EF553B'

# Line chart of the per-second counts; the 'rps' column doubles as the data-label text
fig_flash_rps = px.line(
    m,
    x='sec',
    y='rps',
    text='rps',
    title='Flash: Requests per second over time',
    labels={'sec': 'Time (s)', 'rps': 'Requests per second'},
    color_discrete_sequence=[flash_color],
)
# Draw lines, markers and integer-formatted labels placed above each marker
fig_flash_rps.update_traces(
    mode='lines+markers+text',
    textposition='top center',
    texttemplate='%{text:.0f}',
    marker={'size': 6},
)
fig_flash_rps.update_layout(yaxis={'tickformat': '.0f'})
fig_flash_rps.show()

# Build summary DataFrame matching the format used by pretty_table
total = int(super_heavy_net.shape[0])  # every row counts as one request
if 'offset' in super_heavy_net.columns and total > 1:
    # Elapsed span = last offset minus first offset
    elapsed = float(super_heavy_net['offset'].max() - super_heavy_net['offset'].min())
    # Clamp a negative span to 0. A NaN span also becomes 0.0 here, because
    # max() keeps its first argument when the comparison with NaN is false.
    elapsed = max(0.0, elapsed)
    # No meaningful rate for a zero span: report NaN rather than dividing by zero
    reqs_per_sec = total / elapsed if elapsed > 0 else float('nan')
else:
    elapsed = float('nan')
    reqs_per_sec = float('nan')

# Single-row frame with the same column names pretty_table reads
flash_summary = pd.DataFrame([
    {
        'scenario': 'Flash-only',
        'framework': 'Flash',
        'total_requests': total,
        'elapsed_seconds': elapsed,
        'requests_per_sec': reqs_per_sec,
    }
])

# Use the existing pretty_table helper to display the nicely formatted table
# (the 'scenario' column is left out of the frame passed to it)
try:
    pretty_table(flash_summary[['framework', 'total_requests', 'elapsed_seconds', 'requests_per_sec']], "Flash — totals")
except NameError:
    # If pretty_table isn't available, fall back to printing the DataFrame
    display(flash_summary)

# Quick console stats as fallback (values are printed unrounded)
print(f"Flash total requests: {total}, elapsed seconds: {elapsed}, req/s: {reqs_per_sec}")

framework,requests,elapsed (s),req/s
Flash,4800,45.65,105.1463


Flash total requests: 4800, elapsed seconds: 45.6507, req/s: 105.14625186470306
