Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion public/_headers
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Referrer-Policy: strict-origin-when-cross-origin
Permissions-Policy: camera=(), microphone=(), geolocation=(), payment=(), usb=(), vr=(), magnetometer=(), gyroscope=(), fullscreen=(self), accelerometer=()
Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
Content-Security-Policy: default-src 'self'; script-src 'self' https://cdn.plot.ly https://static.cloudflareinsights.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: https: blob:; font-src 'self' https://fonts.gstatic.com data:; connect-src 'self' https://api.github.com https://api.rss2json.com https://cloudflareinsights.com; media-src 'self'; object-src 'none'; base-uri 'self'; form-action 'self'; frame-ancestors 'none'; upgrade-insecure-requests; block-all-mixed-content
Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.plot.ly https://static.cloudflareinsights.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: https: blob:; font-src 'self' https://fonts.gstatic.com data:; connect-src 'self' https://api.github.com https://api.rss2json.com https://cloudflareinsights.com; media-src 'self'; object-src 'none'; base-uri 'self'; form-action 'self'; frame-ancestors 'none'; upgrade-insecure-requests; block-all-mixed-content

/_astro/*
Cache-Control: public, max-age=31536000, immutable
Expand Down
16 changes: 8 additions & 8 deletions src/components/merbench/CombinedFilters.astro
Original file line number Diff line number Diff line change
Expand Up @@ -677,11 +677,11 @@ const { difficulties, providers } = Astro.props;
(cb as HTMLInputElement).checked = true;
});

// Update filter count and trigger change event
// Update filter count and trigger change event on all checkboxes
updateFilterCount();
if (checkboxes.length > 0) {
checkboxes[0].dispatchEvent(new Event('change', { bubbles: true }));
}
checkboxes.forEach((cb) => {
cb.dispatchEvent(new Event('change', { bubbles: true }));
});
});
}

Expand All @@ -692,11 +692,11 @@ const { difficulties, providers } = Astro.props;
(cb as HTMLInputElement).checked = false;
});

// Update filter count and trigger change event
// Update filter count and trigger change event on all checkboxes
updateFilterCount();
if (checkboxes.length > 0) {
checkboxes[0].dispatchEvent(new Event('change', { bubbles: true }));
}
checkboxes.forEach((cb) => {
cb.dispatchEvent(new Event('change', { bubbles: true }));
});
});
}

Expand Down
94 changes: 85 additions & 9 deletions src/components/merbench/LeaderboardTable.astro
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,47 @@ export interface Props {
}

const { leaderboard } = Astro.props;

// Calculate cost range for progress bar normalization
const costs = leaderboard.map((entry) => entry.Avg_Cost || calculateCost(entry.Avg_Tokens));
const minCost = Math.min(...costs);
const maxCost = Math.max(...costs);
const costRange = maxCost - minCost;
---

<section class="leaderboard-section">
<h2>Model Leaderboard</h2>
<div class="leaderboard-table">
<table>
<table id="leaderboard-table">
<thead>
<tr>
<th>Rank</th>
<th>Model</th>
<th>Success Rate</th>
<th>Avg Cost/Run</th>
<th>Avg Duration</th>
<th>Avg Tokens</th>
<th>Runs</th>
<th>Provider</th>
<th class="sortable" data-sort-key="Model" data-sort-type="string">
Model <span class="sort-indicator"></span>
</th>
<th
class="sortable active"
data-sort-key="Success_Rate"
data-sort-type="number"
data-sort-direction="desc"
>
Success Rate <span class="sort-indicator">↓</span>
</th>
<th class="sortable" data-sort-key="Avg_Cost" data-sort-type="number">
Avg Cost/Run <span class="sort-indicator"></span>
</th>
<th class="sortable" data-sort-key="Avg_Duration" data-sort-type="number">
Avg Duration <span class="sort-indicator"></span>
</th>
<th class="sortable" data-sort-key="Avg_Tokens" data-sort-type="number">
Avg Tokens <span class="sort-indicator"></span>
</th>
<th class="sortable" data-sort-key="Runs" data-sort-type="number">
Runs <span class="sort-indicator"></span>
</th>
<th class="sortable" data-sort-key="Provider" data-sort-type="string">
Provider <span class="sort-indicator"></span>
</th>
</tr>
</thead>
<tbody>
Expand All @@ -46,7 +71,17 @@ const { leaderboard } = Astro.props;
<span class="progress-text">{entry.Success_Rate.toFixed(1)}%</span>
</div>
</td>
<td class="cost">${calculateCost(entry.Avg_Tokens).toFixed(4)}</td>
<td class="cost">
<div class="progress-bar">
<div
class="progress-fill progress-fill--cost"
style={`width: ${costRange > 0 ? ((entry.Avg_Cost || calculateCost(entry.Avg_Tokens)) / maxCost) * 100 : 0}%`}
/>
<span class="progress-text">
${(entry.Avg_Cost || calculateCost(entry.Avg_Tokens)).toFixed(4)}
</span>
</div>
</td>
<td class="duration">{entry.Avg_Duration.toFixed(2)}s</td>
<td class="tokens">{entry.Avg_Tokens.toLocaleString()}</td>
<td class="runs">{entry.Runs}</td>
Expand Down Expand Up @@ -163,6 +198,42 @@ const { leaderboard } = Astro.props;
letter-spacing: 0.5px;
}

/* Sortable header styles */
.sortable {
cursor: pointer;
user-select: none;
position: relative;
transition: background-color 0.2s ease;
}

.sortable:hover {
background-color: var(--bg-tertiary);
color: var(--text-primary);
}

.sortable.active {
/* No special styling - only the arrow indicator shows active state */
}

.sort-indicator {
font-size: 0.8rem;
opacity: 0.5;
margin-left: 0.25rem;
}

.sortable.active .sort-indicator {
opacity: 1;
color: var(--accent-primary);
}

.sortable:not(.active) .sort-indicator {
opacity: 0;
}

.sortable:hover:not(.active) .sort-indicator {
opacity: 0.3;
}

tbody tr:hover {
background-color: var(--bg-primary);
}
Expand Down Expand Up @@ -265,6 +336,11 @@ const { leaderboard } = Astro.props;
background-color: var(--progress-low);
}

/* Cost progress bar - single muted color */
.progress-fill--cost {
background-color: #9ca3af; /* Muted gray for all cost bars */
}

@media (max-width: 768px) {
.leaderboard-section {
padding: 0.5rem;
Expand Down
81 changes: 76 additions & 5 deletions src/lib/merbench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type {
FailureAnalysisData,
ParetoData,
ModelStats,
LeaderboardEntry,
} from './merbench-types';

// Calculate cost per run (simplified pricing model) - DEPRECATED
Expand Down Expand Up @@ -269,6 +270,60 @@ const calculateParetoFrontier = (data: Array<{ cost: number; Success_Rate: numbe
return paretoPoints;
};

// Sorting utilities
// Module-level record of the active leaderboard sort. These two bindings are
// written by setSortState and read by getSortState; sortLeaderboard itself
// does not consult them (it takes the key and direction as arguments).
// Column key of the active sort; starts as 'Success_Rate'.
let currentSortKey = 'Success_Rate';
// Direction of the active sort; starts as descending.
let currentSortDirection: 'asc' | 'desc' = 'desc';

/**
 * Returns a sorted copy of the leaderboard rows; `data` itself is not mutated.
 *
 * Ordering rules:
 * - `sortKey === 'Avg_Cost'` sorts on `entry.Avg_Cost`, falling back to
 *   `calculateCost(entry.Avg_Tokens)` when `Avg_Cost` is falsy.
 * - Any other `sortKey` is read directly as a property of the entry.
 * - Missing values (`null`, `undefined`, or `NaN`) are treated as the smallest
 *   value: first when ascending, last when descending.
 * - Two numbers compare numerically; anything else compares as
 *   case-insensitive strings.
 *
 * @param data - Rows to sort.
 * @param sortKey - Property name to sort by (or the special key `'Avg_Cost'`).
 * @param direction - `'asc'` for ascending, `'desc'` for descending.
 * @returns A new array containing the same entries in sorted order.
 */
export const sortLeaderboard = (
  data: LeaderboardEntry[],
  sortKey: string,
  direction: 'asc' | 'desc'
): LeaderboardEntry[] => {
  // +1 keeps natural order (ascending), -1 flips every comparison (descending).
  const sign = direction === 'asc' ? 1 : -1;

  // Resolve the value a row is sorted on for the requested key.
  const valueOf = (entry: LeaderboardEntry): unknown =>
    sortKey === 'Avg_Cost'
      ? entry.Avg_Cost || calculateCost(entry.Avg_Tokens)
      : entry[sortKey as keyof LeaderboardEntry];

  // NaN is grouped with null/undefined: subtracting or comparing it would
  // otherwise make the comparator inconsistent and the resulting order
  // implementation-defined.
  const isMissing = (value: unknown): boolean =>
    value == null || (typeof value === 'number' && Number.isNaN(value));

  return [...data].sort((a, b) => {
    const aVal = valueOf(a);
    const bVal = valueOf(b);

    const aMissing = isMissing(aVal);
    const bMissing = isMissing(bVal);
    if (aMissing && bMissing) return 0;
    if (aMissing) return -sign;
    if (bMissing) return sign;

    // Numeric comparison. Relational operators are used instead of
    // subtraction so that equal infinities compare as 0 rather than NaN.
    if (typeof aVal === 'number' && typeof bVal === 'number') {
      if (aVal < bVal) return -sign;
      if (aVal > bVal) return sign;
      return 0;
    }

    // String comparison (case-insensitive).
    const aStr = String(aVal).toLowerCase();
    const bStr = String(bVal).toLowerCase();

    if (aStr < bStr) return -sign;
    if (aStr > bStr) return sign;
    return 0;
  });
};

/**
 * Stores the given column key and direction as the module's current sort state.
 *
 * @param sortKey - Key of the column now being sorted on.
 * @param direction - `'asc'` or `'desc'`.
 */
export const setSortState = (sortKey: string, direction: 'asc' | 'desc'): void => {
  [currentSortKey, currentSortDirection] = [sortKey, direction];
};

/**
 * Reads the module's current sort state.
 *
 * @returns A fresh object holding the current sort `key` and `direction`.
 */
export const getSortState = () => {
  const key = currentSortKey;
  const direction = currentSortDirection;
  return { key, direction };
};

// DOM manipulation utilities
export const updateSummaryStats = (filteredData: FilteredData): void => {
const totalRuns = filteredData.rawData.length;
Expand Down Expand Up @@ -303,9 +358,20 @@ export const updateLeaderboard = (filteredData: FilteredData): void => {
const tbody = document.querySelector('.leaderboard-table tbody');
if (!tbody) return;

// Calculate cost range for progress bar normalization
const costs = filteredData.leaderboard.map(
(entry) => entry.Avg_Cost || calculateCost(entry.Avg_Tokens)
);
const minCost = Math.min(...costs);
const maxCost = Math.max(...costs);
const costRange = maxCost - minCost;

tbody.innerHTML = filteredData.leaderboard
.map(
(entry, index) => `
.map((entry, index) => {
const currentCost = entry.Avg_Cost || calculateCost(entry.Avg_Tokens);
const costWidth = costRange > 0 ? (currentCost / maxCost) * 100 : 0;

return `
<tr>
<td class="rank">${index + 1}</td>
<td class="model-name">${entry.Model}</td>
Expand All @@ -317,14 +383,19 @@ export const updateLeaderboard = (filteredData: FilteredData): void => {
<span class="progress-text">${entry.Success_Rate.toFixed(1)}%</span>
</div>
</td>
<td class="cost">$${(entry.Avg_Cost || calculateCost(entry.Avg_Tokens)).toFixed(4)}</td>
<td class="cost">
<div class="progress-bar">
<div class="progress-fill progress-fill--cost" style="width: ${costWidth}%; background-color: #9ca3af;"></div>
<span class="progress-text">$${currentCost.toFixed(4)}</span>
</div>
</td>
<td class="duration">${entry.Avg_Duration.toFixed(2)}s</td>
<td class="tokens">${entry.Avg_Tokens.toLocaleString()}</td>
<td class="runs">${entry.Runs}</td>
<td class="provider">${entry.Provider}</td>
</tr>
`
)
`;
})
.join('');
};

Expand Down
11 changes: 4 additions & 7 deletions src/scripts/merbench-filters.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
import {
getFilteredData,
updateSummaryStats,
updateLeaderboard,
showEmptyState,
} from '../lib/merbench';
import { getFilteredData, updateSummaryStats, showEmptyState } from '../lib/merbench';
import { updateLeaderboardData } from './merbench-sorting';
import type { RawData, TestGroupData, MerbenchData } from '../lib/merbench-types';
import { MerbenchCharts } from './merbench-charts';

Expand Down Expand Up @@ -317,7 +313,8 @@ export class MerbenchFilters {

private updateUI(filteredData: any): void {
updateSummaryStats(filteredData);
updateLeaderboard(filteredData);
// Use sorting-aware leaderboard update instead of basic update
updateLeaderboardData(filteredData.leaderboard);
}

private showNoDataMessage(): void {
Expand Down
4 changes: 4 additions & 0 deletions src/scripts/merbench-init-csp.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { MerbenchCharts } from './merbench-charts';
import { MerbenchFilters } from './merbench-filters';
import { initializeLeaderboardSorting } from './merbench-sorting';
import type { MerbenchData, RawData } from '../lib/merbench-types';

declare global {
Expand Down Expand Up @@ -82,6 +83,9 @@ async function initializeMerbench() {
const filters = new MerbenchFilters(data, charts);
filters.initialize();

// Initialize leaderboard sorting
initializeLeaderboardSorting(originalData.leaderboard);

// Initialize charts with all data
try {
await charts.waitForPlotly();
Expand Down
Loading