diff --git a/explorer.qmd b/explorer.qmd index e955f23..5ca191b 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -3193,6 +3193,17 @@ zoomWatcher = { if (scope === 'area' || scope === 'world') _searchScope = scope; const effectiveScope = _searchScope; + // Bump the freshness token FIRST — even for invalid submits + // (empty / 1-char). Codex review of #236 round 3 caught the gap: + // if a valid search is in flight and the user clears the box and + // resubmits, the early-return below used to write "Type at least + // 2 characters" WITHOUT invalidating the in-flight search. When + // that older search later rejected, its catch would see + // `searchId === _searchSeq` and overwrite the "Type at least..." + // message with `Search error: ...`. Bumping first ensures any + // submission — valid or not — supersedes prior in-flight work. + const searchId = ++_searchSeq; + const term = searchInput.value.trim(); if (!term || term.length < 2) { searchResults.textContent = 'Type at least 2 characters'; @@ -3210,13 +3221,22 @@ zoomWatcher = { // result count, and bytes transferred from data.isamples.org during // the search window. transferSize is 0 for cross-origin responses // missing Timing-Allow-Origin; we fall back to encodedBodySize. - const searchId = ++_searchSeq; const markStart = `search-${searchId}-start`; const markEnd = `search-${searchId}-end`; performance.mark(markStart); const tStart = performance.now(); const terms = searchTerms(term); + // Snapshot the filter-state telemetry booleans here, BEFORE the + // try block, so they remain in scope through `finally`. They + // reflect the DOM state at search-fire time — independent of + // any toggles the user makes during the await chain (#167 + #236 + // round 2 review). + const hadSourceFilter = getActiveSources().length !== SOURCE_VALUES.length; + const hadFacetFilter = hasFacetFilters(); let resultsCount = 0; + let totalCount = null; // Real count when cap is hit (#232); null otherwise. + let countMs = 0; // Duration of the follow-up COUNT scan, ms. + let wasSuperseded = false; // True when a newer search bumped _searchSeq mid-flight. let errorMessage = null; try { @@ -3253,6 +3273,22 @@ zoomWatcher = { { col: 'CAST(f.place_name AS VARCHAR)', weight: 2 }, ]); + // Snapshot the source / facet predicates ONCE per search so the + // follow-up COUNT(*) at the end of this try block uses the same + // filter state as the SELECT. Without this snapshot, a user who + // toggles a source checkbox or material facet while the SELECT + // is in flight would see a label like "50 of M" where M reflects + // the NEW filter state, not the rendered 50 rows (Codex review + // of PR #236 round 1). The string is captured here, before any + // `db.query` await, and re-used by both the SELECT and the + // COUNT below. + const sourceSQL = sourceFilterSQL('f.source'); + const facetSQL = facetFilterSQL(); + // Telemetry-equivalent of the SQL snapshots above (`hadSourceFilter` + // / `hadFacetFilter`) is declared OUTSIDE this try block so it + // remains in scope through `finally` — see comment near the + // function-level `let` declarations. + // Two SQL shapes — one per scope. The fix per #179 round-2 // review: in area mode, the viewport predicate MUST run BEFORE // the top-50 selection, otherwise we're searching only the @@ -3272,7 +3308,14 @@ zoomWatcher = { // because area-scoped search by definition requires // coordinates. Drop coord-less samples before ranking; apply // the viewport predicate; THEN top-50. + // + // Track which shape actually ran so the follow-up COUNT(*) + // query for #232 uses the matching WHERE/JOIN. The area-scope + // path falls back to `world` when `viewerBboxSQL` returns null, + // so `effectiveScope === 'area'` is NOT a reliable predicate. let results; + let effectiveQueryShape = 'world'; + let effectiveBboxSQL = null; if (effectiveScope === 'area') { const bboxSQL = viewerBboxSQL('l.latitude', 'l.longitude'); if (!bboxSQL) { @@ -3290,11 +3333,13 @@ zoomWatcher = { INNER JOIN read_parquet('${lite_url}') l USING (pid) WHERE ${searchWhere} ${bboxSQL} - ${sourceFilterSQL('f.source')} - ${facetFilterSQL()} + ${sourceSQL} + ${facetSQL} ORDER BY relevance_score DESC, f.label LIMIT 50 `); + effectiveQueryShape = 'area'; + effectiveBboxSQL = bboxSQL; } } else { results = await runWorldQuery(); @@ -3307,8 +3352,8 @@ zoomWatcher = { (${score}) AS relevance_score FROM read_parquet('${facets_url}') f WHERE ${searchWhere} - ${sourceFilterSQL('f.source')} - ${facetFilterSQL()} + ${sourceSQL} + ${facetSQL} ORDER BY relevance_score DESC LIMIT 50 ) @@ -3319,12 +3364,38 @@ zoomWatcher = { ORDER BY m.relevance_score DESC, m.label `); } + + // Stale-search guard (Codex review of PR #236 round 1). A + // second search the user fires while this SELECT is in flight + // bumps `_searchSeq`. Without this early guard, the superseded + // search would still mutate `searchResults.textContent`, + // repaint `samplesSection`, trigger an unwanted `camera.flyTo`, + // and launch a wasted COUNT scan — even though a later guard + // would correctly suppress the COUNT result itself. Bail BEFORE + // any of that work. The later COUNT-specific guard remains as + // a second line of defense for the case where supersession + // happens between this check and the COUNT. + if (searchId !== _searchSeq) { + wasSuperseded = true; + return; + } resultsCount = results.length; if (results.length === 0) { searchResults.textContent = `No results for "${term}"`; return; } - searchResults.textContent = `${results.length}${results.length === 50 ? '+' : ''} results for "${term}"`; + + // Initial render. The "+" indicates "more exist" when the cap was + // hit; if the cap was hit, the follow-up COUNT below replaces + // this with the real total ("50 of 7,124 results"). This keeps + // the user informed during cold-cache COUNT scans (where the + // second query can add a few hundred ms) rather than leaving the + // bare result-row count visible while the precise total + // resolves. See issue #232. + const capReached = results.length === 50; + searchResults.textContent = capReached + ? `${results.length}+ results for "${term}"` + : `${results.length} results for "${term}"`; // Show results in the samples panel const sampEl = document.getElementById('samplesSection'); @@ -3419,10 +3490,95 @@ zoomWatcher = { duration: 1.5 }); } + + // Real-count follow-up (#232). When the SELECT hit `LIMIT 50` we + // can't know whether the true result set is 51 or 50,000 — the + // legacy `50+` label hid this entirely. Mirror the pattern used + // by `loadViewportSamples()` (where this same kind of follow-up + // turned "Samples in View capped at 5,000" into a real count): + // re-run the WHERE clause as a `COUNT(*)` only when the cap was + // hit. Below the cap, `results.length` IS the truth and a second + // scan would be wasted. + // + // Cancellation: a second search the user fires while this COUNT + // is in flight bumps `_searchSeq`. The guard below ensures a + // stale COUNT can't overwrite the newer search's result text. + // We also skip the structured-log `total_count` update on stale + // returns so the #167 log line still reflects only the live + // search's measurements. + // + // Failure handling: a COUNT error leaves the initial `50+` text + // in place — better to under-disclose than to clear the line + // entirely. The cost is logged via `count_ms` so #167 analysis + // can track when the COUNT path runs. + if (capReached) { + // Use the snapshotted `sourceSQL` / `facetSQL` so the COUNT + // matches the SELECT's filter state byte-for-byte even if + // the user has toggled source/facet filters during the + // SELECT await. + const countSQL = effectiveQueryShape === 'area' + ? ` + SELECT COUNT(*) AS n + FROM read_parquet('${facets_url}') f + INNER JOIN read_parquet('${lite_url}') l USING (pid) + WHERE ${searchWhere} + ${effectiveBboxSQL} + ${sourceSQL} + ${facetSQL} + ` + : ` + SELECT COUNT(*) AS n + FROM read_parquet('${facets_url}') f + WHERE ${searchWhere} + ${sourceSQL} + ${facetSQL} + `; + const countStart = performance.now(); + try { + const countRow = await db.query(countSQL); + countMs = performance.now() - countStart; + if (searchId !== _searchSeq) { + // A newer search owns `searchResults.textContent` + // now; leaving `totalCount` null also keeps this + // stale COUNT out of the structured log. + wasSuperseded = true; + return; + } + const total = Number(countRow[0]?.n ?? results.length); + totalCount = total; + // When `total === results.length`, the cap happened to + // catch exactly the full set — drop the "of N" to + // avoid a redundant "50 of 50 results" reading. + searchResults.textContent = total > results.length + ? `${results.length} of ${total.toLocaleString()} results for "${term}"` + : `${results.length} results for "${term}"`; + } catch(err) { + countMs = performance.now() - countStart; + if (searchId !== _searchSeq) { + wasSuperseded = true; + return; + } + console.warn("Search COUNT(*) failed; keeping '50+' label:", err); + } + } } catch(err) { - console.error("Search failed:", err); - searchResults.textContent = `Search error: ${err.message}`; - errorMessage = err.message || String(err); + // Stale-search guard for the error path (Codex review of #236 + // round 2). Without this, a search whose SELECT later rejects + // would overwrite a newer search's UI with "Search error: …". + // The freshness-token check matches the success-path guard + // above so both paths are coherent: a superseded search is + // recorded in the structured log (with `superseded: true` and + // the original error captured in `error`) but does NOT mutate + // user-visible state. + if (searchId !== _searchSeq) { + wasSuperseded = true; + errorMessage = err?.message || String(err); + console.warn(`Stale search (id=${searchId}) rejected after supersession; suppressing UI write.`, err); + } else { + console.error("Search failed:", err); + searchResults.textContent = `Search error: ${err.message}`; + errorMessage = err.message || String(err); + } } finally { performance.mark(markEnd); try { performance.measure(`search-${searchId}`, markStart, markEnd); } catch (e) {} @@ -3451,7 +3607,20 @@ zoomWatcher = { } } catch (e) {} - // Structured log for Playwright capture (#167). + // Structured log for Playwright capture (#167). `total_count` + // and `count_ms` (added #232): null/0 when the cap wasn't hit + // or when the COUNT follow-up failed/was superseded; otherwise + // the real result-set size and the time it took to compute. + // `elapsed_ms` continues to mark end-of-handler so it still + // covers SELECT+render+COUNT for end-to-end user-perceived + // latency. + // + // `superseded` (added round 2 of #236): a search whose work was + // abandoned mid-flight because the user fired a newer search. + // The log line still emits so downstream perf analysis can see + // how often this happens, but consumers should generally treat + // superseded rows as best-effort (results / counts may be + // partial and the UI never reflected them). try { console.log(JSON.stringify({ event: 'isamples.search', @@ -3460,12 +3629,15 @@ zoomWatcher = { terms_count: terms.length, scope: effectiveScope, results_count: resultsCount, + total_count: totalCount, elapsed_ms: Math.round(elapsedMs), + count_ms: Math.round(countMs), bytes_transfer: transferBytes, bytes_body: bodyBytes, seen_urls: seenUrls, - has_source_filter: getActiveSources().length !== SOURCE_VALUES.length, - has_facet_filter: hasFacetFilters(), + has_source_filter: hadSourceFilter, + has_facet_filter: hadFacetFilter, + superseded: wasSuperseded, error: errorMessage, })); } catch (e) {} diff --git a/tests/playwright/search-real-count.spec.js b/tests/playwright/search-real-count.spec.js new file mode 100644 index 0000000..19e070e --- /dev/null +++ b/tests/playwright/search-real-count.spec.js @@ -0,0 +1,243 @@ +/** + * Search-results "50+" → "50 of N" real-count display (issue #232, + * roadmap step 2 of #234). + * + * Regression-prone area: the search-results line under the in-map search + * box used to read `50+ results for "pottery"` whenever the SELECT hit + * `LIMIT 50`, hiding whether the true result set is 51 or 50,000. The + * fix follows the same pattern as `loadViewportSamples()`'s real-count + * follow-up: when the cap is hit, fire a second `COUNT(*)` query with + * the same WHERE clause and replace the label. + * + * What this spec covers: + * - Cap-hit case ("pottery"): initial `50+` followed by the + * `50 of N` final label where N > 50, AND the structured + * `isamples.search` log payload reports `total_count: N`. + * - No-results case ("xyzzyqqqplugh"): `No results for "term"` still + * short-circuits without firing the COUNT scan; structured log + * reports `total_count: null`. + * + * What this spec does NOT cover: + * - Cancellation race when a second search supersedes the COUNT + * in flight. Covered conceptually by the `searchId !== _searchSeq` + * guard in the implementation; a Playwright test that reliably + * races two SELECTs against each other would be fragile. + * - Sub-cap results (e.g., 12 results) — the rendering path is + * unchanged from pre-#232 ("N results for term" with no "of M"). + */ +const { test, expect } = require('@playwright/test'); + +const EXPLORER_PATH = '/explorer.html'; + +/** Wait until the explorer has rendered the search input. Boot sequence: + * phase1 (viewer + cluster cache) → facetFilters → search input wiring. + * The input is in the DOM from page load but only functional after wiring. */ +async function waitForSearchReady(page, ms = 60000) { + await page.locator('#sampleSearch').first().waitFor({ timeout: ms }); + await page.locator('#searchSubmitBtn').first().waitFor({ timeout: ms }); + // The search results line is created at boot too — wait for the + // facetFilters cell to settle so a search fires against a populated + // facet UI (matches what a real user would experience). + await page.waitForFunction( + () => document.querySelectorAll('#materialFilterBody input[type="checkbox"]').length > 0, + null, { timeout: ms } + ); +} + +/** Collect `isamples.search` JSON payloads from the console (matches the + * pattern used in tests/test_search_perf.py:204). */ +function attachSearchLogCollector(page) { + const captured = []; + page.on('console', (msg) => { + if (msg.type() !== 'log') return; + const text = msg.text(); + if (!text.includes('isamples.search')) return; + try { + const payload = JSON.parse(text); + if (payload?.event === 'isamples.search') captured.push(payload); + } catch { /* not a structured search log */ } + }); + return captured; +} + +async function runSearch(page, term) { + // Quarto's see-also rendering produces a duplicate #sampleSearch in the + // DOM; `document.getElementById` (which the live JS uses) resolves to + // the FIRST instance, so the test mirrors that with `.first()`. + const input = page.locator('#sampleSearch').first(); + await input.click(); + await input.press('ControlOrMeta+a'); + await input.press('Delete'); + await input.fill(term); + await page.locator('#searchSubmitBtn').first().click(); +} + +test.describe('Search real-count display (#232 / #234 step 2)', () => { + test.setTimeout(180000); + + test('cap-hit search shows "N of M results" + structured log carries total_count', async ({ page }) => { + const logs = attachSearchLogCollector(page); + await page.goto(EXPLORER_PATH); + await waitForSearchReady(page); + + await runSearch(page, 'pottery'); + + // First wait for the "50+" initial state — proves the cap-hit + // initial render is in place before the COUNT lands. + await page.waitForFunction( + () => /^50\+ results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), + null, { timeout: 90000 } + ); + + // Then wait for the COUNT to land and the label to flip to "50 of N". + // The thousands separator is from `Number.toLocaleString()`, which uses + // the test browser's locale (en-US in playwright defaults), so we + // tolerate either comma-separated or plain digits to be portable. + await page.waitForFunction( + () => { + const text = document.getElementById('searchResults')?.textContent || ''; + return /^50 of [\d,]+ results for "pottery"$/.test(text); + }, + null, { timeout: 60000 } + ); + + // Pull the structured log payload back and verify total_count > 50. + // Wait until the COUNT-bearing log appears (the log is written from + // `finally`, which runs after COUNT). + await page.waitForFunction( + () => true, // give the console event a tick to land + null, { timeout: 1000 } + ).catch(() => {}); + + const potteryLogs = logs.filter(l => l.term === 'pottery'); + expect(potteryLogs.length).toBeGreaterThan(0); + const last = potteryLogs[potteryLogs.length - 1]; + expect(last.results_count).toBe(50); + expect(last.total_count).toBeGreaterThan(50); + expect(typeof last.count_ms).toBe('number'); + expect(last.count_ms).toBeGreaterThan(0); + expect(last.superseded).toBe(false); + + // And the visible label matches the log payload. `.first()` for the + // same reason as #sampleSearch above — Quarto duplicates the node. + const visible = await page.locator('#searchResults').first().textContent(); + const m = visible.match(/^50 of ([\d,]+) results for "pottery"$/); + expect(m).toBeTruthy(); + const visibleTotal = Number(m[1].replace(/,/g, '')); + expect(visibleTotal).toBe(last.total_count); + }); + + test('telemetry snapshot survives a facet toggle during held-open COUNT', async ({ page }) => { + // Codex round-3 review observed that the round-2 race test was still + // timing-dependent: the wait accepted either "50+" or the final + // "50 of N" label, so if the COUNT happened to finish before the + // toggle, the test passed trivially (snapshot or not). To make the + // test deterministic, we monkey-patch the page's DuckDB `db.query` + // to add a fixed delay on COUNT(*) queries. That guarantees the + // toggle lands BETWEEN SELECT completion (visible "50+") and COUNT + // completion (`finally` log emission), exercising the snapshot's + // entire purpose. + // + // What the snapshot guarantees: + // `has_facet_filter` AND `has_source_filter` in the structured + // log reflect search-fire-time DOM state, not whatever the user + // toggled mid-flight. + // + // Fault-injection (verified manually): if the log reads + // `has_facet_filter` / `has_source_filter` live from DOM in + // `finally`, this test fails. Both round-1 (predicate snapshot) + // and round-2 (telemetry snapshot) fixes are exercised. + const logs = attachSearchLogCollector(page); + await page.goto(EXPLORER_PATH); + await waitForSearchReady(page); + + // Sanity: no facet active at search-fire time. + // (Source-filter sanity is NOT checked here because Quarto's + // see-also rendering duplicates `#sourceFilter` checkboxes in the + // page DOM, so a naive count is ambiguous. Source-filter snapshot + // coverage is left as a TODO requiring a non-Quarto fixture.) + const facetActive = await page.evaluate(() => + document.querySelectorAll('#materialFilterBody input[type="checkbox"]:checked').length > 0 + || document.querySelectorAll('#contextFilterBody input[type="checkbox"]:checked').length > 0 + || document.querySelectorAll('#objectTypeFilterBody input[type="checkbox"]:checked').length > 0 + ); + expect(facetActive).toBe(false); + + // Wrap `db.query` to hold COUNT(*) queries open for 1.5s. The page + // exposes the OJS `db` reactive value via + // `_ojs.ojsConnector.mainModule.value('db')` — same accessor pattern + // used by `url-roundtrip.spec.js` for `viewer`. The wrap is scoped + // to this test's page context so other tests aren't affected. + await page.evaluate(async () => { + const db = await window._ojs.ojsConnector.mainModule.value('db'); + if (!db.__countDelayInstalled) { + const orig = db.query.bind(db); + db.query = async (sql, ...rest) => { + if (typeof sql === 'string' && /SELECT\s+COUNT\(\*\)/i.test(sql)) { + await new Promise(r => setTimeout(r, 1500)); + } + return orig(sql, ...rest); + }; + db.__countDelayInstalled = true; + } + }); + + await runSearch(page, 'pottery'); + + // EXACT "50+" wait — proves SELECT has settled AND COUNT has not yet + // emitted its post-finally log. This is the window the snapshot + // must survive. Loosening this back to "50+ OR 50 of N" would + // restore the timing flake Codex round 3 flagged. + await page.waitForFunction( + () => document.getElementById('searchResults')?.textContent === '50+ results for "pottery"', + null, { timeout: 90000 } + ); + + // Toggle the facet checkbox. Programmatic .click() ensures the + // change event fires through the page's normal handlers. + await page.evaluate(() => { + const mat = document.querySelector('#materialFilterBody input[type="checkbox"]'); + if (mat) mat.click(); + }); + + // Now wait for the final "50 of N" label. The COUNT was held by + // the wrap for 1.5s, then runs against the snapshotted SQL, then + // `finally` logs against the snapshotted booleans. + await page.waitForFunction( + () => /^50 of [\d,]+ results for "pottery"$/.test(document.getElementById('searchResults')?.textContent || ''), + null, { timeout: 90000 } + ); + + const potteryLogs = logs.filter(l => l.term === 'pottery'); + expect(potteryLogs.length).toBeGreaterThan(0); + const last = potteryLogs[potteryLogs.length - 1]; + // The headline assertion: the snapshotted boolean must be false + // even though the user ticked a facet between SELECT and COUNT. + expect(last.has_facet_filter).toBe(false); + expect(last.results_count).toBe(50); + expect(last.total_count).not.toBeNull(); + expect(last.total_count).toBeGreaterThan(50); + expect(last.superseded).toBe(false); + }); + + test('no-results search short-circuits without COUNT (total_count remains null)', async ({ page }) => { + const logs = attachSearchLogCollector(page); + await page.goto(EXPLORER_PATH); + await waitForSearchReady(page); + + await runSearch(page, 'xyzzyqqqplugh'); + + await page.waitForFunction( + () => /^No results for "xyzzyqqqplugh"$/.test(document.getElementById('searchResults')?.textContent || ''), + null, { timeout: 90000 } + ); + + const xyzzyLogs = logs.filter(l => l.term === 'xyzzyqqqplugh'); + expect(xyzzyLogs.length).toBeGreaterThan(0); + const last = xyzzyLogs[xyzzyLogs.length - 1]; + expect(last.results_count).toBe(0); + expect(last.total_count).toBeNull(); + // COUNT path not entered ⇒ countMs stayed at 0. + expect(last.count_ms).toBe(0); + }); +});