diff --git a/apps/backend/scripts/benchmark-internal-metrics.ts b/apps/backend/scripts/benchmark-internal-metrics.ts new file mode 100644 index 0000000000..605a988305 --- /dev/null +++ b/apps/backend/scripts/benchmark-internal-metrics.ts @@ -0,0 +1,1843 @@ +/** + * Local-only benchmark + equivalence harness for the ClickHouse queries in + * apps/backend/src/app/api/latest/internal/metrics/route.tsx. + * + * Three modes, selected via env flags (all run by default): + * + * 1. MAU equivalence matrix (default ON; set BENCH_SKIP_MATRIX=1 to skip) + * Small-data test cases for loadMonthlyActiveUsers. Asserts that the + * BEFORE (pre-fix) and AFTER (current) queries return the same MAU + * count AND the same set of individual users across 13 edge cases: + * empty, dedup, anonymous filter, window boundary, null user_id, + * non-UUID user_id, case variation, project isolation, etc. + * + * 2. MAU perf run (default ON; set BENCH_SKIP_PERF=1 to skip) + * Runs OLD vs NEW MAU query on the heavy seed. Reads + * memory_usage/read_rows/result_bytes from system.query_log and prints + * a comparison table plus all candidate variants (v1 uniqExact strings, + * v2 inline regex, v3 UUID keys, v4 sipHash64 [shipped], v5-v7 HLL + * sketches). Also includes a set-equality check so "same count, + * different users" can't slip through. + * + * 3. Full-route benchmark (set BENCH_ROUTE_QUERIES=1) + * Runs every ClickHouse query in the internal-metrics route + * (loadUsersByCountry, loadDailyActiveUsers, the splits, + * loadMonthlyActiveUsers, analyticsOverview:{dailyEvents, + * totalVisitors, topReferrers, topRegion, online}) in three stages: + * BEFORE (pre-fix), AFTER (current: fixes 1 + 3), and OPTIMIZED + * (further candidate opts not yet shipped — e.g. dropping the + * analyticsOverview LEFT JOIN, hashed split partition keys, + * loadUsersByCountry time window). Prints ranked per-query deltas and + * endpoint-level totals (sum peak memory, max duration). + * + * Seeds synthetic events under a unique project_id so real data is never + * touched; cleans up via ALTER TABLE ... DELETE on exit. 
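+ *
+ * Example invocation (illustrative; combines the run command and knobs documented below):
+ *   BENCH_USERS=50000 BENCH_ROUTE_QUERIES=1 pnpm --filter @stackframe/backend run with-env:dev tsx scripts/benchmark-internal-metrics.ts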
+ *
+ * Run: pnpm --filter @stackframe/backend run with-env:dev tsx scripts/benchmark-internal-metrics.ts
+ * Env knobs:
+ *   BENCH_USERS (default 200_000) – distinct users in the perf seed
+ *   BENCH_EVENTS_USER (default 5) – $token-refresh events per user
+ *   BENCH_ANON_RATIO (default 0.1) – fraction flagged is_anonymous
+ *   BENCH_BATCH (default 50_000) – insert batch size
+ *   BENCH_SKIP_PERF=1 – skip the heavy MAU perf run
+ *   BENCH_SKIP_MATRIX=1 – skip the equivalence matrix
+ *   BENCH_ROUTE_QUERIES=1 – also run the full-route BEFORE/AFTER/OPTIMIZED suite
+ *   BENCH_PAGE_VIEWS_USER (default 3) – $page-view events per user
+ *   BENCH_CLICKS_USER (default 1) – $click events per user
+ *   BENCH_TEAM_RATIO (default 0.3) – fraction of users with a team
+ */
+
+import { getClickhouseAdminClient } from "@/lib/clickhouse";
+import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
+import { randomUUID } from "node:crypto";
+
+const RUN_ID = randomUUID();
+const BENCH_PROJECT_ID = `bench-mau-${RUN_ID}`;
+const PERF_BRANCH_ID = "perf";
+
+const METRICS_WINDOW_DAYS = 30;
+const METRICS_WINDOW_MS = METRICS_WINDOW_DAYS * 24 * 60 * 60 * 1000;
+const ONE_DAY_MS = 24 * 60 * 60 * 1000;
+
+const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/;
+const UUID_RE_CH = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$";
+
+function envInt(name: string, fallback: number): number {
+  const v = getEnvVariable(name, "");
+  if (v === "") return fallback;
+  const n = Number(v);
+  if (!Number.isFinite(n)) throw new Error(`bad ${name}: ${v}`);
+  return n;
+}
+function envFloat(name: string, fallback: number): number {
+  const v = getEnvVariable(name, "");
+  if (v === "") return fallback;
+  const n = Number(v);
+  if (!Number.isFinite(n)) throw new Error(`bad ${name}: ${v}`);
+  return n;
+}
+function envBool(name: string): boolean {
+  const v = getEnvVariable(name, "");
+  return v === "1" || v === "true";
+}
+
+function formatCh(date: Date): string {
+  return date.toISOString().slice(0, 19);
+}
+
+function normalizeUuidFromEvent(value: string): string | null {
+  const n = value.trim().toLowerCase();
+  return UUID_RE.test(n) ? n : null;
+}
+
+type EventRow = {
+  event_type: string,
+  event_at: string,
+  data: Record<string, unknown>,
+  project_id: string,
+  branch_id: string,
+  user_id: string | null,
+  team_id: string | null,
+};
+
+const OLD_QUERY = `
+  SELECT assumeNotNull(user_id) AS user_id
+  FROM analytics_internal.events
+  WHERE event_type = '$token-refresh'
+    AND project_id = {projectId:String}
+    AND branch_id = {branchId:String}
+    AND user_id IS NOT NULL
+    AND event_at >= {since:DateTime}
+    AND event_at < {untilExclusive:DateTime}
+    AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0)
+  GROUP BY user_id
+`;
+
+// Proposed query. Counts on the server (so we never ship N user_ids back to
+// the client) and filters via direct JSON path access (skips the per-row
+// toJSONString → JSONExtract round-trip that blows up memory in prod).
+// Matches the old JS normalization: lower/trim + isUuid regex.
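+// Concretely, lower(trim(user_id)) in SQL plays the role of value.trim().toLowerCase()
+// in normalizeUuidFromEvent above, and match(normalized_user_id, {uuidRe:String}) plays
+// the role of the UUID_RE.test() check, so the server-side filter is intended to be a
+// 1:1 translation of the old JS post-processing.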
+const NEW_QUERY = `
+  SELECT uniqExact(normalized_user_id) AS mau
+  FROM (
+    SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id
+    FROM analytics_internal.events
+    WHERE event_type = '$token-refresh'
+      AND project_id = {projectId:String}
+      AND branch_id = {branchId:String}
+      AND user_id IS NOT NULL
+      AND event_at >= {since:DateTime}
+      AND event_at < {untilExclusive:DateTime}
+      AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+  )
+  WHERE match(normalized_user_id, {uuidRe:String})
+`;
+
+type QueryParams = {
+  projectId: string,
+  branchId: string,
+  since: Date,
+  untilExclusive: Date,
+  includeAnonymous: boolean,
+};
+
+async function runOld(p: QueryParams): Promise<{ count: number, set: Set<string>, queryId: string }> {
+  const client = getClickhouseAdminClient();
+  const queryId = `bench-old-${randomUUID()}`;
+  const res = await client.query({
+    query: OLD_QUERY,
+    query_params: {
+      projectId: p.projectId,
+      branchId: p.branchId,
+      since: formatCh(p.since),
+      untilExclusive: formatCh(p.untilExclusive),
+      includeAnonymous: p.includeAnonymous ? 1 : 0,
+    },
+    query_id: queryId,
+    format: "JSONEachRow",
+  });
+  const rows = (await res.json()) as { user_id: string }[];
+  const set = new Set<string>();
+  for (const r of rows) {
+    const n = normalizeUuidFromEvent(r.user_id);
+    if (n != null) set.add(n);
+  }
+  return { count: set.size, set, queryId };
+}
+
+async function runNew(p: QueryParams): Promise<{ count: number, queryId: string }> {
+  const client = getClickhouseAdminClient();
+  const queryId = `bench-new-${randomUUID()}`;
+  const res = await client.query({
+    query: NEW_QUERY,
+    query_params: {
+      projectId: p.projectId,
+      branchId: p.branchId,
+      since: formatCh(p.since),
+      untilExclusive: formatCh(p.untilExclusive),
+      includeAnonymous: p.includeAnonymous ? 1 : 0,
+      uuidRe: UUID_RE_CH,
+    },
+    query_id: queryId,
+    format: "JSONEachRow",
+  });
+  const rows = (await res.json()) as { mau: string | number }[];
+  return { count: Number(rows[0]?.mau ?? 0), queryId };
+}
+
+// Diagnostic-only variant of the NEW query that returns the set of distinct
+// normalized user_ids instead of just the count. Used by the equivalence
+// check to prove the OLD pipeline and the NEW pipeline would have counted
+// the *same users*, not just the same number of users.
+const NEW_QUERY_SET = `
+  SELECT DISTINCT normalized_user_id
+  FROM (
+    SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id
+    FROM analytics_internal.events
+    WHERE event_type = '$token-refresh'
+      AND project_id = {projectId:String}
+      AND branch_id = {branchId:String}
+      AND user_id IS NOT NULL
+      AND event_at >= {since:DateTime}
+      AND event_at < {untilExclusive:DateTime}
+      AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+  )
+  WHERE match(normalized_user_id, {uuidRe:String})
+`;
+
+async function runNewSet(p: QueryParams): Promise<Set<string>> {
+  const client = getClickhouseAdminClient();
+  const res = await client.query({
+    query: NEW_QUERY_SET,
+    query_params: {
+      projectId: p.projectId,
+      branchId: p.branchId,
+      since: formatCh(p.since),
+      untilExclusive: formatCh(p.untilExclusive),
+      includeAnonymous: p.includeAnonymous ? 1 : 0,
+      uuidRe: UUID_RE_CH,
+    },
+    format: "JSONEachRow",
+  });
+  const rows = (await res.json()) as { normalized_user_id: string }[];
+  return new Set(rows.map((r) => r.normalized_user_id));
+}
+
+function setDiff(a: Set<string>, b: Set<string>): { onlyInA: string[], onlyInB: string[] } {
+  const onlyInA: string[] = [];
+  const onlyInB: string[] = [];
+  for (const x of a) if (!b.has(x)) onlyInA.push(x);
+  for (const x of b) if (!a.has(x)) onlyInB.push(x);
+  return { onlyInA, onlyInB };
+}
+
+// ── Alternate query variants explored for further memory/duration wins ──────
+// Each variant returns a single row with `mau` (count). The equivalence check
+// compares against the OLD pipeline's exact count.
+
+type Variant = {
+  name: string,
+  description: string,
+  approximate?: boolean,
+  sql: string,
+};
+
+const COMMON_FILTERS = `
+  WHERE event_type = '$token-refresh'
+    AND project_id = {projectId:String}
+    AND branch_id = {branchId:String}
+    AND user_id IS NOT NULL
+    AND event_at >= {since:DateTime}
+    AND event_at < {untilExclusive:DateTime}
+    AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+`;
+
+const VARIANTS: Variant[] = [
+  {
+    name: "v1_uniqExact_string",
+    description: "uniqExact on lower(trim(user_id)) string; regex filter in outer WHERE (current fix)",
+    sql: `
+      SELECT uniqExact(normalized_user_id) AS mau
+      FROM (
+        SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id
+        FROM analytics_internal.events
+        ${COMMON_FILTERS}
+      )
+      WHERE match(normalized_user_id, {uuidRe:String})
+    `,
+  },
+  {
+    name: "v2_uniqExact_inline",
+    description: "Same as v1 but regex + normalization folded into inner WHERE (no subquery)",
+    sql: `
+      SELECT uniqExact(lower(trim(assumeNotNull(user_id)))) AS mau
+      FROM analytics_internal.events
+      ${COMMON_FILTERS}
+        AND match(lower(trim(assumeNotNull(user_id))), {uuidRe:String})
+    `,
+  },
+  {
+    name: "v3_uniqExact_toUUID",
+    description: "uniqExact on toUUIDOrNull(...) — 16-byte native UUID keys instead of 36-byte strings",
+    sql: `
+      SELECT uniqExact(uid) AS mau
+      FROM (
+        SELECT toUUIDOrNull(lower(trim(assumeNotNull(user_id)))) AS uid
+        FROM analytics_internal.events
+        ${COMMON_FILTERS}
+      )
+      WHERE uid IS NOT NULL
+        AND match(toString(uid), {uuidRe:String})
+    `,
+  },
+  {
+    name: "v4_uniqExact_sipHash64",
+    description: "uniqExact on sipHash64(...) 
— 8-byte keys; collision prob negligible at <<2^32 users", + sql: ` + SELECT uniqExact(h) AS mau + FROM ( + SELECT sipHash64(lower(trim(assumeNotNull(user_id)))) AS h, + lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v5_uniq_hll", + description: "uniq() HyperLogLog — bounded ~16 KiB state, typical error ~0.5% (APPROXIMATE)", + approximate: true, + sql: ` + SELECT uniq(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v6_uniqCombined", + description: "uniqCombined(17) — exact for small N, HLL after threshold; ~96 KiB state (APPROXIMATE)", + approximate: true, + sql: ` + SELECT uniqCombined(17)(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v7_uniqHLL12", + description: "uniqHLL12 — ~4 KiB state, typical error ~2% (APPROXIMATE)", + approximate: true, + sql: ` + SELECT uniqHLL12(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, +]; + +// ── Route-wide query census ───────────────────────────────────────────────── +// Every ClickHouse query from apps/backend/src/app/api/latest/internal/metrics/route.tsx, +// captured verbatim so we can measure the full shape of the endpoint. + +type RouteQuery = { + name: string, + desc: string, + sql: string, + extraParams?: (now: Date, untilExclusive: Date) => Record, +}; + +const ANALYTICS_USER_JOIN = ` + LEFT JOIN ( + SELECT + user_id, + argMax(JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8'), event_at) AS latest_is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + GROUP BY user_id + ) AS token_refresh_users + ON e.user_id = token_refresh_users.user_id +`; +const NON_ANON_FILTER = "({includeAnonymous:UInt8} = 1 OR coalesce(JSONExtract(toJSONString(e.data), 'is_anonymous', 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; + +// Same joins/filters after fix 1 (direct CAST instead of JSONExtract(toJSONString(...))) +const ANALYTICS_USER_JOIN_AFTER = ` + LEFT JOIN ( + SELECT + user_id, + argMax(coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0), event_at) AS latest_is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + GROUP BY user_id + ) AS token_refresh_users + ON e.user_id = token_refresh_users.user_id +`; +const NON_ANON_FILTER_AFTER = "({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; + +const ROUTE_QUERIES_BEFORE: RouteQuery[] = [ + { + name: "loadUsersByCountry", + desc: "argMax country per user over all $token-refresh events (no window)", + sql: ` + SELECT + country_code, + count() AS userCount + FROM ( + SELECT + 
user_id, + argMax(cc, event_at) AS country_code + FROM ( + SELECT + user_id, + event_at, + CAST(data.ip_info.country_code, 'Nullable(String)') AS cc, + CAST(data.is_anonymous, 'UInt8') AS is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + ) + WHERE cc IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR is_anonymous = 0) + GROUP BY user_id + ) + WHERE country_code IS NOT NULL + GROUP BY country_code + ORDER BY userCount DESC + `, + }, + { + name: "loadDailyActiveUsers", + desc: "DAU per day over 30d (uniqExact on raw user_id)", + sql: ` + SELECT + toDate(event_at) AS day, + uniqExact(assumeNotNull(user_id)) AS dau + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "loadDailyActiveUsersSplit", + desc: "All (day, user_id) pairs — ships N rows back to Node for split processing", + sql: ` + SELECT + toDate(event_at) AS day, + assumeNotNull(user_id) AS user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + GROUP BY day, user_id + `, + }, + { + name: "loadDailyActiveTeamsSplit", + desc: "All (day, team_id) pairs — same shape as DAU split, team side", + sql: ` + SELECT + toDate(event_at) AS day, + assumeNotNull(team_id) AS team_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND team_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + GROUP BY day, team_id + `, + }, + { + name: "loadMonthlyActiveUsers (FIXED: v4)", + desc: "NEW: uniqExact(sipHash64(normalized)) — what we just shipped", + sql: ` + SELECT uniqExact(sipHash64(normalized_user_id)) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "analyticsOverview:dailyEvents", + desc: "page-view+click daily counts, visitors/day — with LEFT JOIN to token_refresh_users", + sql: ` + SELECT + toDate(e.event_at) AS day, + countIf( + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS pv, + countIf( + e.event_type = '$click' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS cl, + uniqExactIf( + assumeNotNull(e.user_id), + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN} + WHERE e.event_type IN ('$page-view', '$click') + AND 
e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "analyticsOverview:totalVisitors", + desc: "uniq visitors over 30d (page-view + join)", + sql: ` + SELECT + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.user_id IS NOT NULL + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + `, + }, + { + name: "analyticsOverview:topReferrers", + desc: "top 100 referrers by uniq visitors (GROUP BY referrer)", + sql: ` + SELECT + nullIf(CAST(e.data.referrer, 'String'), '') AS referrer, + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY referrer + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 100 + `, + }, + { + name: "analyticsOverview:topRegion", + desc: "top (country, region) by uniq visitors (LIMIT 1)", + sql: ` + SELECT + CAST(data.ip_info.country_code, 'Nullable(String)') AS country_code, + CAST(data.ip_info.region_code, 'Nullable(String)') AS region_code, + uniqExactIf( + assumeNotNull(user_id), + user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + ) AS visitors + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + GROUP BY country_code, region_code + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 1 + `, + }, + { + name: "analyticsOverview:online", + desc: "uniq users active in last 5 minutes", + sql: ` + SELECT + uniqExact(assumeNotNull(user_id)) AS online + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {onlineSince:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + `, + extraParams: (now, untilExclusive) => ({ + onlineSince: formatCh(new Date(now.getTime() - 5 * 60 * 1000)), + untilExclusive: formatCh(untilExclusive), + }), + }, +]; + +// After fixes 1 + 3. Same names, updated SQL. Queries not touched by either +// fix (loadUsersByCountry, loadMonthlyActiveUsers-FIXED) reuse the BEFORE entry. +function splitSqlAfter(idCol: "user_id" | "team_id", withAnonFilter: boolean): string { + const anonFilter = withAnonFilter + ? 
"AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)" + : ""; + return ` + SELECT + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + ${idCol}, + lagInFrame(day, 1) OVER (PARTITION BY ${idCol} ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + assumeNotNull(${idCol}) AS ${idCol} + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + ) + ) AS w + LEFT JOIN ( + SELECT + assumeNotNull(${idCol}) AS ${idCol}, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + GROUP BY ${idCol} + ) AS f USING (${idCol}) + GROUP BY w.day + ORDER BY w.day ASC + `; +} + +const ROUTE_QUERIES_AFTER: RouteQuery[] = [ + // Unchanged by fix 1/3 (already uses CAST). + ROUTE_QUERIES_BEFORE[0], // loadUsersByCountry + { + name: "loadDailyActiveUsers", + desc: "DAU per day (fix 1: CAST instead of JSONExtract)", + sql: ` + SELECT + toDate(event_at) AS day, + uniqExact(assumeNotNull(user_id)) AS dau + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "loadDailyActiveUsersSplit", + desc: "fix 3: server-side new/retained/reactivated (no PG join, 31-row result)", + sql: splitSqlAfter("user_id", true), + }, + { + name: "loadDailyActiveTeamsSplit", + desc: "fix 3: server-side new/retained/reactivated (no PG join, 31-row result)", + sql: splitSqlAfter("team_id", false), + }, + // Unchanged — already v4 fixed. 
+ ROUTE_QUERIES_BEFORE[4], // loadMonthlyActiveUsers + { + name: "analyticsOverview:dailyEvents", + desc: "fix 1: direct CAST in join + non-anon filter", + sql: ` + SELECT + toDate(e.event_at) AS day, + countIf( + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS pv, + countIf( + e.event_type = '$click' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS cl, + uniqExactIf( + assumeNotNull(e.user_id), + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN_AFTER} + WHERE e.event_type IN ('$page-view', '$click') + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "analyticsOverview:totalVisitors", + desc: "fix 1: direct CAST in join + non-anon filter", + sql: ` + SELECT + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN_AFTER} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.user_id IS NOT NULL + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + `, + }, + { + name: "analyticsOverview:topReferrers", + desc: "fix 1: direct CAST in join + non-anon filter", + sql: ` + SELECT + nullIf(CAST(e.data.referrer, 'String'), '') AS referrer, + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN_AFTER} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY referrer + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 100 + `, + }, + { + name: "analyticsOverview:topRegion", + desc: "fix 1: direct CAST", + sql: ` + SELECT + CAST(data.ip_info.country_code, 'Nullable(String)') AS country_code, + CAST(data.ip_info.region_code, 'Nullable(String)') AS region_code, + uniqExactIf( + assumeNotNull(user_id), + user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS visitors + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + GROUP BY country_code, region_code + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 1 + `, + }, + { + name: "analyticsOverview:online", + desc: "fix 1: direct CAST", + sql: ` + SELECT + uniqExact(assumeNotNull(user_id)) AS online + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {onlineSince:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + `, + extraParams: (now, untilExclusive) => ({ + onlineSince: formatCh(new Date(now.getTime() - 5 * 60 * 1000)), + untilExclusive: formatCh(untilExclusive), + }), + }, +]; + +// More aggressive optimizations stacked on top of 
fixes 1+3. Each entry is +// paired with its BEFORE/AFTER counterpart by name (normalized) so the +// comparator can line them up. +const ROUTE_QUERIES_OPTIMIZED: RouteQuery[] = [ + { + name: "loadUsersByCountry", + desc: "opt: add 30-day event_at window (was unbounded)", + sql: ` + SELECT + country_code, + count() AS userCount + FROM ( + SELECT + user_id, + argMax(cc, event_at) AS country_code + FROM ( + SELECT + user_id, + event_at, + CAST(data.ip_info.country_code, 'Nullable(String)') AS cc, + CAST(data.is_anonymous, 'UInt8') AS is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ) + WHERE cc IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR is_anonymous = 0) + GROUP BY user_id + ) + WHERE country_code IS NOT NULL + GROUP BY country_code + ORDER BY userCount DESC + `, + }, + { + name: "loadDailyActiveUsersSplit", + desc: "opt: sipHash64(user_id) as window partition key + join key", + sql: ` + SELECT + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + user_hash, + lagInFrame(day, 1) OVER (PARTITION BY user_hash ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + sipHash64(assumeNotNull(user_id)) AS user_hash + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + ) AS w + LEFT JOIN ( + SELECT + sipHash64(assumeNotNull(user_id)) AS user_hash, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + GROUP BY user_hash + ) AS f USING (user_hash) + GROUP BY w.day + ORDER BY w.day ASC + `, + }, + { + name: "loadDailyActiveTeamsSplit", + desc: "opt: sipHash64(team_id) as window partition key + join key", + sql: ` + SELECT + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + team_hash, + lagInFrame(day, 1) OVER (PARTITION BY team_hash ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + sipHash64(assumeNotNull(team_id)) AS team_hash + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND team_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ) + ) AS w + LEFT JOIN ( + SELECT + sipHash64(assumeNotNull(team_id)) AS team_hash, + toDate(min(event_at)) AS first_date + FROM 
analytics_internal.events
+        WHERE event_type = '$token-refresh'
+          AND project_id = {projectId:String}
+          AND branch_id = {branchId:String}
+          AND team_id IS NOT NULL
+          AND event_at < {untilExclusive:DateTime}
+        GROUP BY team_hash
+      ) AS f USING (team_hash)
+      GROUP BY w.day
+      ORDER BY w.day ASC
+    `,
+  },
+  {
+    name: "analyticsOverview:dailyEvents",
+    desc: "opt: drop LEFT JOIN, trust e.data.is_anonymous",
+    sql: `
+      SELECT
+        toDate(e.event_at) AS day,
+        countIf(
+          e.event_type = '$page-view'
+          AND e.user_id IS NOT NULL
+          AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+        ) AS pv,
+        countIf(
+          e.event_type = '$click'
+          AND e.user_id IS NOT NULL
+          AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+        ) AS cl,
+        uniqExactIf(
+          assumeNotNull(e.user_id),
+          e.event_type = '$page-view'
+          AND e.user_id IS NOT NULL
+          AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+        ) AS visitors
+      FROM analytics_internal.events AS e
+      WHERE e.event_type IN ('$page-view', '$click')
+        AND e.project_id = {projectId:String}
+        AND e.branch_id = {branchId:String}
+        AND e.event_at >= {since:DateTime}
+        AND e.event_at < {untilExclusive:DateTime}
+      GROUP BY day
+      ORDER BY day ASC
+    `,
+  },
+  {
+    name: "analyticsOverview:totalVisitors",
+    desc: "opt: drop LEFT JOIN, trust e.data.is_anonymous",
+    sql: `
+      SELECT
+        uniqExactIf(
+          assumeNotNull(e.user_id),
+          e.user_id IS NOT NULL
+          AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+        ) AS visitors
+      FROM analytics_internal.events AS e
+      WHERE e.event_type = '$page-view'
+        AND e.project_id = {projectId:String}
+        AND e.branch_id = {branchId:String}
+        AND e.user_id IS NOT NULL
+        AND e.event_at >= {since:DateTime}
+        AND e.event_at < {untilExclusive:DateTime}
+    `,
+  },
+  {
+    name: "analyticsOverview:topReferrers",
+    desc: "opt: drop LEFT JOIN, trust e.data.is_anonymous",
+    sql: `
+      SELECT
+        nullIf(CAST(e.data.referrer, 'String'), '') AS referrer,
+        uniqExactIf(
+          assumeNotNull(e.user_id),
+          e.user_id IS NOT NULL
+          AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)
+        ) AS visitors
+      FROM analytics_internal.events AS e
+      WHERE e.event_type = '$page-view'
+        AND e.project_id = {projectId:String}
+        AND e.branch_id = {branchId:String}
+        AND e.event_at >= {since:DateTime}
+        AND e.event_at < {untilExclusive:DateTime}
+      GROUP BY referrer
+      HAVING visitors > 0
+      ORDER BY visitors DESC
+      LIMIT 100
+    `,
+  },
+];
+
+async function runRouteQuery(rq: RouteQuery, p: QueryParams, now: Date): Promise<string> {
+  const client = getClickhouseAdminClient();
+  const queryId = `bench-route-${rq.name.replace(/[^a-z0-9]/gi, "-")}-${randomUUID()}`;
+  const baseParams: Record<string, unknown> = {
+    projectId: p.projectId,
+    branchId: p.branchId,
+    since: formatCh(p.since),
+    untilExclusive: formatCh(p.untilExclusive),
+    includeAnonymous: p.includeAnonymous ? 1 : 0,
+    uuidRe: UUID_RE_CH,
+  };
+  const extra = rq.extraParams ? rq.extraParams(now, p.untilExclusive) : {};
+  await client.query({
+    query: rq.sql,
+    query_params: { ...baseParams, ...extra },
+    query_id: queryId,
+    format: "JSONEachRow",
+  }).then((r) => r.json()); // drain stream
+  return queryId;
+}
+
+async function benchmarkRouteQueries(now: Date): Promise<void> {
+  const untilExclusive = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS);
+  const since = new Date(untilExclusive.getTime() - METRICS_WINDOW_MS);
+  const params: QueryParams = {
+    projectId: BENCH_PROJECT_ID,
+    branchId: PERF_BRANCH_ID,
+    since,
+    untilExclusive,
+    includeAnonymous: false,
+  };
+
+  console.log("\n── Route-wide query benchmark (every ClickHouse query in /internal/metrics) ──");
+
+  // Warm cache once.
+  await runRouteQuery(ROUTE_QUERIES_BEFORE[1], params, now);
+
+  async function runAll(list: RouteQuery[]): Promise<Map<string, QueryStats>> {
+    const out = new Map<string, QueryStats>();
+    for (const rq of list) {
+      const qid = await runRouteQuery(rq, params, now);
+      out.set(rq.name, await readStats(qid));
+    }
+    return out;
+  }
+
+  // Also capture the actual row payload so we can check correctness for OPT
+  // variants (e.g., dropping the LEFT JOIN on analyticsOverview must not change counts).
+  async function runAndCollect(list: RouteQuery[]): Promise<{ stats: Map<string, QueryStats>, payloads: Map<string, unknown[]> }> {
+    const stats = new Map<string, QueryStats>();
+    const payloads = new Map<string, unknown[]>();
+    for (const rq of list) {
+      const client = getClickhouseAdminClient();
+      const queryId = `bench-route-${rq.name.replace(/[^a-z0-9]/gi, "-")}-${randomUUID()}`;
+      const baseParams: Record<string, unknown> = {
+        projectId: params.projectId,
+        branchId: params.branchId,
+        since: formatCh(params.since),
+        untilExclusive: formatCh(params.untilExclusive),
+        includeAnonymous: params.includeAnonymous ? 1 : 0,
+        uuidRe: UUID_RE_CH,
+      };
+      const extra = rq.extraParams ? rq.extraParams(now, params.untilExclusive) : {};
+      const res = await client.query({
+        query: rq.sql,
+        query_params: { ...baseParams, ...extra },
+        query_id: queryId,
+        format: "JSONEachRow",
+      });
+      const rows = (await res.json()) as unknown[];
+      payloads.set(rq.name, rows);
+      stats.set(rq.name, await readStats(queryId));
+    }
+    return { stats, payloads };
+  }
+
+  const before = await runAndCollect(ROUTE_QUERIES_BEFORE);
+  const after = await runAndCollect(ROUTE_QUERIES_AFTER);
+  const opt = await runAndCollect(ROUTE_QUERIES_OPTIMIZED);
+  const beforeStats = before.stats;
+  const afterStats = after.stats;
+
+  // Normalize query names for the comparison table. Some AFTER queries have
+  // the same name as BEFORE so they line up; loadMonthlyActiveUsers's BEFORE
+  // entry is labeled "(FIXED: v4)" — normalize to match.
+ const normalize = (n: string) => n.replace(/\s*\(FIXED.*\)$/, "").trim(); + + const padL = (s: string, n: number) => s.padEnd(n); + const padR = (s: string, n: number) => s.padStart(n); + const fmtDelta = (a: number, b: number): string => { + if (a === 0) return "—"; + const ratio = a / Math.max(b, 1); + if (ratio >= 1) return `${ratio.toFixed(2)}× less`; + return `${(1 / ratio).toFixed(2)}× more`; + }; + + console.log("\n Per-query comparison (BEFORE → AFTER):"); + console.log(" " + [ + padL("query", 36), + padR("mem BEFORE", 12), + padR("mem AFTER", 12), + padR("Δ mem", 10), + padR("dur BEFORE", 11), + padR("dur AFTER", 11), + padR("Δ dur", 10), + ].join(" ")); + console.log(" " + "─".repeat(130)); + + const pairs: { name: string, before: QueryStats, after: QueryStats }[] = []; + for (const rq of ROUTE_QUERIES_BEFORE) { + const name = normalize(rq.name); + const b = beforeStats.get(rq.name); + const a = afterStats.get(rq.name) ?? afterStats.get(name) ?? + [...afterStats.entries()].find(([k]) => normalize(k) === name)?.[1]; + if (!b || !a) continue; + pairs.push({ name, before: b, after: a }); + } + // Sort by BEFORE memory descending for readability. + pairs.sort((x, y) => y.before.memory_usage - x.before.memory_usage); + + for (const { name, before, after } of pairs) { + console.log(" " + [ + padL(name, 36), + padR(fmtBytes(before.memory_usage), 12), + padR(fmtBytes(after.memory_usage), 12), + padR(fmtDelta(before.memory_usage, after.memory_usage), 10), + padR(`${before.query_duration_ms} ms`, 11), + padR(`${after.query_duration_ms} ms`, 11), + padR(fmtDelta(before.query_duration_ms, after.query_duration_ms), 10), + ].join(" ")); + } + + const sumMemBefore = pairs.reduce((a, b) => a + b.before.memory_usage, 0); + const sumMemAfter = pairs.reduce((a, b) => a + b.after.memory_usage, 0); + const maxDurBefore = Math.max(...pairs.map((p) => p.before.query_duration_ms)); + const maxDurAfter = Math.max(...pairs.map((p) => p.after.query_duration_ms)); + const sumDurBefore = pairs.reduce((a, b) => a + b.before.query_duration_ms, 0); + const sumDurAfter = pairs.reduce((a, b) => a + b.after.query_duration_ms, 0); + const sumReadBefore = pairs.reduce((a, b) => a + b.before.read_bytes, 0); + const sumReadAfter = pairs.reduce((a, b) => a + b.after.read_bytes, 0); + const sumResultBefore = pairs.reduce((a, b) => a + b.before.result_bytes, 0); + const sumResultAfter = pairs.reduce((a, b) => a + b.after.result_bytes, 0); + + console.log("\n Totals (BEFORE → AFTER):"); + console.log(` Sum peak memory: ${fmtBytes(sumMemBefore)} → ${fmtBytes(sumMemAfter)} (${fmtDelta(sumMemBefore, sumMemAfter)})`); + console.log(` Max query dur: ${maxDurBefore} ms → ${maxDurAfter} ms (${fmtDelta(maxDurBefore, maxDurAfter)}) [endpoint wall-clock floor]`); + console.log(` Sum query dur: ${sumDurBefore} ms → ${sumDurAfter} ms (${fmtDelta(sumDurBefore, sumDurAfter)}) [total CPU work]`); + console.log(` Sum bytes read: ${fmtBytes(sumReadBefore)} → ${fmtBytes(sumReadAfter)} (${fmtDelta(sumReadBefore, sumReadAfter)})`); + console.log(` Sum result ship: ${fmtBytes(sumResultBefore)} → ${fmtBytes(sumResultAfter)} (${fmtDelta(sumResultBefore, sumResultAfter)})`); + + // ── AFTER vs OPTIMIZED (additional peak-memory work) ─────────────────────── + console.log("\n AFTER vs OPTIMIZED (stacked on top of fixes 1+3):"); + console.log(" " + [ + padL("query", 36), + padR("mem AFTER", 12), + padR("mem OPT", 12), + padR("Δ mem", 12), + padR("dur AFTER", 11), + padR("dur OPT", 11), + padR("Δ dur", 10), + padL("counts=", 10), + ].join(" ")); + 
console.log(" " + "─".repeat(140)); + + type OptRow = { name: string, after: QueryStats, optStats: QueryStats, countsMatch: boolean | null }; + const optRows: OptRow[] = []; + for (const rq of ROUTE_QUERIES_OPTIMIZED) { + const optStats = opt.stats.get(rq.name); + const afterS = after.stats.get(rq.name); + const optPayload = opt.payloads.get(rq.name); + const afterPayload = after.payloads.get(rq.name); + if (!optStats || !afterS || !optPayload || !afterPayload) continue; + // Deep-equal JSON of both sets (ordered matters for top-N, fine otherwise). + const countsMatch = JSON.stringify(optPayload) === JSON.stringify(afterPayload); + optRows.push({ name: rq.name, after: afterS, optStats, countsMatch }); + } + optRows.sort((a, b) => b.after.memory_usage - a.after.memory_usage); + for (const r of optRows) { + console.log(" " + [ + padL(r.name, 36), + padR(fmtBytes(r.after.memory_usage), 12), + padR(fmtBytes(r.optStats.memory_usage), 12), + padR(fmtDelta(r.after.memory_usage, r.optStats.memory_usage), 12), + padR(`${r.after.query_duration_ms} ms`, 11), + padR(`${r.optStats.query_duration_ms} ms`, 11), + padR(fmtDelta(r.after.query_duration_ms, r.optStats.query_duration_ms), 10), + padL(r.countsMatch ? "yes" : "NO", 10), + ].join(" ")); + } + + // Totals if we stack OPTIMIZED on top (using OPT for queries that have an + // OPT variant, AFTER for queries that don't). + const optByName = new Map(ROUTE_QUERIES_OPTIMIZED.map((q) => [q.name, q])); + let sumMemStacked = 0; + let maxDurStacked = 0; + let sumDurStacked = 0; + for (const rq of ROUTE_QUERIES_AFTER) { + const nm = rq.name; + const optHasIt = optByName.has(nm); + const s = optHasIt ? opt.stats.get(nm) : after.stats.get(nm); + if (!s) continue; + sumMemStacked += s.memory_usage; + sumDurStacked += s.query_duration_ms; + maxDurStacked = Math.max(maxDurStacked, s.query_duration_ms); + } + console.log("\n Totals (AFTER → OPTIMIZED-stacked):"); + console.log(` Sum peak memory: ${fmtBytes(sumMemAfter)} → ${fmtBytes(sumMemStacked)} (${fmtDelta(sumMemAfter, sumMemStacked)})`); + console.log(` Max query dur: ${maxDurAfter} ms → ${maxDurStacked} ms (${fmtDelta(maxDurAfter, maxDurStacked)})`); + console.log(` Sum query dur: ${sumDurAfter} ms → ${sumDurStacked} ms (${fmtDelta(sumDurAfter, sumDurStacked)})`); +} + +async function runVariant(v: Variant, p: QueryParams): Promise<{ count: number, queryId: string }> { + const client = getClickhouseAdminClient(); + const queryId = `bench-${v.name}-${randomUUID()}`; + const res = await client.query({ + query: v.sql, + query_params: { + projectId: p.projectId, + branchId: p.branchId, + since: formatCh(p.since), + untilExclusive: formatCh(p.untilExclusive), + includeAnonymous: p.includeAnonymous ? 1 : 0, + uuidRe: UUID_RE_CH, + }, + query_id: queryId, + format: "JSONEachRow", + }); + const rows = (await res.json()) as { mau: string | number }[]; + return { count: Number(rows[0]?.mau ?? 
0), queryId };
+}
+
+type QueryStats = {
+  memory_usage: number,
+  read_rows: number,
+  read_bytes: number,
+  result_rows: number,
+  result_bytes: number,
+  query_duration_ms: number,
+};
+
+async function readStats(queryId: string): Promise<QueryStats> {
+  const client = getClickhouseAdminClient();
+  await client.command({ query: "SYSTEM FLUSH LOGS" });
+  const delays = [100, 200, 400, 800, 1600];
+  for (let i = 0; i <= delays.length; i++) {
+    const res = await client.query({
+      query: `
+        SELECT
+          toUInt64(memory_usage) AS memory_usage,
+          toUInt64(read_rows) AS read_rows,
+          toUInt64(read_bytes) AS read_bytes,
+          toUInt64(result_rows) AS result_rows,
+          toUInt64(result_bytes) AS result_bytes,
+          toUInt64(query_duration_ms) AS query_duration_ms
+        FROM system.query_log
+        WHERE query_id = {qid:String} AND type = 'QueryFinish'
+        ORDER BY event_time DESC
+        LIMIT 1
+      `,
+      query_params: { qid: queryId },
+      format: "JSONEachRow",
+    });
+    const rows = (await res.json()) as Array<Record<string, string>>;
+    if (rows.length === 1) {
+      const r = rows[0];
+      return {
+        memory_usage: Number(r.memory_usage),
+        read_rows: Number(r.read_rows),
+        read_bytes: Number(r.read_bytes),
+        result_rows: Number(r.result_rows),
+        result_bytes: Number(r.result_bytes),
+        query_duration_ms: Number(r.query_duration_ms),
+      };
+    }
+    if (i < delays.length) await new Promise((r) => setTimeout(r, delays[i]));
+  }
+  throw new Error(`no query_log row for ${queryId}`);
+}
+
+async function seed(rows: EventRow[], batch = envInt("BENCH_BATCH", 50_000)): Promise<void> {
+  const client = getClickhouseAdminClient();
+  for (let i = 0; i < rows.length; i += batch) {
+    const chunk = rows.slice(i, i + batch);
+    await client.insert({
+      table: "analytics_internal.events",
+      values: chunk,
+      format: "JSONEachRow",
+      clickhouse_settings: { date_time_input_format: "best_effort" },
+    });
+  }
+}
+
+async function cleanup(): Promise<void> {
+  const client = getClickhouseAdminClient();
+  await client.command({
+    query: `ALTER TABLE analytics_internal.events DELETE WHERE project_id = {p:String}`,
+    query_params: { p: BENCH_PROJECT_ID },
+    // Block until the mutation is applied so the script exits clean.
+    clickhouse_settings: { mutations_sync: "2" },
+  });
+}
+
+// ── Edge-case matrix ─────────────────────────────────────────────────────────
+
+type Case = {
+  name: string,
+  branchId: string,
+  includeAnonymous: boolean,
+  expected: number,
+  buildEvents: (windowStart: Date, windowEnd: Date) => EventRow[],
+};
+
+function mkUuid(): string {
+  // randomUUID is v4, matches isUuid regex.
+  return randomUUID();
+}
+
+function mkEvent(opts: {
+  branchId: string,
+  at: Date,
+  userId: string | null,
+  isAnonymous?: boolean | null,
+  eventType?: string,
+  projectId?: string,
+  teamId?: string | null,
+  extraData?: Record<string, unknown>,
+}): EventRow {
+  const eventType = opts.eventType ?? "$token-refresh";
+  let data: Record<string, unknown>;
+  if (eventType === "$token-refresh") {
+    data = {
+      refresh_token_id: mkUuid(),
+      ip_info: null,
+    };
+    if (opts.isAnonymous !== undefined) data.is_anonymous = opts.isAnonymous;
+  } else {
+    data = {};
+    if (opts.isAnonymous !== undefined) data.is_anonymous = opts.isAnonymous;
+  }
+  if (opts.extraData) Object.assign(data, opts.extraData);
+  return {
+    event_type: eventType,
+    event_at: formatCh(opts.at),
+    data,
+    project_id: opts.projectId ?? BENCH_PROJECT_ID,
+    branch_id: opts.branchId,
+    user_id: opts.userId,
+    team_id: opts.teamId ??
null, + }; +} + +function buildMatrix(): Case[] { + const otherProjectId = `${BENCH_PROJECT_ID}-other`; + return [ + { + name: "empty", + branchId: "m-empty", + includeAnonymous: false, + expected: 0, + buildEvents: () => [], + }, + { + name: "one user one event", + branchId: "m-one", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-one", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "one user many events (dedup)", + branchId: "m-dedup", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => { + const u = mkUuid(); + const rows: EventRow[] = []; + for (let i = 0; i < 50; i++) { + rows.push(mkEvent({ branchId: "m-dedup", at: new Date(s.getTime() + (i + 1) * 60_000), userId: u, isAnonymous: false })); + } + return rows; + }, + }, + { + name: "mixed anon + non-anon, include_anonymous=false", + branchId: "m-mix-false", + includeAnonymous: false, + expected: 3, + buildEvents: (s) => [ + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 4 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 5 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + ], + }, + { + name: "mixed anon + non-anon, include_anonymous=true", + branchId: "m-mix-true", + includeAnonymous: true, + expected: 5, + buildEvents: (s) => [ + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 4 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 5 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + ], + }, + { + name: "window boundary (before since / after until)", + branchId: "m-boundary", + includeAnonymous: false, + expected: 1, + buildEvents: (s, e) => [ + // just before since — should be excluded + mkEvent({ branchId: "m-boundary", at: new Date(s.getTime() - 1000), userId: mkUuid(), isAnonymous: false }), + // inside window — counted + mkEvent({ branchId: "m-boundary", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + // at untilExclusive — excluded (half-open interval) + mkEvent({ branchId: "m-boundary", at: new Date(e.getTime()), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "null user_id", + branchId: "m-null-uid", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-null-uid", at: new Date(s.getTime() + ONE_DAY_MS), userId: null, isAnonymous: false }), + mkEvent({ branchId: "m-null-uid", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "non-UUID user_id (filtered)", + branchId: "m-non-uuid", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-non-uuid", at: new 
Date(s.getTime() + ONE_DAY_MS), userId: "not-a-uuid", isAnonymous: false }), + mkEvent({ branchId: "m-non-uuid", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: "12345678-1234-1234-1234-123456789012", isAnonymous: false }), // v1 UUID shape, fails v4 regex + mkEvent({ branchId: "m-non-uuid", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "case variation on user_id (dedup)", + branchId: "m-case", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => { + const u = mkUuid(); + return [ + mkEvent({ branchId: "m-case", at: new Date(s.getTime() + ONE_DAY_MS), userId: u, isAnonymous: false }), + mkEvent({ branchId: "m-case", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: u.toUpperCase(), isAnonymous: false }), + mkEvent({ branchId: "m-case", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: ` ${u} `, isAnonymous: false }), + ]; + }, + }, + { + name: "project isolation", + branchId: "m-iso", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-iso", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-iso", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false, projectId: otherProjectId }), + ], + }, + { + name: "missing is_anonymous field (treated as non-anon)", + branchId: "m-missing", + includeAnonymous: false, + expected: 2, + buildEvents: (s) => [ + mkEvent({ branchId: "m-missing", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid() /* no is_anonymous */ }), + mkEvent({ branchId: "m-missing", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "null is_anonymous (treated as non-anon)", + branchId: "m-null-anon", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-null-anon", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: null }), + ], + }, + { + name: "wrong event_type ignored", + branchId: "m-wrong-type", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-wrong-type", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false, eventType: "$page-view" }), + mkEvent({ branchId: "m-wrong-type", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + ]; +} + +async function runMatrix(now: Date): Promise { + const untilExclusive = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const since = new Date(untilExclusive.getTime() - METRICS_WINDOW_MS - ONE_DAY_MS); + + const cases = buildMatrix(); + // Seed everything for matrix in one go (lots of small branches). + const all: EventRow[] = cases.flatMap((c) => c.buildEvents(since, untilExclusive)); + if (all.length) await seed(all); + + let allPass = true; + console.log("\n── Equivalence matrix (set equality, not just count) ──"); + for (const c of cases) { + const params: QueryParams = { + projectId: BENCH_PROJECT_ID, + branchId: c.branchId, + since, + untilExclusive, + includeAnonymous: c.includeAnonymous, + }; + const [oldRes, newRes, newSet] = await Promise.all([ + runOld(params), + runNew(params), + runNewSet(params), + ]); + const countMatch = oldRes.count === newRes.count && oldRes.count === c.expected; + const { onlyInA, onlyInB } = setDiff(oldRes.set, newSet); + const setMatch = onlyInA.length === 0 && onlyInB.length === 0; + const match = countMatch && setMatch; + const tag = match ? 
"OK" : "FAIL"; + console.log( + ` [${tag}] ${c.name.padEnd(48)} expected=${c.expected} old_count=${oldRes.count} new_count=${newRes.count} set_match=${setMatch}`, + ); + if (!setMatch) { + if (onlyInA.length > 0) console.log(` only in OLD: ${onlyInA.slice(0, 3).join(", ")}${onlyInA.length > 3 ? ` …(+${onlyInA.length - 3})` : ""}`); + if (onlyInB.length > 0) console.log(` only in NEW: ${onlyInB.slice(0, 3).join(", ")}${onlyInB.length > 3 ? ` …(+${onlyInB.length - 3})` : ""}`); + } + if (!match) allPass = false; + } + return allPass; +} + +// ── Heavy perf seed ────────────────────────────────────────────────────────── + +const COUNTRY_CODES = ["US", "DE", "FR", "GB", "JP", "IN", "BR", "CA", "AU", "ES"]; +const REFERRERS = ["https://google.com/", "https://twitter.com/", "https://news.ycombinator.com/", "", "https://github.com/", "https://reddit.com/"]; + +function pick(arr: T[]): T { + return arr[Math.floor(Math.random() * arr.length)]; +} + +async function seedPerf(now: Date): Promise { + const users = envInt("BENCH_USERS", 200_000); + const perUser = envInt("BENCH_EVENTS_USER", 5); + const pvPerUser = envInt("BENCH_PAGE_VIEWS_USER", 3); + const clicksPerUser = envInt("BENCH_CLICKS_USER", 1); + const teamRatio = envFloat("BENCH_TEAM_RATIO", 0.3); + const teamCount = Math.max(1, Math.floor(users * 0.05)); // ~5% as many teams as users + const anonRatio = envFloat("BENCH_ANON_RATIO", 0.1); + const tokenEvents = users * perUser; + const pvEvents = users * pvPerUser; + const clickEvents = users * clicksPerUser; + const total = tokenEvents + pvEvents + clickEvents; + console.log( + `\n── Seeding perf data: ${users.toLocaleString()} users ` + + `× (${perUser} $token-refresh + ${pvPerUser} $page-view + ${clicksPerUser} $click) ` + + `+ ${teamCount.toLocaleString()} teams = ${total.toLocaleString()} rows ──`, + ); + + const batchRows = envInt("BENCH_BATCH", 50_000); + const windowEnd = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const windowStart = new Date(windowEnd.getTime() - METRICS_WINDOW_MS); + const spanMs = windowEnd.getTime() - windowStart.getTime(); + const teamIds: string[] = Array.from({ length: teamCount }, () => mkUuid()); + + const t0 = Date.now(); + let buf: EventRow[] = []; + const flushIfNeeded = async () => { + if (buf.length >= batchRows) { + await seed(buf, batchRows); + buf = []; + } + }; + for (let u = 0; u < users; u++) { + const uid = mkUuid(); + const isAnon = Math.random() < anonRatio; + const country = pick(COUNTRY_CODES); + const region = country + "-" + Math.floor(Math.random() * 50).toString(36); + const teamId = Math.random() < teamRatio ? 
pick(teamIds) : null; + // $token-refresh events (realistic ip_info payload) + for (let e = 0; e < perUser; e++) { + const at = new Date(windowStart.getTime() + Math.floor(Math.random() * spanMs)); + buf.push({ + event_type: "$token-refresh", + event_at: formatCh(at), + data: { + refresh_token_id: mkUuid(), + is_anonymous: isAnon, + ip_info: { + ip: `${Math.floor(Math.random() * 255)}.${Math.floor(Math.random() * 255)}.${Math.floor(Math.random() * 255)}.${Math.floor(Math.random() * 255)}`, + is_trusted: true, + country_code: country, + region_code: region, + city_name: `City-${Math.floor(Math.random() * 1000)}`, + latitude: Math.random() * 180 - 90, + longitude: Math.random() * 360 - 180, + tz_identifier: "UTC", + }, + }, + project_id: BENCH_PROJECT_ID, + branch_id: PERF_BRANCH_ID, + user_id: uid, + team_id: teamId, + }); + await flushIfNeeded(); + } + // $page-view events + for (let e = 0; e < pvPerUser; e++) { + const at = new Date(windowStart.getTime() + Math.floor(Math.random() * spanMs)); + buf.push(mkEvent({ + branchId: PERF_BRANCH_ID, + at, + userId: uid, + isAnonymous: isAnon, + eventType: "$page-view", + extraData: { referrer: pick(REFERRERS), url: `https://example.com/page-${Math.floor(Math.random() * 100)}` }, + })); + await flushIfNeeded(); + } + // $click events + for (let e = 0; e < clicksPerUser; e++) { + const at = new Date(windowStart.getTime() + Math.floor(Math.random() * spanMs)); + buf.push(mkEvent({ + branchId: PERF_BRANCH_ID, + at, + userId: uid, + isAnonymous: isAnon, + eventType: "$click", + extraData: { element: `btn-${Math.floor(Math.random() * 50)}` }, + })); + await flushIfNeeded(); + } + if ((u + 1) % 20_000 === 0) { + console.log(` seeded ${(u + 1).toLocaleString()} / ${users.toLocaleString()} users (${((Date.now() - t0) / 1000).toFixed(1)}s)`); + } + } + if (buf.length) await seed(buf, batchRows); + // Force parts to settle so first-query cost isn't dominated by merges. + const client = getClickhouseAdminClient(); + await client.command({ query: "OPTIMIZE TABLE analytics_internal.events FINAL", clickhouse_settings: { mutations_sync: "2" } }); + console.log(` done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); +} + +function fmtBytes(n: number): string { + if (n < 1024) return `${n} B`; + if (n < 1024 ** 2) return `${(n / 1024).toFixed(1)} KiB`; + if (n < 1024 ** 3) return `${(n / 1024 ** 2).toFixed(1)} MiB`; + return `${(n / 1024 ** 3).toFixed(2)} GiB`; +} + +async function runPerf(now: Date): Promise { + const untilExclusive = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const since = new Date(untilExclusive.getTime() - METRICS_WINDOW_MS); + const params: QueryParams = { + projectId: BENCH_PROJECT_ID, + branchId: PERF_BRANCH_ID, + since, + untilExclusive, + includeAnonymous: false, + }; + + console.log("\n── Perf run (include_anonymous=false) ──"); + // Warm up caches so variants compete on equal footing. + const warmup = await runVariant(VARIANTS[0], params); + void warmup; + + const oldRes = await runOld(params); + const oldStats = await readStats(oldRes.queryId); + + // Set-equality baseline: the "ground truth" set of users the OLD pipeline counts. 
+ const truthSet = oldRes.set; + + type Row = { + name: string, + description: string, + approximate: boolean, + count: number, + stats: QueryStats, + setMatch: boolean | null, // null if approximate (skipped) + errorPct: number | null, + }; + + const rows: Row[] = [ + { + name: "v0_old (baseline)", + description: "current query: GROUP BY user_id + JS normalize", + approximate: false, + count: oldRes.count, + stats: oldStats, + setMatch: true, + errorPct: 0, + }, + ]; + + for (const v of VARIANTS) { + const { count, queryId } = await runVariant(v, params); + const stats = await readStats(queryId); + const errorPct = oldRes.count > 0 ? ((count - oldRes.count) / oldRes.count) * 100 : 0; + let setMatch: boolean | null = null; + if (!v.approximate) { + // Exact variant: verify it sees the *same users* as the old pipeline, not + // just the same count. Skip for approximate variants since they don't + // return a recoverable set. + const resSet = await runNewSet(params); + const { onlyInA, onlyInB } = setDiff(truthSet, resSet); + setMatch = onlyInA.length === 0 && onlyInB.length === 0; + } + rows.push({ name: v.name, description: v.description, approximate: v.approximate ?? false, count, stats, setMatch, errorPct }); + } + + // Table output, ranked by peak memory. + const baselineMem = oldStats.memory_usage; + const baselineDur = oldStats.query_duration_ms; + const padR = (s: string, n: number) => s.padStart(n); + const padL = (s: string, n: number) => s.padEnd(n); + console.log(`\n Ground truth (v0_old): MAU=${oldRes.count}`); + console.log(" " + [ + padL("variant", 24), + padR("memory", 12), + padR("vs base", 8), + padR("duration", 10), + padR("vs base", 8), + padR("read", 12), + padR("result", 10), + padR("count", 9), + padR("err%", 7), + padL("set=", 6), + ].join(" ")); + console.log(" " + "─".repeat(120)); + for (const r of rows) { + const memRatio = baselineMem / Math.max(r.stats.memory_usage, 1); + const durRatio = baselineDur / Math.max(r.stats.query_duration_ms, 1); + const setMatch = r.setMatch == null ? "—" : r.setMatch ? "yes" : "NO"; + console.log(" " + [ + padL(r.name + (r.approximate ? " ~" : ""), 24), + padR(fmtBytes(r.stats.memory_usage), 12), + padR(memRatio >= 1 ? `${memRatio.toFixed(2)}×` : `${memRatio.toFixed(2)}×`, 8), + padR(`${r.stats.query_duration_ms} ms`, 10), + padR(durRatio >= 1 ? `${durRatio.toFixed(2)}×` : `${durRatio.toFixed(2)}×`, 8), + padR(fmtBytes(r.stats.read_bytes), 12), + padR(fmtBytes(r.stats.result_bytes), 10), + padR(r.count.toLocaleString(), 9), + padR(r.errorPct == null ? "—" : `${r.errorPct >= 0 ? "+" : ""}${r.errorPct.toFixed(3)}%`, 7), + padL(setMatch, 6), + ].join(" ")); + } + console.log("\n Legend: ~ = approximate variant. 
set=yes means the variant counts the same individual users as the OLD pipeline."); +} + +async function main(): Promise { + console.log(`Benchmark run_id=${RUN_ID}`); + console.log(`project_id=${BENCH_PROJECT_ID}`); + + const now = new Date(); + let matrixOk = true; + + try { + if (!envBool("BENCH_SKIP_MATRIX")) { + matrixOk = await runMatrix(now); + if (!matrixOk) { + console.error("\nEquivalence matrix failed — skipping perf run."); + } + } else { + console.log("Skipping equivalence matrix (BENCH_SKIP_MATRIX=1)"); + } + + const doPerf = matrixOk && !envBool("BENCH_SKIP_PERF"); + const doRouteQueries = matrixOk && envBool("BENCH_ROUTE_QUERIES"); + if (doPerf || doRouteQueries) { + await seedPerf(now); + if (doPerf) await runPerf(now); + if (doRouteQueries) await benchmarkRouteQueries(now); + } else if (envBool("BENCH_SKIP_PERF")) { + console.log("Skipping perf run (BENCH_SKIP_PERF=1)"); + } + } finally { + console.log("\nCleaning up seeded rows…"); + try { + await cleanup(); + console.log(" done."); + } catch (e) { + console.error(" cleanup failed:", e); + } + } + + if (!matrixOk) process.exit(1); +} + +try { + await main(); +} catch (e) { + console.error(e); + process.exit(1); +} diff --git a/apps/backend/src/app/api/latest/internal/metrics/route.tsx b/apps/backend/src/app/api/latest/internal/metrics/route.tsx index 2c1162cfac..26398cac0c 100644 --- a/apps/backend/src/app/api/latest/internal/metrics/route.tsx +++ b/apps/backend/src/app/api/latest/internal/metrics/route.tsx @@ -2,7 +2,7 @@ import { Prisma } from "@/generated/prisma/client"; import { EmailOutboxSimpleStatus } from "@/generated/prisma/enums"; import { getClickhouseAdminClient } from "@/lib/clickhouse"; import { ClickHouseError } from "@clickhouse/client"; -import { ActivitySplit, buildSplitFromDailyEntitySets } from "@/lib/metrics-activity-split"; +import { ActivitySplit } from "@/lib/metrics-activity-split"; import { Tenancy } from "@/lib/tenancies"; import { getPrismaClientForTenancy, getPrismaSchemaForTenancy, sqlQuoteIdent } from "@/prisma-client"; import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler"; @@ -19,7 +19,6 @@ import { MetricsRecentUserSchema, } from "@stackframe/stack-shared/dist/interface/admin-metrics"; import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors"; -import { isUuid } from "@stackframe/stack-shared/dist/utils/uuids"; import { adaptSchema, adminAuthTypeSchema, yupArray, yupMixed, yupNumber, yupObject, yupRecord, yupString } from "@stackframe/stack-shared/dist/schema-fields"; import { userFullInclude, userPrismaToCrud, usersCrudHandlers } from "../../users/crud"; @@ -58,11 +57,6 @@ function formatClickhouseDateTimeParam(date: Date): string { return date.toISOString().slice(0, 19); } -function normalizeUuidFromEvent(value: string): string | null { - const normalized = value.trim().toLowerCase(); - return isUuid(normalized) ? 
normalized : null; -} - async function loadUsersByCountry(tenancy: Tenancy, includeAnonymous: boolean = false): Promise> { const clickhouseClient = getClickhouseAdminClient(); const res = await clickhouseClient.query({ @@ -163,7 +157,7 @@ async function loadDailyActiveUsers(tenancy: Tenancy, now: Date, includeAnonymou AND user_id IS NOT NULL AND event_at >= {since:DateTime} AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) GROUP BY day ORDER BY day ASC `, @@ -197,29 +191,71 @@ async function loadDailyActiveUsers(tenancy: Tenancy, now: Date, includeAnonymou return out; } -async function loadDailyActiveUsersSplit(tenancy: Tenancy, now: Date, includeAnonymous: boolean): Promise { +async function loadDailyActiveSplitFromClickhouse(options: { + tenancy: Tenancy, + now: Date, + entity: "user" | "team", + includeAnonymous: boolean, +}): Promise { + const { tenancy, now, entity, includeAnonymous } = options; const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0); const since = new Date(todayUtc.getTime() - METRICS_WINDOW_MS); const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); - const clickhouseClient = getClickhouseAdminClient(); - const schema = await getPrismaSchemaForTenancy(tenancy); - const prisma = await getPrismaClientForTenancy(tenancy); - const userRows = await clickhouseClient.query({ + const idCol = entity === "user" ? "user_id" : "team_id"; + // Teams don't have an is_anonymous concept, so that filter is users-only. + const anonFilter = entity === "user" + ? "AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)" + : ""; + + const clickhouseClient = getClickhouseAdminClient(); + // Note: the inner `assumeNotNull(${idCol}) AS entity_id` must not reuse the + // column name, or ClickHouse re-resolves `WHERE ${idCol} IS NOT NULL` + // against the alias (assumeNotNull returns '' for NULLs, which passes the + // not-null test) and phantom rows slip through. 
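+  // As an illustration only (nothing below is executed), the difference is:
+  //
+  //   -- unsafe: the alias shadows the column, so the NULL check can bind to
+  //   -- the alias; assumeNotNull turns NULL into '' and the row survives
+  //   SELECT assumeNotNull(user_id) AS user_id FROM analytics_internal.events WHERE user_id IS NOT NULL
+  //
+  //   -- safe: the alias is distinct, so WHERE keeps binding to the Nullable column
+  //   SELECT assumeNotNull(user_id) AS entity_id FROM analytics_internal.events WHERE user_id IS NOT NULL
+  //
+  // which is why the subqueries below always alias to entity_id.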
+ const result = await clickhouseClient.query({ query: ` SELECT - toDate(event_at) AS day, - assumeNotNull(user_id) AS user_id - FROM analytics_internal.events - WHERE event_type = '$token-refresh' - AND project_id = {projectId:String} - AND branch_id = {branchId:String} - AND user_id IS NOT NULL - AND event_at >= {since:DateTime} - AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) - GROUP BY day, user_id + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + entity_id, + lagInFrame(day, 1) OVER (PARTITION BY entity_id ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + assumeNotNull(${idCol}) AS entity_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + ) + ) AS w + LEFT JOIN ( + SELECT + assumeNotNull(${idCol}) AS entity_id, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + GROUP BY entity_id + ) AS f USING (entity_id) + GROUP BY w.day + ORDER BY w.day ASC `, query_params: { projectId: tenancy.project.id, @@ -229,129 +265,35 @@ async function loadDailyActiveUsersSplit(tenancy: Tenancy, now: Date, includeAno includeAnonymous: includeAnonymous ? 1 : 0, }, format: "JSONEachRow", - }).then((result) => result.json() as Promise<{ day: string, user_id: string }[]>); - - const sanitizedUserRows = userRows.flatMap((row) => { - const userId = normalizeUuidFromEvent(row.user_id); - if (userId == null) { - return []; - } - return [{ ...row, user_id: userId }]; }); - - const activeUserIds = [...new Set(sanitizedUserRows.map((row) => row.user_id))]; - const users: { projectUserId: string, signedUpAtOrCreatedAt: Date }[] = activeUserIds.length === 0 - ? [] - : await prisma.$replica().$queryRaw<{ projectUserId: string, signedUpAtOrCreatedAt: Date }[]>` - SELECT - "projectUserId"::text AS "projectUserId", - COALESCE("signedUpAt", "createdAt") AS "signedUpAtOrCreatedAt" - FROM ${sqlQuoteIdent(schema)}."ProjectUser" - WHERE "tenancyId" = ${tenancy.id}::UUID - AND "projectUserId" IN (${Prisma.join(activeUserIds.map((id) => Prisma.sql`${id}::UUID`))}) - ${includeAnonymous ? 
Prisma.empty : Prisma.sql`AND "isAnonymous" = false`} - `; - + const rows = (await result.json()) as { + day: string, + total_count: string, + new_count: string, + retained_count: string, + reactivated_count: string, + }[]; + + const byDay = new Map(rows.map((r) => [r.day.split('T')[0], r])); const orderedDays: string[] = []; - const idsByDay = new Map>(); for (let i = 0; i <= METRICS_WINDOW_DAYS; i += 1) { - const date = new Date(since.getTime() + i * ONE_DAY_MS).toISOString().split('T')[0]; - orderedDays.push(date); - idsByDay.set(date, new Set()); + orderedDays.push(new Date(since.getTime() + i * ONE_DAY_MS).toISOString().split('T')[0]); } - for (const row of sanitizedUserRows) { - const day = row.day.split('T')[0]; - const daySet = idsByDay.get(day); - if (daySet) { - daySet.add(row.user_id); - } - } - - const createdDayByUserId = new Map( - users.map((user) => [user.projectUserId, user.signedUpAtOrCreatedAt.toISOString().split('T')[0]]) - ); + const split: ActivitySplit = { + total: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.total_count ?? 0) })), + new: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.new_count ?? 0) })), + retained: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.retained_count ?? 0) })), + reactivated: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.reactivated_count ?? 0) })), + }; + return split; +} - return buildSplitFromDailyEntitySets({ - orderedDays, - entityIdsByDay: idsByDay, - createdDayByEntityId: createdDayByUserId, - }); +async function loadDailyActiveUsersSplit(tenancy: Tenancy, now: Date, includeAnonymous: boolean): Promise { + return await loadDailyActiveSplitFromClickhouse({ tenancy, now, entity: "user", includeAnonymous }); } async function loadDailyActiveTeamsSplit(tenancy: Tenancy, now: Date): Promise { - const todayUtc = new Date(now); - todayUtc.setUTCHours(0, 0, 0, 0); - const since = new Date(todayUtc.getTime() - METRICS_WINDOW_MS); - const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); - const clickhouseClient = getClickhouseAdminClient(); - const schema = await getPrismaSchemaForTenancy(tenancy); - const prisma = await getPrismaClientForTenancy(tenancy); - - const teamRows = await clickhouseClient.query({ - query: ` - SELECT - toDate(event_at) AS day, - assumeNotNull(team_id) AS team_id - FROM analytics_internal.events - WHERE event_type = '$token-refresh' - AND project_id = {projectId:String} - AND branch_id = {branchId:String} - AND team_id IS NOT NULL - AND event_at >= {since:DateTime} - AND event_at < {untilExclusive:DateTime} - GROUP BY day, team_id - `, - query_params: { - projectId: tenancy.project.id, - branchId: tenancy.branchId, - since: formatClickhouseDateTimeParam(since), - untilExclusive: formatClickhouseDateTimeParam(untilExclusive), - }, - format: "JSONEachRow", - }).then((result) => result.json() as Promise<{ day: string, team_id: string }[]>); - - const sanitizedTeamRows = teamRows.flatMap((row) => { - const teamId = normalizeUuidFromEvent(row.team_id); - if (teamId == null) { - return []; - } - return [{ ...row, team_id: teamId }]; - }); - - const activeTeamIds = [...new Set(sanitizedTeamRows.map((row) => row.team_id))]; - const teams: { teamId: string, createdAt: Date }[] = activeTeamIds.length === 0 - ? 
[] - : await prisma.$replica().$queryRaw<{ teamId: string, createdAt: Date }[]>` - SELECT "teamId"::text AS "teamId", "createdAt" - FROM ${sqlQuoteIdent(schema)}."Team" - WHERE "tenancyId" = ${tenancy.id}::UUID - AND "teamId" IN (${Prisma.join(activeTeamIds.map((id) => Prisma.sql`${id}::UUID`))}) - `; - - const orderedDays: string[] = []; - const idsByDay = new Map>(); - for (let i = 0; i <= METRICS_WINDOW_DAYS; i += 1) { - const date = new Date(since.getTime() + i * ONE_DAY_MS).toISOString().split('T')[0]; - orderedDays.push(date); - idsByDay.set(date, new Set()); - } - for (const row of sanitizedTeamRows) { - const day = row.day.split('T')[0]; - const daySet = idsByDay.get(day); - if (daySet) { - daySet.add(row.team_id); - } - } - - const createdDayByTeamId = new Map( - teams.map((team) => [team.teamId, team.createdAt.toISOString().split('T')[0]]) - ); - - return buildSplitFromDailyEntitySets({ - orderedDays, - entityIdsByDay: idsByDay, - createdDayByEntityId: createdDayByTeamId, - }); + return await loadDailyActiveSplitFromClickhouse({ tenancy, now, entity: "team", includeAnonymous: false }); } async function loadLoginMethods(tenancy: Tenancy): Promise<{ method: string, count: number }[]> { @@ -397,6 +339,9 @@ async function loadRecentlyActiveUsers(tenancy: Tenancy, includeAnonymous: boole return dbUsers.map((user) => userPrismaToCrud(user, tenancy.config)); } +// UUID v4 regex identical to isUuid() in stack-shared, ported to ClickHouse re2 syntax. +const MAU_UUID_V4_REGEX = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"; + async function loadMonthlyActiveUsers(tenancy: Tenancy, now: Date, includeAnonymous: boolean = false): Promise { const { since, untilExclusive } = getMetricsWindowBounds(now); @@ -404,17 +349,19 @@ async function loadMonthlyActiveUsers(tenancy: Tenancy, now: Date, includeAnonym try { const result = await clickhouseClient.query({ query: ` - SELECT - assumeNotNull(user_id) AS user_id - FROM analytics_internal.events - WHERE event_type = '$token-refresh' - AND project_id = {projectId:String} - AND branch_id = {branchId:String} - AND user_id IS NOT NULL - AND event_at >= {since:DateTime} - AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) - GROUP BY user_id + SELECT uniqExact(sipHash64(normalized_user_id)) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + WHERE match(normalized_user_id, {uuidRe:String}) `, query_params: { projectId: tenancy.project.id, @@ -422,18 +369,12 @@ async function loadMonthlyActiveUsers(tenancy: Tenancy, now: Date, includeAnonym since: formatClickhouseDateTimeParam(since), untilExclusive: formatClickhouseDateTimeParam(untilExclusive), includeAnonymous: includeAnonymous ? 
1 : 0, + uuidRe: MAU_UUID_V4_REGEX, }, format: "JSONEachRow", }); - const rows: { user_id: string }[] = await result.json(); - const uniqueUserIds = new Set(); - for (const row of rows) { - const normalizedUserId = normalizeUuidFromEvent(row.user_id); - if (normalizedUserId != null) { - uniqueUserIds.add(normalizedUserId); - } - } - return uniqueUserIds.size; + const rows: { mau: string | number }[] = await result.json(); + return Number(rows[0]?.mau ?? 0); } catch (error) { // Only swallow real ClickHouse errors (e.g. project hasn't enabled // analytics yet, transient query failure). Anything else is a programming @@ -835,7 +776,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo LEFT JOIN ( SELECT user_id, - argMax(JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8'), event_at) AS latest_is_anonymous + argMax(coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0), event_at) AS latest_is_anonymous FROM analytics_internal.events WHERE event_type = '$token-refresh' AND project_id = {projectId:String} @@ -846,7 +787,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo ) AS token_refresh_users ON e.user_id = token_refresh_users.user_id `; - const nonAnonymousAnalyticsUserFilter = "({includeAnonymous:UInt8} = 1 OR coalesce(JSONExtract(toJSONString(e.data), 'is_anonymous', 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; + const nonAnonymousAnalyticsUserFilter = "({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; const [dailyEventResult, totalVisitorResult, referrerResult, topRegionResult, onlineResult] = await Promise.all([ // Combined daily aggregates: page-view count, click count, and unique // visitors per day — one scan over the page-view/click event types. @@ -953,7 +894,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo uniqExactIf( assumeNotNull(user_id), user_id IS NOT NULL - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) ) AS visitors FROM analytics_internal.events WHERE event_type = '$token-refresh' @@ -987,7 +928,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo AND user_id IS NOT NULL AND event_at >= {onlineSince:DateTime} AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) `, query_params: { onlineSince: formatClickhouseDateTimeParam(new Date(now.getTime() - 5 * 60 * 1000)), diff --git a/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap b/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap index ea78adab31..69a08bef41 100644 --- a/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap +++ b/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap @@ -3481,7 +3481,7 @@ NiceResponse { "date": , }, { - "activity": 9, + "activity": 10, "date": , }, ], @@ -3607,7 +3607,7 @@ NiceResponse { "date": , }, { - "activity": 1, + "activity": 0, "date": , }, ],