From a0bad07c19bc0c913dd298eb7cf26f500425382c Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:27:40 +0200 Subject: [PATCH 01/14] feat: add persistent source cache storage --- src/source-cache.ts | 149 +++++++++++++++++++++++++++++++++++++ tests/source-cache.test.ts | 103 +++++++++++++++++++++++++ 2 files changed, 252 insertions(+) create mode 100644 src/source-cache.ts create mode 100644 tests/source-cache.test.ts diff --git a/src/source-cache.ts b/src/source-cache.ts new file mode 100644 index 0000000..4c7efa2 --- /dev/null +++ b/src/source-cache.ts @@ -0,0 +1,149 @@ +import { createHash, randomBytes } from 'crypto' +import { existsSync } from 'fs' +import { mkdir, open, readFile, rename, stat, unlink } from 'fs/promises' +import { homedir } from 'os' +import { dirname, join } from 'path' + +import type { SessionSummary } from './types.js' + +export const SOURCE_CACHE_VERSION = 1 + +export type SourceCacheStrategy = 'full-reparse' | 'append-jsonl' + +export type SourceFingerprint = { + mtimeMs: number + sizeBytes: number +} + +export type AppendState = { + endOffset: number + tailHash: string +} + +export type SourceCacheEntry = { + version: number + provider: string + logicalPath: string + fingerprintPath: string + cacheStrategy: SourceCacheStrategy + parserVersion: string + fingerprint: SourceFingerprint + sessions: SessionSummary[] + appendState?: AppendState +} + +export type SourceCacheManifest = { + version: number + entries: Record +} + +function cacheRoot(): string { + const base = process.env['CODEBURN_CACHE_DIR'] ?? 
join(homedir(), '.cache', 'codeburn') + return join(base, 'source-cache-v1') +} + +function manifestPath(): string { + return join(cacheRoot(), 'manifest.json') +} + +function entryDir(): string { + return join(cacheRoot(), 'entries') +} + +function sourceKey(provider: string, logicalPath: string): string { + return `${provider}:${logicalPath}` +} + +function entryFilename(provider: string, logicalPath: string): string { + return `${createHash('sha1').update(sourceKey(provider, logicalPath)).digest('hex')}.json` +} + +export function emptySourceCacheManifest(): SourceCacheManifest { + return { version: SOURCE_CACHE_VERSION, entries: {} } +} + +export async function computeFileFingerprint(filePath: string): Promise { + const meta = await stat(filePath) + return { mtimeMs: meta.mtimeMs, sizeBytes: meta.size } +} + +export async function loadSourceCacheManifest(): Promise { + if (!existsSync(manifestPath())) return emptySourceCacheManifest() + + try { + const raw = await readFile(manifestPath(), 'utf-8') + const parsed = JSON.parse(raw) as Partial + if (parsed.version !== SOURCE_CACHE_VERSION || !parsed.entries || typeof parsed.entries !== 'object') { + return emptySourceCacheManifest() + } + return { version: SOURCE_CACHE_VERSION, entries: parsed.entries as SourceCacheManifest['entries'] } + } catch { + return emptySourceCacheManifest() + } +} + +async function atomicWriteJson(path: string, value: unknown): Promise { + await mkdir(dirname(path), { recursive: true }) + const temp = `${path}.${randomBytes(8).toString('hex')}.tmp` + const handle = await open(temp, 'w', 0o600) + try { + await handle.writeFile(JSON.stringify(value), { encoding: 'utf-8' }) + await handle.sync() + } finally { + await handle.close() + } + + try { + await rename(temp, path) + } catch (err) { + try { + await unlink(temp) + } catch { + // ignore cleanup failures + } + throw err + } +} + +export async function saveSourceCacheManifest(manifest: SourceCacheManifest): Promise { + await 
mkdir(cacheRoot(), { recursive: true }) + await atomicWriteJson(manifestPath(), manifest) +} + +export async function readSourceCacheEntry( + manifest: SourceCacheManifest, + provider: string, + logicalPath: string, +): Promise { + const meta = manifest.entries[sourceKey(provider, logicalPath)] + if (!meta) return null + + try { + const raw = await readFile(join(entryDir(), meta.file), 'utf-8') + const entry = JSON.parse(raw) as SourceCacheEntry + if (entry.version !== SOURCE_CACHE_VERSION) return null + + const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath) + if ( + currentFingerprint.mtimeMs !== entry.fingerprint.mtimeMs + || currentFingerprint.sizeBytes !== entry.fingerprint.sizeBytes + ) { + return null + } + + return entry + } catch { + return null + } +} + +export async function writeSourceCacheEntry(manifest: SourceCacheManifest, entry: SourceCacheEntry): Promise { + await mkdir(entryDir(), { recursive: true }) + const file = entryFilename(entry.provider, entry.logicalPath) + manifest.entries[sourceKey(entry.provider, entry.logicalPath)] = { + file, + provider: entry.provider, + logicalPath: entry.logicalPath, + } + await atomicWriteJson(join(entryDir(), file), entry) +} diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts new file mode 100644 index 0000000..3cc8340 --- /dev/null +++ b/tests/source-cache.test.ts @@ -0,0 +1,103 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { existsSync } from 'fs' +import { mkdtemp, readFile, rm, writeFile } from 'fs/promises' +import { tmpdir } from 'os' +import { join } from 'path' + +import { + SOURCE_CACHE_VERSION, + emptySourceCacheManifest, + loadSourceCacheManifest, + saveSourceCacheManifest, + readSourceCacheEntry, + writeSourceCacheEntry, + computeFileFingerprint, + type SourceCacheEntry, +} from '../src/source-cache.js' + +let root = '' + +beforeEach(async () => { + root = await mkdtemp(join(tmpdir(), 'codeburn-source-cache-')) + 
process.env['CODEBURN_CACHE_DIR'] = root +}) + +afterEach(async () => { + delete process.env['CODEBURN_CACHE_DIR'] + if (root) await rm(root, { recursive: true, force: true }) +}) + +describe('source cache manifest', () => { + it('returns an empty manifest when no file exists', async () => { + await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) + }) + + it('round-trips a manifest and entry', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, '{"ok":true}\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry: SourceCacheEntry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [], + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry) + await saveSourceCacheManifest(manifest) + + const loadedManifest = await loadSourceCacheManifest() + const loadedEntry = await readSourceCacheEntry(loadedManifest, 'fake', sourcePath) + expect(loadedEntry).toEqual(entry) + }) + + it('returns null when the fingerprint no longer matches', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry: SourceCacheEntry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [], + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry) + await saveSourceCacheManifest(manifest) + + await writeFile(sourcePath, 'one\ntwo\n', 'utf-8') + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + + it('writes 
atomically without leaving temp files behind', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'x\n', 'utf-8') + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint: await computeFileFingerprint(sourcePath), + sessions: [], + }) + await saveSourceCacheManifest(manifest) + + const files = JSON.parse(await readFile(join(root, 'source-cache-v1', 'manifest.json'), 'utf-8')) + expect(files.version).toBe(SOURCE_CACHE_VERSION) + expect(existsSync(join(root, 'source-cache-v1', 'entries'))).toBe(true) + }) +}) From 0d4d10362773f81a18bb87abbcae3e6a60fbfcbd Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:30:53 +0200 Subject: [PATCH 02/14] fix: tighten source cache validation --- src/source-cache.ts | 42 +++++++++++++++++++++++++++++++++----- tests/source-cache.test.ts | 39 ++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/src/source-cache.ts b/src/source-cache.ts index 4c7efa2..a771010 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -37,6 +37,31 @@ export type SourceCacheManifest = { entries: Record } +function isPlainObject(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value) +} + +function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } { + return isPlainObject(value) + && typeof value.file === 'string' + && typeof value.provider === 'string' + && typeof value.logicalPath === 'string' +} + +function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { + return isPlainObject(value) + && typeof value.version === 'number' + && typeof value.provider === 'string' + && typeof value.logicalPath === 'string' + && typeof 
value.fingerprintPath === 'string' + && (value.cacheStrategy === 'full-reparse' || value.cacheStrategy === 'append-jsonl') + && typeof value.parserVersion === 'string' + && isPlainObject(value.fingerprint) + && typeof value.fingerprint.mtimeMs === 'number' + && typeof value.fingerprint.sizeBytes === 'number' + && Array.isArray(value.sessions) +} + function cacheRoot(): string { const base = process.env['CODEBURN_CACHE_DIR'] ?? join(homedir(), '.cache', 'codeburn') return join(base, 'source-cache-v1') @@ -72,11 +97,18 @@ export async function loadSourceCacheManifest(): Promise { try { const raw = await readFile(manifestPath(), 'utf-8') - const parsed = JSON.parse(raw) as Partial - if (parsed.version !== SOURCE_CACHE_VERSION || !parsed.entries || typeof parsed.entries !== 'object') { + const parsed: unknown = JSON.parse(raw) + if (!isPlainObject(parsed) || parsed.version !== SOURCE_CACHE_VERSION || !isPlainObject(parsed.entries)) { return emptySourceCacheManifest() } - return { version: SOURCE_CACHE_VERSION, entries: parsed.entries as SourceCacheManifest['entries'] } + + const entries: SourceCacheManifest['entries'] = {} + for (const [key, value] of Object.entries(parsed.entries)) { + if (!isManifestEntry(value)) return emptySourceCacheManifest() + entries[key] = value + } + + return { version: SOURCE_CACHE_VERSION, entries } } catch { return emptySourceCacheManifest() } @@ -120,8 +152,8 @@ export async function readSourceCacheEntry( try { const raw = await readFile(join(entryDir(), meta.file), 'utf-8') - const entry = JSON.parse(raw) as SourceCacheEntry - if (entry.version !== SOURCE_CACHE_VERSION) return null + const entry: unknown = JSON.parse(raw) + if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath) if ( diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts index 3cc8340..581b66a 100644 --- a/tests/source-cache.test.ts +++ 
b/tests/source-cache.test.ts @@ -1,6 +1,6 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { existsSync } from 'fs' -import { mkdtemp, readFile, rm, writeFile } from 'fs/promises' +import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'fs/promises' import { tmpdir } from 'os' import { join } from 'path' @@ -32,6 +32,16 @@ describe('source cache manifest', () => { await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) }) + it('returns an empty manifest when the manifest shape is invalid', async () => { + await mkdir(join(root, 'source-cache-v1'), { recursive: true }) + await writeFile(join(root, 'source-cache-v1', 'manifest.json'), JSON.stringify({ + version: SOURCE_CACHE_VERSION, + entries: { bad: { file: 123, provider: 'fake' } }, + }), 'utf-8') + + await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) + }) + it('round-trips a manifest and entry', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, '{"ok":true}\n', 'utf-8') @@ -80,6 +90,29 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when the cached entry shape is invalid', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const manifest = await loadSourceCacheManifest() + const file = 'broken.json' + manifest.entries[`fake:${sourcePath}`] = { file, provider: 'fake', logicalPath: sourcePath } + await saveSourceCacheManifest(manifest) + await mkdir(join(root, 'source-cache-v1', 'entries'), { recursive: true }) + await writeFile(join(root, 'source-cache-v1', 'entries', file), JSON.stringify({ + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint: { mtimeMs: 'nope', sizeBytes: 4 }, + sessions: [], + }), 'utf-8') + + const loaded = await 
readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('writes atomically without leaving temp files behind', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'x\n', 'utf-8') @@ -99,5 +132,9 @@ describe('source cache manifest', () => { const files = JSON.parse(await readFile(join(root, 'source-cache-v1', 'manifest.json'), 'utf-8')) expect(files.version).toBe(SOURCE_CACHE_VERSION) expect(existsSync(join(root, 'source-cache-v1', 'entries'))).toBe(true) + const cacheFiles = await readdir(join(root, 'source-cache-v1')) + const entryFiles = await readdir(join(root, 'source-cache-v1', 'entries')) + expect(cacheFiles.some(f => f.endsWith('.tmp'))).toBe(false) + expect(entryFiles.some(f => f.endsWith('.tmp'))).toBe(false) }) }) From ac5dd8c3e958769665cd70ab0aaacbd86f88081b Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:35:48 +0200 Subject: [PATCH 03/14] fix: tighten source cache validation --- src/source-cache.ts | 40 +++++++++++++++++++++++- tests/source-cache.test.ts | 64 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/src/source-cache.ts b/src/source-cache.ts index a771010..83ece56 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -44,10 +44,44 @@ function isPlainObject(value: unknown): value is Record { function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } { return isPlainObject(value) && typeof value.file === 'string' + && /^[a-f0-9]{40}\.json$/.test(value.file) && typeof value.provider === 'string' && typeof value.logicalPath === 'string' } +function isSessionSummary(value: unknown): value is SessionSummary { + return isPlainObject(value) + && typeof value.sessionId === 'string' + && typeof value.project === 'string' + && typeof value.firstTimestamp === 'string' + && typeof value.lastTimestamp === 'string' + && typeof 
value.totalCostUSD === 'number' + && Number.isFinite(value.totalCostUSD) + && typeof value.totalInputTokens === 'number' + && Number.isFinite(value.totalInputTokens) + && typeof value.totalOutputTokens === 'number' + && Number.isFinite(value.totalOutputTokens) + && typeof value.totalCacheReadTokens === 'number' + && Number.isFinite(value.totalCacheReadTokens) + && typeof value.totalCacheWriteTokens === 'number' + && Number.isFinite(value.totalCacheWriteTokens) + && typeof value.apiCalls === 'number' + && Number.isFinite(value.apiCalls) + && Array.isArray(value.turns) + && isPlainObject(value.modelBreakdown) + && isPlainObject(value.toolBreakdown) + && isPlainObject(value.mcpBreakdown) + && isPlainObject(value.bashBreakdown) + && isPlainObject(value.categoryBreakdown) +} + +function isAppendState(value: unknown): value is AppendState { + return isPlainObject(value) + && typeof value.endOffset === 'number' + && Number.isFinite(value.endOffset) + && typeof value.tailHash === 'string' +} + function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { return isPlainObject(value) && typeof value.version === 'number' @@ -57,9 +91,13 @@ function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { && (value.cacheStrategy === 'full-reparse' || value.cacheStrategy === 'append-jsonl') && typeof value.parserVersion === 'string' && isPlainObject(value.fingerprint) + && Number.isFinite(value.fingerprint.mtimeMs) && typeof value.fingerprint.mtimeMs === 'number' + && Number.isFinite(value.fingerprint.sizeBytes) && typeof value.fingerprint.sizeBytes === 'number' && Array.isArray(value.sessions) + && value.sessions.every(isSessionSummary) + && (value.appendState === undefined || isAppendState(value.appendState)) } function cacheRoot(): string { @@ -172,10 +210,10 @@ export async function readSourceCacheEntry( export async function writeSourceCacheEntry(manifest: SourceCacheManifest, entry: SourceCacheEntry): Promise { await mkdir(entryDir(), { recursive: true 
}) const file = entryFilename(entry.provider, entry.logicalPath) + await atomicWriteJson(join(entryDir(), file), entry) manifest.entries[sourceKey(entry.provider, entry.logicalPath)] = { file, provider: entry.provider, logicalPath: entry.logicalPath, } - await atomicWriteJson(join(entryDir(), file), entry) } diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts index 581b66a..8707f85 100644 --- a/tests/source-cache.test.ts +++ b/tests/source-cache.test.ts @@ -1,4 +1,5 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { createHash } from 'crypto' import { existsSync } from 'fs' import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'fs/promises' import { tmpdir } from 'os' @@ -42,6 +43,22 @@ describe('source cache manifest', () => { await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) }) + it('returns an empty manifest when an entry filename is unsafe', async () => { + await mkdir(join(root, 'source-cache-v1'), { recursive: true }) + await writeFile(join(root, 'source-cache-v1', 'manifest.json'), JSON.stringify({ + version: SOURCE_CACHE_VERSION, + entries: { + bad: { + file: '../escape.json', + provider: 'fake', + logicalPath: join(root, 'source.jsonl'), + }, + }, + }), 'utf-8') + + await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) + }) + it('round-trips a manifest and entry', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, '{"ok":true}\n', 'utf-8') @@ -113,6 +130,30 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when append state is malformed', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 
'append-jsonl' as const, + parserVersion: 'fake-v1', + fingerprint, + sessions: [], + appendState: { endOffset: 'bad', tailHash: 'abc' }, + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry as SourceCacheEntry) + await saveSourceCacheManifest(manifest) + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('writes atomically without leaving temp files behind', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'x\n', 'utf-8') @@ -137,4 +178,27 @@ describe('source cache manifest', () => { expect(cacheFiles.some(f => f.endsWith('.tmp'))).toBe(false) expect(entryFiles.some(f => f.endsWith('.tmp'))).toBe(false) }) + + it('does not mutate the manifest when the entry write fails', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'x\n', 'utf-8') + const manifest = await loadSourceCacheManifest() + const provider = 'fake' + const logicalPath = sourcePath + const file = `${createHash('sha1').update(`${provider}:${logicalPath}`).digest('hex')}.json` + await mkdir(join(root, 'source-cache-v1', 'entries', file), { recursive: true }) + + await expect(writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider, + logicalPath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint: await computeFileFingerprint(sourcePath), + sessions: [], + })).rejects.toBeTruthy() + + expect(manifest.entries[`fake:${sourcePath}`]).toBeUndefined() + }) }) From a2593ceb1ed7ddfa3e509df497b202679b18d25e Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:39:19 +0200 Subject: [PATCH 04/14] fix: harden source cache validation --- src/source-cache.ts | 101 +++++++++++++++++++---- tests/source-cache.test.ts | 161 ++++++++++++++++++++++++++++++++++++- 2 files changed, 244 insertions(+), 18 deletions(-) diff 
--git a/src/source-cache.ts b/src/source-cache.ts index 83ece56..bd65dcf 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -41,6 +41,10 @@ function isPlainObject(value: unknown): value is Record { return !!value && typeof value === 'object' && !Array.isArray(value) } +function isFiniteNumber(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value) +} + function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } { return isPlainObject(value) && typeof value.file === 'string' @@ -55,24 +59,82 @@ function isSessionSummary(value: unknown): value is SessionSummary { && typeof value.project === 'string' && typeof value.firstTimestamp === 'string' && typeof value.lastTimestamp === 'string' - && typeof value.totalCostUSD === 'number' - && Number.isFinite(value.totalCostUSD) - && typeof value.totalInputTokens === 'number' - && Number.isFinite(value.totalInputTokens) - && typeof value.totalOutputTokens === 'number' - && Number.isFinite(value.totalOutputTokens) - && typeof value.totalCacheReadTokens === 'number' - && Number.isFinite(value.totalCacheReadTokens) - && typeof value.totalCacheWriteTokens === 'number' - && Number.isFinite(value.totalCacheWriteTokens) - && typeof value.apiCalls === 'number' - && Number.isFinite(value.apiCalls) + && isFiniteNumber(value.totalCostUSD) + && isFiniteNumber(value.totalInputTokens) + && isFiniteNumber(value.totalOutputTokens) + && isFiniteNumber(value.totalCacheReadTokens) + && isFiniteNumber(value.totalCacheWriteTokens) + && isFiniteNumber(value.apiCalls) && Array.isArray(value.turns) - && isPlainObject(value.modelBreakdown) - && isPlainObject(value.toolBreakdown) - && isPlainObject(value.mcpBreakdown) - && isPlainObject(value.bashBreakdown) - && isPlainObject(value.categoryBreakdown) + && value.turns.every(isParsedTurn) + && isBreakdownMap(value.modelBreakdown, isModelBreakdownEntry) + && isBreakdownMap(value.toolBreakdown, 
isCallsBreakdownEntry) + && isBreakdownMap(value.mcpBreakdown, isCallsBreakdownEntry) + && isBreakdownMap(value.bashBreakdown, isCallsBreakdownEntry) + && isBreakdownMap(value.categoryBreakdown, isCategoryBreakdownEntry) +} + +function isTokenUsage(value: unknown): value is { inputTokens: number; outputTokens: number; cacheCreationInputTokens: number; cacheReadInputTokens: number; cachedInputTokens: number; reasoningTokens: number; webSearchRequests: number } { + return isPlainObject(value) + && isFiniteNumber(value.inputTokens) + && isFiniteNumber(value.outputTokens) + && isFiniteNumber(value.cacheCreationInputTokens) + && isFiniteNumber(value.cacheReadInputTokens) + && isFiniteNumber(value.cachedInputTokens) + && isFiniteNumber(value.reasoningTokens) + && isFiniteNumber(value.webSearchRequests) +} + +function isParsedApiCall(value: unknown): boolean { + return isPlainObject(value) + && typeof value.provider === 'string' + && typeof value.model === 'string' + && isTokenUsage(value.usage) + && isFiniteNumber(value.costUSD) + && Array.isArray(value.tools) + && value.tools.every(tool => typeof tool === 'string') + && Array.isArray(value.mcpTools) + && value.mcpTools.every(tool => typeof tool === 'string') + && typeof value.hasAgentSpawn === 'boolean' + && typeof value.hasPlanMode === 'boolean' + && (value.speed === 'standard' || value.speed === 'fast') + && typeof value.timestamp === 'string' + && Array.isArray(value.bashCommands) + && value.bashCommands.every(command => typeof command === 'string') + && typeof value.deduplicationKey === 'string' +} + +function isParsedTurn(value: unknown): boolean { + return isPlainObject(value) + && typeof value.userMessage === 'string' + && Array.isArray(value.assistantCalls) + && value.assistantCalls.every(isParsedApiCall) + && typeof value.timestamp === 'string' + && typeof value.sessionId === 'string' +} + +function isModelBreakdownEntry(value: unknown): boolean { + return isPlainObject(value) + && isFiniteNumber(value.calls) + 
&& isFiniteNumber(value.costUSD) + && isTokenUsage(value.tokens) +} + +function isCallsBreakdownEntry(value: unknown): boolean { + return isPlainObject(value) && isFiniteNumber(value.calls) +} + +function isCategoryBreakdownEntry(value: unknown): boolean { + return isPlainObject(value) + && isFiniteNumber(value.turns) + && isFiniteNumber(value.costUSD) + && isFiniteNumber(value.retries) + && isFiniteNumber(value.editTurns) + && isFiniteNumber(value.oneShotTurns) +} + +function isBreakdownMap(value: unknown, predicate: (entry: unknown) => entry is T): value is Record { + return isPlainObject(value) && Object.values(value).every(predicate) } function isAppendState(value: unknown): value is AppendState { @@ -187,11 +249,16 @@ export async function readSourceCacheEntry( ): Promise { const meta = manifest.entries[sourceKey(provider, logicalPath)] if (!meta) return null + if (meta.provider !== provider || meta.logicalPath !== logicalPath) return null + + const expectedFile = entryFilename(provider, logicalPath) + if (meta.file !== expectedFile) return null try { const raw = await readFile(join(entryDir(), meta.file), 'utf-8') const entry: unknown = JSON.parse(raw) if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null + if (entry.provider !== provider || entry.logicalPath !== logicalPath) return null const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath) if ( diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts index 8707f85..3af4818 100644 --- a/tests/source-cache.test.ts +++ b/tests/source-cache.test.ts @@ -15,9 +15,32 @@ import { computeFileFingerprint, type SourceCacheEntry, } from '../src/source-cache.js' +import type { SessionSummary } from '../src/types.js' let root = '' +function emptySession(sessionId: string, overrides: Partial = {}): SessionSummary { + return { + sessionId, + project: 'project', + firstTimestamp: '2026-04-10T00:00:00Z', + lastTimestamp: '2026-04-10T00:00:00Z', + 
totalCostUSD: 0, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, + apiCalls: 0, + turns: [], + modelBreakdown: {}, + toolBreakdown: {}, + mcpBreakdown: {}, + bashBreakdown: {}, + categoryBreakdown: {}, + ...overrides, + } +} + beforeEach(async () => { root = await mkdtemp(join(tmpdir(), 'codeburn-source-cache-')) process.env['CODEBURN_CACHE_DIR'] = root @@ -111,7 +134,7 @@ describe('source cache manifest', () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'one\n', 'utf-8') const manifest = await loadSourceCacheManifest() - const file = 'broken.json' + const file = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json` manifest.entries[`fake:${sourcePath}`] = { file, provider: 'fake', logicalPath: sourcePath } await saveSourceCacheManifest(manifest) await mkdir(join(root, 'source-cache-v1', 'entries'), { recursive: true }) @@ -130,6 +153,101 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when the manifest metadata does not match the lookup request', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const file = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json` + const manifest = await loadSourceCacheManifest() + manifest.entries[`fake:${sourcePath}`] = { + file, + provider: 'other', + logicalPath: sourcePath, + } + await saveSourceCacheManifest(manifest) + await mkdir(join(root, 'source-cache-v1', 'entries'), { recursive: true }) + await writeFile(join(root, 'source-cache-v1', 'entries', file), JSON.stringify({ + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [], + }), 'utf-8') + + const loaded = await readSourceCacheEntry(await 
loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + + it('returns null when a nested assistant call is malformed', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry: SourceCacheEntry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [ + emptySession('session-1', { + turns: [{ + userMessage: 'hello', + assistantCalls: [{ + provider: 'fake', + model: 'model', + usage: { + inputTokens: 1, + outputTokens: 1, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + }, + costUSD: 1, + tools: [], + mcpTools: [], + hasAgentSpawn: false, + hasPlanMode: false, + speed: 'standard', + timestamp: '2026-04-10T00:00:00Z', + bashCommands: [], + deduplicationKey: 'k', + }], + timestamp: '2026-04-10T00:00:00Z', + sessionId: 'session-1', + }], + }), + ], + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry) + await saveSourceCacheManifest(manifest) + + await writeFile(join(root, 'source-cache-v1', 'entries', `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json`), JSON.stringify({ + ...entry, + sessions: [{ + ...entry.sessions[0], + turns: [{ + ...entry.sessions[0].turns[0], + assistantCalls: [{ + ...entry.sessions[0].turns[0].assistantCalls[0], + usage: { ...entry.sessions[0].turns[0].assistantCalls[0].usage, inputTokens: 'bad' }, + }], + }], + }], + }), 'utf-8') + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('returns null when append state is malformed', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 
'one\n', 'utf-8') @@ -154,6 +272,47 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when a breakdown map contains malformed values', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry: SourceCacheEntry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [ + emptySession('session-2', { + modelBreakdown: { + modelA: { + calls: 'bad', + costUSD: 0, + tokens: { + inputTokens: 0, + outputTokens: 0, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + }, + }, + }, + }), + ], + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry) + await saveSourceCacheManifest(manifest) + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('writes atomically without leaving temp files behind', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'x\n', 'utf-8') From 303a9256c5784a8231a03a807f72b202df25545d Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:46:09 +0200 Subject: [PATCH 05/14] feat: reuse cached parsed sources --- src/parser.ts | 272 ++++++++++++++++++++++++++----------- src/providers/types.ts | 4 + tests/parser-cache.test.ts | 153 +++++++++++++++++++++ 3 files changed, 348 insertions(+), 81 deletions(-) create mode 100644 tests/parser-cache.test.ts diff --git a/src/parser.ts b/src/parser.ts index ba0d31c..14b50d8 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -3,7 +3,15 @@ import { basename, join } from 'path' import { readSessionFile } from './fs-utils.js' import { calculateCost, 
getShortModelName } from './models.js' import { discoverAllSessions, getProvider } from './providers/index.js' -import type { ParsedProviderCall } from './providers/types.js' +import type { ParsedProviderCall, Provider, SessionSource } from './providers/types.js' +import { + computeFileFingerprint, + loadSourceCacheManifest, + readSourceCacheEntry, + saveSourceCacheManifest, + SOURCE_CACHE_VERSION, + writeSourceCacheEntry, +} from './source-cache.js' import type { AssistantMessageContent, ClassifiedTurn, @@ -260,6 +268,65 @@ function buildSessionSummary( } } +export type SourceProgressReporter = { + start(label: string, total: number): void + advance(itemLabel: string): void + finish(): void +} + +export type ParseOptions = { + noCache?: boolean + progress?: SourceProgressReporter | null +} + +function addSessionToProjectMap(projectMap: Map, session: SessionSummary) { + if (session.apiCalls === 0) return + const existing = projectMap.get(session.project) ?? [] + existing.push(session) + projectMap.set(session.project, existing) +} + +function buildProjects(projectMap: Map): ProjectSummary[] { + const projects: ProjectSummary[] = [] + for (const [dirName, sessions] of projectMap) { + projects.push({ + project: dirName, + projectPath: unsanitizePath(dirName), + sessions, + totalCostUSD: sessions.reduce((s, sess) => s + sess.totalCostUSD, 0), + totalApiCalls: sessions.reduce((s, sess) => s + sess.apiCalls, 0), + }) + } + return projects +} + +function filterSessionSummaryToRange(session: SessionSummary, dateRange?: DateRange): SessionSummary | null { + if (!dateRange) return session + + const turns = session.turns + .map(turn => ({ + ...turn, + assistantCalls: turn.assistantCalls.filter(call => { + const ts = new Date(call.timestamp) + return ts >= dateRange.start && ts <= dateRange.end + }), + })) + .filter(turn => turn.assistantCalls.length > 0) + + if (turns.length === 0) return null + return buildSessionSummary(session.sessionId, session.project, turns) +} + 
+function addSeenKeysFromSessions(sessions: SessionSummary[], seenKeys: Set) { + for (const session of sessions) { + for (const turn of session.turns) { + for (const call of turn.assistantCalls) { + seenKeys.add(call.deduplicationKey) + } + } + } +} + async function parseSessionFile( filePath: string, project: string, @@ -328,26 +395,11 @@ async function scanProjectDirs(dirs: Array<{ path: string; name: string }>, seen for (const filePath of jsonlFiles) { const session = await parseSessionFile(filePath, dirName, seenMsgIds, dateRange) - if (session && session.apiCalls > 0) { - const existing = projectMap.get(dirName) ?? [] - existing.push(session) - projectMap.set(dirName, existing) - } + if (session) addSessionToProjectMap(projectMap, session) } } - const projects: ProjectSummary[] = [] - for (const [dirName, sessions] of projectMap) { - projects.push({ - project: dirName, - projectPath: unsanitizePath(dirName), - sessions, - totalCostUSD: sessions.reduce((s, sess) => s + sess.totalCostUSD, 0), - totalApiCalls: sessions.reduce((s, sess) => s + sess.apiCalls, 0), - }) - } - - return projects + return buildProjects(projectMap) } function providerCallToTurn(call: ParsedProviderCall): ParsedTurn { @@ -387,82 +439,105 @@ function providerCallToTurn(call: ParsedProviderCall): ParsedTurn { async function parseProviderSources( providerName: string, - sources: Array<{ path: string; project: string }>, + sources: SessionSource[], seenKeys: Set, dateRange?: DateRange, + options: ParseOptions = {}, ): Promise { - const provider = await getProvider(providerName) - if (!provider) return [] + const projectMap = new Map() + const manifest = await loadSourceCacheManifest() + const sourceStates = await Promise.all(sources.map(async source => { + const parserVersion = source.parserVersion ?? `${providerName}:v1` + const cached = options.noCache + ? 
null + : await readSourceCacheEntry(manifest, providerName, source.path) + + if (cached && cached.parserVersion === parserVersion) { + return { source, parserVersion, cachedSessions: cached.sessions } + } - const sessionMap = new Map() + return { source, parserVersion, cachedSessions: null } + })) - for (const source of sources) { - if (dateRange) { - try { - const s = await stat(source.path) - if (s.mtimeMs < dateRange.start.getTime()) continue - } catch { /* fall through; treat unknown stat as "may contain data" */ } - } - const parser = provider.createSessionParser( - { path: source.path, project: source.project, provider: providerName }, - seenKeys, - ) - - for await (const call of parser.parse()) { - if (dateRange) { - if (!call.timestamp) continue - const ts = new Date(call.timestamp) - if (ts < dateRange.start || ts > dateRange.end) continue - } + const refreshCount = sourceStates.filter(state => state.cachedSessions === null).length + let provider: Provider | undefined + let wroteManifest = false - const turn = providerCallToTurn(call) - const classified = classifyTurn(turn) - const key = `${providerName}:${call.sessionId}:${source.project}` + if (refreshCount > 0) options.progress?.start('Updating cache', refreshCount) - const existing = sessionMap.get(key) - if (existing) { - existing.turns.push(classified) + try { + for (const state of sourceStates) { + let fullSessions = state.cachedSessions + + if (fullSessions) { + addSeenKeysFromSessions(fullSessions, seenKeys) } else { - sessionMap.set(key, { project: source.project, turns: [classified] }) + provider ??= await getProvider(providerName) + if (!provider) continue + + options.progress?.advance(state.source.progressLabel ?? state.source.path) + fullSessions = await parseFreshProviderSource(provider, providerName, state.source, seenKeys) + + const fingerprintPath = state.source.fingerprintPath ?? 
state.source.path + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: providerName, + logicalPath: state.source.path, + fingerprintPath, + cacheStrategy: state.source.cacheStrategy ?? 'full-reparse', + parserVersion: state.parserVersion, + fingerprint: await computeFileFingerprint(fingerprintPath), + sessions: fullSessions, + }) + wroteManifest = true } - } - } - const projectMap = new Map() - for (const [key, { project, turns }] of sessionMap) { - const sessionId = key.split(':')[1] ?? key - const session = buildSessionSummary(sessionId, project, turns) - if (session.apiCalls > 0) { - const existing = projectMap.get(project) ?? [] - existing.push(session) - projectMap.set(project, existing) + for (const session of fullSessions + .map(session => filterSessionSummaryToRange(session, dateRange)) + .filter((session): session is SessionSummary => session !== null)) { + addSessionToProjectMap(projectMap, session) + } } + } finally { + if (refreshCount > 0) options.progress?.finish() } - const projects: ProjectSummary[] = [] - for (const [dirName, sessions] of projectMap) { - projects.push({ - project: dirName, - projectPath: unsanitizePath(dirName), - sessions, - totalCostUSD: sessions.reduce((s, sess) => s + sess.totalCostUSD, 0), - totalApiCalls: sessions.reduce((s, sess) => s + sess.apiCalls, 0), - }) - } + if (wroteManifest) await saveSourceCacheManifest(manifest) - return projects + return buildProjects(projectMap) } const CACHE_TTL_MS = 60_000 const MAX_CACHE_ENTRIES = 10 -const sessionCache = new Map() +const sessionCache = new Map() -function cacheKey(dateRange?: DateRange, providerFilter?: string): string { +function cacheKey(dateRange?: DateRange, providerFilter?: string, noCache = false): string { const s = dateRange ? `${dateRange.start.getTime()}:${dateRange.end.getTime()}` : 'none' - return `${s}:${providerFilter ?? 'all'}` + return `${s}:${providerFilter ?? 'all'}:${noCache ? 
'nocache' : 'cache'}` +} + +async function sourceSignatureForCache(sources: SessionSource[]): Promise { + const fingerprints = await Promise.all(sources.map(async source => { + const fingerprintPath = source.fingerprintPath ?? source.path + try { + const meta = await stat(fingerprintPath) + return [ + source.provider, + source.project, + source.path, + fingerprintPath, + String(meta.mtimeMs), + String(meta.size), + ].join(':') + } catch { + return [source.provider, source.project, source.path, fingerprintPath, 'missing'].join(':') + } + })) + + return fingerprints.sort().join('|') } -function cachePut(key: string, data: ProjectSummary[]) { +function cachePut(key: string, data: ProjectSummary[], sourceSignature: string) { const now = Date.now() for (const [k, v] of sessionCache) { if (now - v.ts > CACHE_TTL_MS) sessionCache.delete(k) @@ -471,7 +546,7 @@ function cachePut(key: string, data: ProjectSummary[]) { const oldest = [...sessionCache.entries()].sort((a, b) => a[1].ts - b[1].ts)[0] if (oldest) sessionCache.delete(oldest[0]) } - sessionCache.set(key, { data, ts: now }) + sessionCache.set(key, { data, sourceSignature, ts: now }) } export function filterProjectsByName( @@ -499,14 +574,49 @@ export function filterProjectsByName( return result } -export async function parseAllSessions(dateRange?: DateRange, providerFilter?: string): Promise { - const key = cacheKey(dateRange, providerFilter) +async function parseFreshProviderSource( + provider: Provider, + providerName: string, + source: SessionSource, + seenKeys: Set, +): Promise { + const sessionMap = new Map() + const parser = provider.createSessionParser(source, seenKeys) + + for await (const call of parser.parse()) { + const turn = providerCallToTurn(call) + const classified = classifyTurn(turn) + const key = `${providerName}:${call.sessionId}:${source.project}` + const existing = sessionMap.get(key) + + if (existing) { + existing.turns.push(classified) + } else { + sessionMap.set(key, { project: 
source.project, turns: [classified] }) + } + } + + return [...sessionMap.entries()].map(([key, value]) => { + const sessionId = key.split(':')[1] ?? key + return buildSessionSummary(sessionId, value.project, value.turns) + }) +} + +export async function parseAllSessions( + dateRange?: DateRange, + providerFilter?: string, + options: ParseOptions = {}, +): Promise { + const key = cacheKey(dateRange, providerFilter, options.noCache === true) + const allSources = await discoverAllSessions(providerFilter) + const sourceSignature = await sourceSignatureForCache(allSources) const cached = sessionCache.get(key) - if (cached && Date.now() - cached.ts < CACHE_TTL_MS) return cached.data + if (cached && Date.now() - cached.ts < CACHE_TTL_MS && cached.sourceSignature === sourceSignature) { + return cached.data + } const seenMsgIds = new Set() const seenKeys = new Set() - const allSources = await discoverAllSessions(providerFilter) const claudeSources = allSources.filter(s => s.provider === 'claude') const nonClaudeSources = allSources.filter(s => s.provider !== 'claude') @@ -514,16 +624,16 @@ export async function parseAllSessions(dateRange?: DateRange, providerFilter?: s const claudeDirs = claudeSources.map(s => ({ path: s.path, name: s.project })) const claudeProjects = await scanProjectDirs(claudeDirs, seenMsgIds, dateRange) - const providerGroups = new Map>() + const providerGroups = new Map() for (const source of nonClaudeSources) { const existing = providerGroups.get(source.provider) ?? 
[] - existing.push({ path: source.path, project: source.project }) + existing.push(source) providerGroups.set(source.provider, existing) } const otherProjects: ProjectSummary[] = [] for (const [providerName, sources] of providerGroups) { - const projects = await parseProviderSources(providerName, sources, seenKeys, dateRange) + const projects = await parseProviderSources(providerName, sources, seenKeys, dateRange, options) otherProjects.push(...projects) } @@ -540,6 +650,6 @@ export async function parseAllSessions(dateRange?: DateRange, providerFilter?: s } const result = Array.from(mergedMap.values()).sort((a, b) => b.totalCostUSD - a.totalCostUSD) - cachePut(key, result) + cachePut(key, result, sourceSignature) return result } diff --git a/src/providers/types.ts b/src/providers/types.ts index 3ab967a..cf02309 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -2,6 +2,10 @@ export type SessionSource = { path: string project: string provider: string + fingerprintPath?: string + cacheStrategy?: 'full-reparse' | 'append-jsonl' + progressLabel?: string + parserVersion?: string } export type SessionParser = { diff --git a/tests/parser-cache.test.ts b/tests/parser-cache.test.ts new file mode 100644 index 0000000..58459fb --- /dev/null +++ b/tests/parser-cache.test.ts @@ -0,0 +1,153 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { mkdtemp, readFile, rm, writeFile } from 'fs/promises' +import { tmpdir } from 'os' +import { join } from 'path' + +import type { ParsedProviderCall, Provider, SessionSource } from '../src/providers/types.js' + +let root = '' +let sourcePath = '' +let parseCalls = 0 + +function makeCall(index: number): ParsedProviderCall { + const second = String(index).padStart(2, '0') + return { + provider: 'fake', + model: 'gpt-5', + inputTokens: 10, + outputTokens: 20, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + 
costUSD: 0.01, + tools: ['Edit'], + bashCommands: [], + timestamp: `2026-04-20T09:00:${second}.000Z`, + speed: 'standard', + deduplicationKey: `fake:${index}`, + userMessage: `prompt ${index}`, + sessionId: 'fake-session', + } +} + +beforeEach(async () => { + root = await mkdtemp(join(tmpdir(), 'codeburn-parser-cache-')) + sourcePath = join(root, 'fake.jsonl') + parseCalls = 0 + process.env['CODEBURN_CACHE_DIR'] = join(root, 'cache') + await writeFile(sourcePath, 'one\n', 'utf-8') +}) + +afterEach(async () => { + delete process.env['CODEBURN_CACHE_DIR'] + await rm(root, { recursive: true, force: true }) + vi.resetModules() + vi.clearAllMocks() +}) + +describe('parseAllSessions source cache', () => { + it('reuses unchanged cached sources, refreshes changed sources, and honors noCache', async () => { + const fakeSource = { + path: sourcePath, + fingerprintPath: sourcePath, + project: 'fake-project', + provider: 'fake', + cacheStrategy: 'full-reparse', + progressLabel: 'fake.jsonl', + } as SessionSource + + const fakeProvider: Provider = { + name: 'fake', + displayName: 'Fake', + modelDisplayName: model => model, + toolDisplayName: tool => tool, + discoverSessions: async () => [fakeSource], + createSessionParser() { + return { + async *parse() { + parseCalls += 1 + const lineCount = (await readFile(sourcePath, 'utf-8')).trim().split('\n').filter(Boolean).length + for (let i = 0; i < lineCount; i += 1) yield makeCall(i) + }, + } + }, + } + + vi.doMock('../src/providers/index.js', () => ({ + discoverAllSessions: async () => [fakeSource], + getProvider: async () => fakeProvider, + })) + + const { parseAllSessions } = await import('../src/parser.js') + + const progress = { + start: vi.fn(), + advance: vi.fn(), + finish: vi.fn(), + } + + const first = await parseAllSessions(undefined, 'fake', { progress }) + expect(first[0]?.totalApiCalls).toBe(1) + expect(parseCalls).toBe(1) + expect(progress.start).toHaveBeenCalledWith('Updating cache', 1) + 
expect(progress.advance).toHaveBeenCalledWith('fake.jsonl') + expect(progress.finish).toHaveBeenCalled() + + const second = await parseAllSessions(undefined, 'fake') + expect(second[0]?.totalApiCalls).toBe(1) + expect(parseCalls).toBe(1) + + await writeFile(sourcePath, 'one\ntwo\n', 'utf-8') + const third = await parseAllSessions(undefined, 'fake') + expect(third[0]?.totalApiCalls).toBe(2) + expect(parseCalls).toBe(2) + + const rebuilt = await parseAllSessions(undefined, 'fake', { noCache: true }) + expect(rebuilt[0]?.totalApiCalls).toBe(2) + expect(parseCalls).toBe(3) + }) + + it('filters cached full sessions down to the requested date range', async () => { + const fakeSource = { + path: sourcePath, + fingerprintPath: sourcePath, + project: 'fake-project', + provider: 'fake', + cacheStrategy: 'full-reparse', + progressLabel: 'fake.jsonl', + } as SessionSource + + const fakeProvider: Provider = { + name: 'fake', + displayName: 'Fake', + modelDisplayName: model => model, + toolDisplayName: tool => tool, + discoverSessions: async () => [fakeSource], + createSessionParser() { + return { + async *parse() { + yield makeCall(0) + yield { ...makeCall(1), timestamp: '2026-04-21T10:00:00.000Z', deduplicationKey: 'fake:next-day' } + }, + } + }, + } + + vi.doMock('../src/providers/index.js', () => ({ + discoverAllSessions: async () => [fakeSource], + getProvider: async () => fakeProvider, + })) + + const { parseAllSessions } = await import('../src/parser.js') + await parseAllSessions(undefined, 'fake') + + const onlyFirstDay = await parseAllSessions({ + start: new Date('2026-04-20T00:00:00.000Z'), + end: new Date('2026-04-20T23:59:59.999Z'), + }, 'fake') + + expect(onlyFirstDay[0]?.totalApiCalls).toBe(1) + }) +}) From 862be251e5f382134342984a27f64cf50e3e7b04 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:59:37 +0200 Subject: [PATCH 06/14] refactor: move providers onto shared cache metadata --- src/cursor-cache.ts | 63 ----------------------- 
src/providers/codex.ts | 10 +++- src/providers/copilot.ts | 10 +++- src/providers/cursor.ts | 23 ++++----- src/providers/opencode.ts | 4 ++ src/providers/pi.ts | 10 +++- src/providers/types.ts | 4 +- tests/providers/cursor.test.ts | 94 +++++++++++++++++++++++++++++++--- 8 files changed, 131 insertions(+), 87 deletions(-) delete mode 100644 src/cursor-cache.ts diff --git a/src/cursor-cache.ts b/src/cursor-cache.ts deleted file mode 100644 index e743020..0000000 --- a/src/cursor-cache.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { readFile, writeFile, mkdir, stat } from 'fs/promises' -import { join } from 'path' -import { homedir } from 'os' - -import type { ParsedProviderCall } from './providers/types.js' - -type ResultCache = { - dbMtimeMs: number - dbSizeBytes: number - calls: ParsedProviderCall[] -} - -const CACHE_FILE = 'cursor-results.json' - -function getCacheDir(): string { - return join(homedir(), '.cache', 'codeburn') -} - -function getCachePath(): string { - return join(getCacheDir(), CACHE_FILE) -} - -async function getDbFingerprint(dbPath: string): Promise<{ mtimeMs: number; size: number } | null> { - try { - const s = await stat(dbPath) - return { mtimeMs: s.mtimeMs, size: s.size } - } catch { - return null - } -} - -export async function readCachedResults(dbPath: string): Promise { - try { - const fp = await getDbFingerprint(dbPath) - if (!fp) return null - - const raw = await readFile(getCachePath(), 'utf-8') - const cache = JSON.parse(raw) as ResultCache - - if (cache.dbMtimeMs === fp.mtimeMs && cache.dbSizeBytes === fp.size) { - return cache.calls - } - return null - } catch { - return null - } -} - -export async function writeCachedResults(dbPath: string, calls: ParsedProviderCall[]): Promise { - try { - const fp = await getDbFingerprint(dbPath) - if (!fp) return - - const dir = getCacheDir() - await mkdir(dir, { recursive: true }) - const cache: ResultCache = { - dbMtimeMs: fp.mtimeMs, - dbSizeBytes: fp.size, - calls, - } - await 
writeFile(getCachePath(), JSON.stringify(cache), 'utf-8') - } catch {} -} diff --git a/src/providers/codex.ts b/src/providers/codex.ts index 01d48b7..5c0ff07 100644 --- a/src/providers/codex.ts +++ b/src/providers/codex.ts @@ -122,7 +122,15 @@ async function discoverSessionsInDir(codexDir: string): Promise if (!valid || !meta) continue const cwd = meta.payload?.cwd ?? 'unknown' - sources.push({ path: filePath, project: sanitizeProject(cwd), provider: 'codex' }) + sources.push({ + path: filePath, + project: sanitizeProject(cwd), + provider: 'codex', + fingerprintPath: filePath, + cacheStrategy: 'append-jsonl', + progressLabel: basename(filePath), + parserVersion: 'codex:v1', + }) } } } diff --git a/src/providers/copilot.ts b/src/providers/copilot.ts index f32738f..039f844 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -179,7 +179,15 @@ async function discoverSessionsInDir(sessionStateDir: string): Promise): SessionPars return } - const cached = await readCachedResults(source.path) - if (cached) { - for (const call of cached) { - if (seenKeys.has(call.deduplicationKey)) continue - seenKeys.add(call.deduplicationKey) - yield call - } - return - } - let db: SqliteDatabase try { db = openDatabase(source.path) @@ -241,8 +230,6 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars const { calls } = parseBubbles(db, seenKeys) - await writeCachedResults(source.path, calls) - for (const call of calls) { yield call } @@ -272,7 +259,15 @@ export function createCursorProvider(dbPathOverride?: string): Provider { const dbPath = dbPathOverride ?? 
getCursorDbPath() if (!existsSync(dbPath)) return [] - return [{ path: dbPath, project: 'cursor', provider: 'cursor' }] + return [{ + path: dbPath, + project: 'cursor', + provider: 'cursor', + fingerprintPath: dbPath, + cacheStrategy: 'full-reparse', + progressLabel: 'Cursor state.vscdb', + parserVersion: 'cursor:v1', + }] }, createSessionParser(source: SessionSource, seenKeys: Set): SessionParser { diff --git a/src/providers/opencode.ts b/src/providers/opencode.ts index 9dd32ff..4b24d9e 100644 --- a/src/providers/opencode.ts +++ b/src/providers/opencode.ts @@ -271,6 +271,10 @@ async function discoverFromDb(dbPath: string): Promise { path: `${dbPath}:${row.id}`, project: row.directory ? sanitize(row.directory) : sanitize(row.title), provider: 'opencode', + fingerprintPath: dbPath, + cacheStrategy: 'full-reparse', + progressLabel: `opencode:${row.id}`, + parserVersion: 'opencode:v1', })) } catch { return [] diff --git a/src/providers/pi.ts b/src/providers/pi.ts index 92af213..a516433 100644 --- a/src/providers/pi.ts +++ b/src/providers/pi.ts @@ -100,7 +100,15 @@ async function discoverSessionsInDir(sessionsDir: string): Promise { let cursorProvider: Provider @@ -68,10 +77,83 @@ describe('cursor sqlite adapter', () => { }) }) -describe('cursor cache', () => { - it('returns null when no cache exists', async () => { - const { readCachedResults } = await import('../../src/cursor-cache.js') - const result = await readCachedResults('/nonexistent/path.db') - expect(result).toBeNull() +skipUnlessSqlite('shared cache metadata', () => { + let tmpDir: string + + beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), 'provider-cache-meta-')) + }) + + afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }) + }) + + async function createOpenCodeTestDb(dir: string): Promise { + const ocDir = join(dir, 'opencode') + const dbPath = join(ocDir, 'opencode.db') + const { DatabaseSync: Database } = require('node:sqlite') + + await mkdir(ocDir, { recursive: 
true }) + const db = new Database(dbPath) + db.exec(` + CREATE TABLE session ( + id TEXT PRIMARY KEY, project_id TEXT NOT NULL, parent_id TEXT, + slug TEXT NOT NULL, directory TEXT NOT NULL, title TEXT NOT NULL, + version TEXT NOT NULL, time_created INTEGER, time_updated INTEGER, + time_archived INTEGER + ) + `) + db.exec(` + CREATE TABLE message ( + id TEXT PRIMARY KEY, session_id TEXT NOT NULL, + time_created INTEGER, time_updated INTEGER, data TEXT NOT NULL + ) + `) + db.exec(` + CREATE TABLE part ( + id TEXT PRIMARY KEY, message_id TEXT NOT NULL, + session_id TEXT NOT NULL, time_created INTEGER, + time_updated INTEGER, data TEXT NOT NULL + ) + `) + db.prepare(` + INSERT INTO session (id, project_id, slug, directory, title, version, time_created) + VALUES (?, ?, ?, ?, ?, ?, ?) + `).run('sess-1', 'proj-1', 'slug-1', '/home/user/myproject', 'My Project', '1.0', 1700000000000) + db.close() + return dbPath + } + + it('cursor exposes the sqlite database as its fingerprint path', async () => { + const dbPath = join(tmpDir, 'state.vscdb') + await writeFile(dbPath, '') + + const cursor = createCursorProvider(dbPath) + const sources = await cursor.discoverSessions() + + expect(sources).toHaveLength(1) + for (const source of sources) { + expect(source.cacheStrategy).toBe('full-reparse') + expect(source.fingerprintPath).toBe(source.path) + expect(source.progressLabel).toBe('Cursor state.vscdb') + expect(source.parserVersion).toBe('cursor:v1') + } + }) + + it('opencode sources fingerprint the backing database, not the logical dbPath:sessionId key', async () => { + const dbPath = await createOpenCodeTestDb(tmpDir) + + const opencode = createOpenCodeProvider(tmpDir) + const sources = await opencode.discoverSessions() + + expect(sources).toHaveLength(1) + for (const source of sources) { + expect(source.cacheStrategy).toBe('full-reparse') + expect(source.fingerprintPath).toBeTruthy() + expect(source.fingerprintPath).toBe(dbPath) + 
expect(source.fingerprintPath).not.toBe(source.path) + expect(source.progressLabel).toBe('opencode:sess-1') + expect(source.parserVersion).toBe('opencode:v1') + } }) }) From ad5366472a4307541fb4bdba416f23a304244684 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 17:14:01 +0200 Subject: [PATCH 07/14] feat: cache Claude sources by session file --- src/fs-utils.ts | 26 ++++ src/parser.ts | 262 +++++++++++++++++++++++++++++++------ src/source-cache.ts | 19 ++- tests/fs-utils.test.ts | 31 +++++ tests/parser-cache.test.ts | 62 ++++++++- 5 files changed, 355 insertions(+), 45 deletions(-) diff --git a/src/fs-utils.ts b/src/fs-utils.ts index 823a630..bf25c50 100644 --- a/src/fs-utils.ts +++ b/src/fs-utils.ts @@ -93,3 +93,29 @@ export async function* readSessionLines(filePath: string): AsyncGenerator { + let size: number + try { + size = (await stat(filePath)).size + } catch (err) { + warn(`stat failed for ${filePath}: ${(err as NodeJS.ErrnoException).code ?? 'unknown'}`) + return + } + + if (size > MAX_SESSION_FILE_BYTES) { + warn(`skipped oversize file ${filePath} (${size} bytes > cap ${MAX_SESSION_FILE_BYTES})`) + return + } + + const stream = createReadStream(filePath, { + encoding: 'utf-8', + start: Math.max(0, startOffset), + }) + const rl = createInterface({ input: stream, crlfDelay: Infinity }) + try { + for await (const line of rl) yield line + } catch (err) { + warn(`stream read failed for ${filePath}: ${(err as NodeJS.ErrnoException).code ?? 
'unknown'}`) + } +} diff --git a/src/parser.ts b/src/parser.ts index 14b50d8..e02bb9e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,6 +1,7 @@ +import { createHash } from 'crypto' import { readdir, stat } from 'fs/promises' import { basename, join } from 'path' -import { readSessionFile } from './fs-utils.js' +import { readSessionFile, readSessionLinesFromOffset } from './fs-utils.js' import { calculateCost, getShortModelName } from './models.js' import { discoverAllSessions, getProvider } from './providers/index.js' import type { ParsedProviderCall, Provider, SessionSource } from './providers/types.js' @@ -317,7 +318,7 @@ function filterSessionSummaryToRange(session: SessionSummary, dateRange?: DateRa return buildSessionSummary(session.sessionId, session.project, turns) } -function addSeenKeysFromSessions(sessions: SessionSummary[], seenKeys: Set) { +function addSeenDeduplicationKeysFromSessions(sessions: SessionSummary[], seenKeys: Set) { for (const session of sessions) { for (const turn of session.turns) { for (const call of turn.assistantCalls) { @@ -327,6 +328,45 @@ function addSeenKeysFromSessions(sessions: SessionSummary[], seenKeys: Set, + sessionIdFallback: string, + dateRange?: DateRange, +): SessionSummary | null { + if (entries.length === 0) return null + + let filteredEntries = entries + if (dateRange) { + filteredEntries = entries.filter(entry => { + if (!entry.timestamp) return entry.type === 'user' + const ts = new Date(entry.timestamp) + return ts >= dateRange.start && ts <= dateRange.end + }) + if (filteredEntries.length === 0) return null + } + + const sessionId = entries.find(entry => typeof entry.sessionId === 'string')?.sessionId ?? 
sessionIdFallback + const turns = groupIntoTurns(filteredEntries, seenMsgIds) + if (turns.length === 0) return null + + return buildSessionSummary(sessionId, project, turns.map(classifyTurn)) +} + +function buildClaudeSessionSummaryFromLines( + lines: string[], + project: string, + seenMsgIds: Set, + sessionIdFallback: string, + dateRange?: DateRange, +): SessionSummary | null { + const entries = lines + .map(parseJsonlLine) + .filter((entry): entry is JournalEntry => entry !== null) + return buildSessionSummaryFromEntries(entries, project, seenMsgIds, sessionIdFallback, dateRange) +} + async function parseSessionFile( filePath: string, project: string, @@ -345,30 +385,7 @@ async function parseSessionFile( const content = await readSessionFile(filePath) if (content === null) return null const lines = content.split('\n').filter(l => l.trim()) - const entries: JournalEntry[] = [] - - for (const line of lines) { - const entry = parseJsonlLine(line) - if (entry) entries.push(entry) - } - - if (entries.length === 0) return null - - let filteredEntries = entries - if (dateRange) { - filteredEntries = entries.filter(e => { - if (!e.timestamp) return e.type === 'user' - const ts = new Date(e.timestamp) - return ts >= dateRange.start && ts <= dateRange.end - }) - if (filteredEntries.length === 0) return null - } - - const sessionId = basename(filePath, '.jsonl') - const turns = groupIntoTurns(filteredEntries, seenMsgIds) - const classified = turns.map(classifyTurn) - - return buildSessionSummary(sessionId, project, classified) + return buildClaudeSessionSummaryFromLines(lines, project, seenMsgIds, basename(filePath, '.jsonl'), dateRange) } async function collectJsonlFiles(dirPath: string): Promise { @@ -387,18 +404,168 @@ async function collectJsonlFiles(dirPath: string): Promise { return jsonlFiles } -async function scanProjectDirs(dirs: Array<{ path: string; name: string }>, seenMsgIds: Set, dateRange?: DateRange): Promise { +type ClaudeCacheUnit = { + path: string + 
project: string + progressLabel: string +} + +async function listClaudeCacheUnits(dirPath: string, dirName: string): Promise { + const jsonlFiles = await collectJsonlFiles(dirPath) + return jsonlFiles.map(filePath => ({ + path: filePath, + project: dirName, + progressLabel: filePath.split(/[\\/]/).slice(-2).join('/'), + })) +} + +function appendStateTailHash(session: SessionSummary): string { + return createHash('sha1').update(session.lastTimestamp).digest('hex') +} + +function fingerprintsMatch( + left: { mtimeMs: number; sizeBytes: number }, + right: { mtimeMs: number; sizeBytes: number }, +): boolean { + return left.mtimeMs === right.mtimeMs && left.sizeBytes === right.sizeBytes +} + +async function refreshClaudeCacheUnit( + manifest: Awaited>, + unit: ClaudeCacheUnit, + seenMsgIds: Set, + parserVersion: string, + options: ParseOptions, +): Promise<{ session: SessionSummary | null; wrote: boolean; refreshed: boolean }> { + let reportedRefresh = false + const cached = options.noCache + ? null + : await readSourceCacheEntry(manifest, 'claude', unit.path, { allowStaleFingerprint: true }) + const fingerprint = await computeFileFingerprint(unit.path) + + if ( + cached + && cached.parserVersion === parserVersion + && cached.cacheStrategy === 'append-jsonl' + && fingerprintsMatch(fingerprint, cached.fingerprint) + ) { + addSeenDeduplicationKeysFromSessions(cached.sessions, seenMsgIds) + return { session: cached.sessions[0] ?? 
null, wrote: false, refreshed: false } + } + + if ( + cached + && cached.parserVersion === parserVersion + && cached.cacheStrategy === 'append-jsonl' + && cached.appendState + && fingerprint.sizeBytes > cached.fingerprint.sizeBytes + ) { + reportedRefresh = true + options.progress?.advance(unit.progressLabel) + addSeenDeduplicationKeysFromSessions(cached.sessions, seenMsgIds) + const appendedLines: string[] = [] + for await (const line of readSessionLinesFromOffset(unit.path, cached.appendState.endOffset)) { + if (line.trim()) appendedLines.push(line) + } + + const appended = buildClaudeSessionSummaryFromLines( + appendedLines, + unit.project, + seenMsgIds, + cached.sessions[0]?.sessionId ?? basename(unit.path, '.jsonl'), + ) + + if (appended && cached.sessions[0]) { + const merged = buildSessionSummary( + cached.sessions[0].sessionId, + unit.project, + [...cached.sessions[0].turns, ...appended.turns], + ) + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: 'claude', + logicalPath: unit.path, + fingerprintPath: unit.path, + cacheStrategy: 'append-jsonl', + parserVersion, + fingerprint, + sessions: [merged], + appendState: { + endOffset: fingerprint.sizeBytes, + tailHash: appendStateTailHash(merged), + }, + }) + return { session: merged, wrote: true, refreshed: true } + } + } + + if (!reportedRefresh) options.progress?.advance(unit.progressLabel) + const session = await parseSessionFile(unit.path, unit.project, seenMsgIds) + if (!session) return { session: null, wrote: false, refreshed: true } + + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: 'claude', + logicalPath: unit.path, + fingerprintPath: unit.path, + cacheStrategy: 'append-jsonl', + parserVersion, + fingerprint, + sessions: [session], + appendState: { + endOffset: fingerprint.sizeBytes, + tailHash: appendStateTailHash(session), + }, + }) + return { session, wrote: true, refreshed: true } +} + +async function scanClaudeDirsWithCache( + 
dirs: Array<{ path: string; name: string }>, + seenMsgIds: Set, + dateRange?: DateRange, + options: ParseOptions = {}, +): Promise { const projectMap = new Map() + const manifest = await loadSourceCacheManifest() + const parserVersion = 'claude:v1' + const units = (await Promise.all( + dirs.map(dir => listClaudeCacheUnits(dir.path, dir.name)), + )).flat() + const refreshStates = await Promise.all(units.map(async unit => { + const cached = options.noCache + ? null + : await readSourceCacheEntry(manifest, 'claude', unit.path, { allowStaleFingerprint: true }) + const fingerprint = await computeFileFingerprint(unit.path).catch(() => null) + const reusable = !!( + cached + && fingerprint + && cached.parserVersion === parserVersion + && cached.cacheStrategy === 'append-jsonl' + && fingerprintsMatch(fingerprint, cached.fingerprint) + ) + return { unit, refreshed: !reusable } + })) - for (const { path: dirPath, name: dirName } of dirs) { - const jsonlFiles = await collectJsonlFiles(dirPath) + const refreshCount = refreshStates.filter(state => state.refreshed).length + let wroteManifest = false - for (const filePath of jsonlFiles) { - const session = await parseSessionFile(filePath, dirName, seenMsgIds, dateRange) - if (session) addSessionToProjectMap(projectMap, session) + if (refreshCount > 0) options.progress?.start('Updating cache', refreshCount) + + try { + for (const { unit } of refreshStates) { + const { session, wrote } = await refreshClaudeCacheUnit(manifest, unit, seenMsgIds, parserVersion, options) + if (wrote) wroteManifest = true + if (!session) continue + + const filtered = filterSessionSummaryToRange(session, dateRange) + if (filtered) addSessionToProjectMap(projectMap, filtered) } + } finally { + if (refreshCount > 0) options.progress?.finish() } + if (wroteManifest) await saveSourceCacheManifest(manifest) return buildProjects(projectMap) } @@ -470,7 +637,7 @@ async function parseProviderSources( let fullSessions = state.cachedSessions if (fullSessions) { - 
addSeenKeysFromSessions(fullSessions, seenKeys) + addSeenDeduplicationKeysFromSessions(fullSessions, seenKeys) } else { provider ??= await getProvider(providerName) if (!provider) continue @@ -518,23 +685,42 @@ function cacheKey(dateRange?: DateRange, providerFilter?: string, noCache = fals async function sourceSignatureForCache(sources: SessionSource[]): Promise { const fingerprints = await Promise.all(sources.map(async source => { + if (source.provider === 'claude') { + const jsonlFiles = await collectJsonlFiles(source.path) + return Promise.all(jsonlFiles.map(async filePath => { + try { + const meta = await stat(filePath) + return [ + source.provider, + source.project, + filePath, + filePath, + String(meta.mtimeMs), + String(meta.size), + ].join(':') + } catch { + return [source.provider, source.project, filePath, filePath, 'missing'].join(':') + } + })) + } + const fingerprintPath = source.fingerprintPath ?? source.path try { const meta = await stat(fingerprintPath) - return [ + return [[ source.provider, source.project, source.path, fingerprintPath, String(meta.mtimeMs), String(meta.size), - ].join(':') + ].join(':')] } catch { - return [source.provider, source.project, source.path, fingerprintPath, 'missing'].join(':') + return [[source.provider, source.project, source.path, fingerprintPath, 'missing'].join(':')] } })) - return fingerprints.sort().join('|') + return fingerprints.flat().sort().join('|') } function cachePut(key: string, data: ProjectSummary[], sourceSignature: string) { @@ -622,7 +808,7 @@ export async function parseAllSessions( const nonClaudeSources = allSources.filter(s => s.provider !== 'claude') const claudeDirs = claudeSources.map(s => ({ path: s.path, name: s.project })) - const claudeProjects = await scanProjectDirs(claudeDirs, seenMsgIds, dateRange) + const claudeProjects = await scanClaudeDirsWithCache(claudeDirs, seenMsgIds, dateRange, options) const providerGroups = new Map() for (const source of nonClaudeSources) { diff --git 
a/src/source-cache.ts b/src/source-cache.ts index bd65dcf..3dc652f 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -37,6 +37,10 @@ export type SourceCacheManifest = { entries: Record } +export type ReadSourceCacheEntryOptions = { + allowStaleFingerprint?: boolean +} + function isPlainObject(value: unknown): value is Record { return !!value && typeof value === 'object' && !Array.isArray(value) } @@ -246,6 +250,7 @@ export async function readSourceCacheEntry( manifest: SourceCacheManifest, provider: string, logicalPath: string, + options: ReadSourceCacheEntryOptions = {}, ): Promise { const meta = manifest.entries[sourceKey(provider, logicalPath)] if (!meta) return null @@ -260,12 +265,14 @@ export async function readSourceCacheEntry( if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null if (entry.provider !== provider || entry.logicalPath !== logicalPath) return null - const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath) - if ( - currentFingerprint.mtimeMs !== entry.fingerprint.mtimeMs - || currentFingerprint.sizeBytes !== entry.fingerprint.sizeBytes - ) { - return null + if (!options.allowStaleFingerprint) { + const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath) + if ( + currentFingerprint.mtimeMs !== entry.fingerprint.mtimeMs + || currentFingerprint.sizeBytes !== entry.fingerprint.sizeBytes + ) { + return null + } } return entry diff --git a/tests/fs-utils.test.ts b/tests/fs-utils.test.ts index 6510900..b23941a 100644 --- a/tests/fs-utils.test.ts +++ b/tests/fs-utils.test.ts @@ -8,6 +8,7 @@ import { STREAM_THRESHOLD_BYTES, readSessionFile, readSessionLines, + readSessionLinesFromOffset, } from '../src/fs-utils.js' describe('readSessionFile', () => { @@ -96,3 +97,33 @@ describe('readSessionLines', () => { await gen.return(undefined) }) }) + +describe('readSessionLinesFromOffset', () => { + const tmpDirs: string[] = [] + + afterEach(async () => { + while (tmpDirs.length 
> 0) { + const d = tmpDirs.pop() + if (d) await rm(d, { recursive: true, force: true }) + } + }) + + async function tmpPath(content: string): Promise { + const base = await mkdtemp(join(tmpdir(), 'codeburn-fs-offset-')) + tmpDirs.push(base) + const p = join(base, 'offset.txt') + await writeFile(p, content, 'utf-8') + return p + } + + it('starts at the requested byte offset', async () => { + const p = await tmpPath('alpha\nbeta\ngamma\n') + const lines: string[] = [] + + for await (const line of readSessionLinesFromOffset(p, Buffer.byteLength('alpha\n', 'utf-8'))) { + lines.push(line) + } + + expect(lines).toEqual(['beta', 'gamma']) + }) +}) diff --git a/tests/parser-cache.test.ts b/tests/parser-cache.test.ts index 58459fb..446d629 100644 --- a/tests/parser-cache.test.ts +++ b/tests/parser-cache.test.ts @@ -1,5 +1,5 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { mkdtemp, readFile, rm, writeFile } from 'fs/promises' +import { appendFile, mkdir, mkdtemp, readFile, rm, writeFile } from 'fs/promises' import { tmpdir } from 'os' import { join } from 'path' @@ -8,6 +8,8 @@ import type { ParsedProviderCall, Provider, SessionSource } from '../src/provide let root = '' let sourcePath = '' let parseCalls = 0 +let claudeRoot = '' +let claudeSessionPath = '' function makeCall(index: number): ParsedProviderCall { const second = String(index).padStart(2, '0') @@ -35,13 +37,38 @@ function makeCall(index: number): ParsedProviderCall { beforeEach(async () => { root = await mkdtemp(join(tmpdir(), 'codeburn-parser-cache-')) sourcePath = join(root, 'fake.jsonl') + claudeRoot = join(root, '.claude') + claudeSessionPath = join(claudeRoot, 'projects', 'demo-project', 'session.jsonl') parseCalls = 0 process.env['CODEBURN_CACHE_DIR'] = join(root, 'cache') + process.env['CLAUDE_CONFIG_DIR'] = claudeRoot await writeFile(sourcePath, 'one\n', 'utf-8') + await mkdir(join(claudeRoot, 'projects', 'demo-project'), { recursive: true }) + await 
writeFile(claudeSessionPath, [ + JSON.stringify({ + type: 'user', + timestamp: '2026-04-20T09:00:00.000Z', + sessionId: 'sess-1', + message: { role: 'user', content: 'first' }, + }), + JSON.stringify({ + type: 'assistant', + timestamp: '2026-04-20T09:00:01.000Z', + message: { + id: 'msg-1', + model: 'claude-sonnet-4-6', + role: 'assistant', + type: 'message', + content: [], + usage: { input_tokens: 10, output_tokens: 20 }, + }, + }), + ].join('\n') + '\n', 'utf-8') }) afterEach(async () => { delete process.env['CODEBURN_CACHE_DIR'] + delete process.env['CLAUDE_CONFIG_DIR'] await rm(root, { recursive: true, force: true }) vi.resetModules() vi.clearAllMocks() @@ -150,4 +177,37 @@ describe('parseAllSessions source cache', () => { expect(onlyFirstDay[0]?.totalApiCalls).toBe(1) }) + + it('refreshes appended Claude log entries on the next run', async () => { + vi.doUnmock('../src/providers/index.js') + vi.resetModules() + const { parseAllSessions } = await import('../src/parser.js') + + const first = await parseAllSessions(undefined, 'claude') + expect(first.find(project => project.project === 'demo-project')?.totalApiCalls).toBe(1) + + await appendFile(claudeSessionPath, [ + JSON.stringify({ + type: 'user', + timestamp: '2026-04-20T09:05:00.000Z', + sessionId: 'sess-1', + message: { role: 'user', content: 'second' }, + }), + JSON.stringify({ + type: 'assistant', + timestamp: '2026-04-20T09:05:01.000Z', + message: { + id: 'msg-2', + model: 'claude-sonnet-4-6', + role: 'assistant', + type: 'message', + content: [], + usage: { input_tokens: 11, output_tokens: 21 }, + }, + }), + ].join('\n') + '\n', 'utf-8') + + const second = await parseAllSessions(undefined, 'claude') + expect(second.find(project => project.project === 'demo-project')?.totalApiCalls).toBe(2) + }) }) From 1b8e0f82896491161bfa220257265eb376b1bdb9 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 17:20:31 +0200 Subject: [PATCH 08/14] fix: harden Claude append cache refresh --- 
src/parser.ts | 164 +++++++++++++++++++++++++++---------- src/source-cache.ts | 2 + tests/parser-cache.test.ts | 97 ++++++++++++++++++++++ 3 files changed, 222 insertions(+), 41 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index e02bb9e..c7d022b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,5 @@ import { createHash } from 'crypto' -import { readdir, stat } from 'fs/promises' +import { open, readdir, stat } from 'fs/promises' import { basename, join } from 'path' import { readSessionFile, readSessionLinesFromOffset } from './fs-utils.js' import { calculateCost, getShortModelName } from './models.js' @@ -410,6 +410,13 @@ type ClaudeCacheUnit = { progressLabel: string } +type ClaudeTailState = { + tailHash: string + lastEntryType?: string +} + +const CLAUDE_TAIL_WINDOW_BYTES = 16 * 1024 + async function listClaudeCacheUnits(dirPath: string, dirName: string): Promise { const jsonlFiles = await collectJsonlFiles(dirPath) return jsonlFiles.map(filePath => ({ @@ -419,10 +426,6 @@ async function listClaudeCacheUnits(dirPath: string, dirName: string): Promise { + const start = Math.max(0, endOffset - CLAUDE_TAIL_WINDOW_BYTES) + const length = Math.max(0, endOffset - start) + if (length === 0) return null + + const handle = await open(filePath, 'r') + const buffer = Buffer.alloc(length) + + try { + await handle.read(buffer, 0, length, start) + } finally { + await handle.close() + } + + const chunk = buffer.toString('utf-8').replace(/[\r\n]+$/, '') + if (chunk.length === 0) return null + + const lastNewline = chunk.lastIndexOf('\n') + if (lastNewline < 0 && start > 0) return null + + const lastLine = lastNewline >= 0 ? 
chunk.slice(lastNewline + 1) : chunk + if (!lastLine.trim()) return null + + const entry = parseJsonlLine(lastLine) + return { + tailHash: createHash('sha1').update(lastLine).digest('hex'), + lastEntryType: entry?.type, + } +} + +async function buildClaudeAppendState(filePath: string, endOffset: number): Promise<{ + endOffset: number + tailHash: string + lastEntryType?: string +}> { + const tailState = await readClaudeTailState(filePath, endOffset) + return { + endOffset, + tailHash: tailState?.tailHash ?? '', + lastEntryType: tailState?.lastEntryType, + } +} + +function mergeClaudeAppendSession( + cachedSession: SessionSummary, + appendedSession: SessionSummary, + lastEntryType?: string, +): SessionSummary | null { + const mergedTurns = [...cachedSession.turns] + const appendedTurns = [...appendedSession.turns] + const firstAppendedTurn = appendedTurns[0] + + if (firstAppendedTurn && firstAppendedTurn.userMessage === '') { + if (lastEntryType !== 'assistant' || mergedTurns.length === 0) return null + + const previousTurn = mergedTurns[mergedTurns.length - 1]! 
+ mergedTurns[mergedTurns.length - 1] = classifyTurn({ + userMessage: previousTurn.userMessage, + assistantCalls: [...previousTurn.assistantCalls, ...firstAppendedTurn.assistantCalls], + timestamp: previousTurn.timestamp, + sessionId: previousTurn.sessionId, + }) + appendedTurns.shift() + } + + return buildSessionSummary( + cachedSession.sessionId, + cachedSession.project, + [...mergedTurns, ...appendedTurns], + ) +} + async function refreshClaudeCacheUnit( manifest: Awaited>, unit: ClaudeCacheUnit, @@ -460,42 +535,52 @@ async function refreshClaudeCacheUnit( && cached.appendState && fingerprint.sizeBytes > cached.fingerprint.sizeBytes ) { - reportedRefresh = true - options.progress?.advance(unit.progressLabel) - addSeenDeduplicationKeysFromSessions(cached.sessions, seenMsgIds) - const appendedLines: string[] = [] - for await (const line of readSessionLinesFromOffset(unit.path, cached.appendState.endOffset)) { - if (line.trim()) appendedLines.push(line) - } - - const appended = buildClaudeSessionSummaryFromLines( - appendedLines, - unit.project, - seenMsgIds, - cached.sessions[0]?.sessionId ?? 
basename(unit.path, '.jsonl'), + const currentTailState = await readClaudeTailState(unit.path, cached.appendState.endOffset) + const tailMatches = !!( + currentTailState + && cached.appendState.tailHash + && currentTailState.tailHash === cached.appendState.tailHash ) - if (appended && cached.sessions[0]) { - const merged = buildSessionSummary( - cached.sessions[0].sessionId, + if (tailMatches) { + reportedRefresh = true + options.progress?.advance(unit.progressLabel) + addSeenDeduplicationKeysFromSessions(cached.sessions, seenMsgIds) + + const appendedLines: string[] = [] + for await (const line of readSessionLinesFromOffset(unit.path, cached.appendState.endOffset)) { + if (line.trim()) appendedLines.push(line) + } + + const appended = buildClaudeSessionSummaryFromLines( + appendedLines, unit.project, - [...cached.sessions[0].turns, ...appended.turns], + seenMsgIds, + cached.sessions[0]?.sessionId ?? basename(unit.path, '.jsonl'), ) - await writeSourceCacheEntry(manifest, { - version: SOURCE_CACHE_VERSION, - provider: 'claude', - logicalPath: unit.path, - fingerprintPath: unit.path, - cacheStrategy: 'append-jsonl', - parserVersion, - fingerprint, - sessions: [merged], - appendState: { - endOffset: fingerprint.sizeBytes, - tailHash: appendStateTailHash(merged), - }, - }) - return { session: merged, wrote: true, refreshed: true } + + if (appended && cached.sessions[0]) { + const merged = mergeClaudeAppendSession( + cached.sessions[0], + appended, + cached.appendState.lastEntryType, + ) + + if (merged) { + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: 'claude', + logicalPath: unit.path, + fingerprintPath: unit.path, + cacheStrategy: 'append-jsonl', + parserVersion, + fingerprint, + sessions: [merged], + appendState: await buildClaudeAppendState(unit.path, fingerprint.sizeBytes), + }) + return { session: merged, wrote: true, refreshed: true } + } + } } } @@ -512,10 +597,7 @@ async function refreshClaudeCacheUnit( parserVersion, 
fingerprint, sessions: [session], - appendState: { - endOffset: fingerprint.sizeBytes, - tailHash: appendStateTailHash(session), - }, + appendState: await buildClaudeAppendState(unit.path, fingerprint.sizeBytes), }) return { session, wrote: true, refreshed: true } } diff --git a/src/source-cache.ts b/src/source-cache.ts index 3dc652f..b5b4029 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -18,6 +18,7 @@ export type SourceFingerprint = { export type AppendState = { endOffset: number tailHash: string + lastEntryType?: string } export type SourceCacheEntry = { @@ -146,6 +147,7 @@ function isAppendState(value: unknown): value is AppendState { && typeof value.endOffset === 'number' && Number.isFinite(value.endOffset) && typeof value.tailHash === 'string' + && (value.lastEntryType === undefined || typeof value.lastEntryType === 'string') } function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { diff --git a/tests/parser-cache.test.ts b/tests/parser-cache.test.ts index 446d629..3ee2553 100644 --- a/tests/parser-cache.test.ts +++ b/tests/parser-cache.test.ts @@ -210,4 +210,101 @@ describe('parseAllSessions source cache', () => { const second = await parseAllSessions(undefined, 'claude') expect(second.find(project => project.project === 'demo-project')?.totalApiCalls).toBe(2) }) + + it('falls back to a full Claude reparse when cached tail verification fails', async () => { + vi.doUnmock('../src/providers/index.js') + vi.resetModules() + const { parseAllSessions } = await import('../src/parser.js') + await parseAllSessions(undefined, 'claude') + + const cacheRoot = join(root, 'cache', 'source-cache-v1') + const manifest = JSON.parse(await readFile(join(cacheRoot, 'manifest.json'), 'utf-8')) as { + entries: Record + } + const entryPath = join(cacheRoot, 'entries', manifest.entries[`claude:${claudeSessionPath}`]!.file) + const entry = JSON.parse(await readFile(entryPath, 'utf-8')) as { + appendState?: { tailHash?: string } + } + 
entry.appendState = { ...entry.appendState, tailHash: 'broken-tail-hash' } + await writeFile(entryPath, JSON.stringify(entry), 'utf-8') + + await appendFile(claudeSessionPath, [ + JSON.stringify({ + type: 'user', + timestamp: '2026-04-20T09:05:00.000Z', + sessionId: 'sess-1', + message: { role: 'user', content: 'second' }, + }), + JSON.stringify({ + type: 'assistant', + timestamp: '2026-04-20T09:05:01.000Z', + message: { + id: 'msg-2', + model: 'claude-sonnet-4-6', + role: 'assistant', + type: 'message', + content: [], + usage: { input_tokens: 11, output_tokens: 21 }, + }, + }), + ].join('\n') + '\n', 'utf-8') + + vi.resetModules() + const readSessionFileCalls: string[] = [] + const readSessionLinesFromOffsetCalls: Array<[string, number]> = [] + vi.doMock('../src/fs-utils.js', async () => { + const actual = await vi.importActual('../src/fs-utils.js') + return { + ...actual, + readSessionFile: vi.fn(async (filePath: string) => { + readSessionFileCalls.push(filePath) + return actual.readSessionFile(filePath) + }), + readSessionLinesFromOffset: vi.fn(async function* (filePath: string, startOffset: number) { + readSessionLinesFromOffsetCalls.push([filePath, startOffset]) + for await (const line of actual.readSessionLinesFromOffset(filePath, startOffset)) { + yield line + } + }), + } + }) + + const { parseAllSessions: reparsedParseAllSessions } = await import('../src/parser.js') + const reparsed = await reparsedParseAllSessions(undefined, 'claude') + + expect(reparsed.find(project => project.project === 'demo-project')?.totalApiCalls).toBe(2) + expect(readSessionFileCalls).toContain(claudeSessionPath) + expect(readSessionLinesFromOffsetCalls).toHaveLength(0) + }) + + it('keeps appended assistant-only Claude entries inside the existing turn', async () => { + vi.doUnmock('../src/providers/index.js') + vi.resetModules() + const { parseAllSessions } = await import('../src/parser.js') + + const first = await parseAllSessions(undefined, 'claude') + const initialSession = 
first.find(project => project.project === 'demo-project')?.sessions[0] + expect(initialSession?.turns).toHaveLength(1) + + await appendFile(claudeSessionPath, JSON.stringify({ + type: 'assistant', + timestamp: '2026-04-20T09:05:01.000Z', + message: { + id: 'msg-2', + model: 'claude-sonnet-4-6', + role: 'assistant', + type: 'message', + content: [], + usage: { input_tokens: 11, output_tokens: 21 }, + }, + }) + '\n', 'utf-8') + + const second = await parseAllSessions(undefined, 'claude') + const session = second.find(project => project.project === 'demo-project')?.sessions[0] + + expect(session?.apiCalls).toBe(2) + expect(session?.turns).toHaveLength(1) + expect(session?.turns[0]?.userMessage).toBe('first') + expect(session?.turns[0]?.assistantCalls).toHaveLength(2) + }) }) From 2a9daec0eac257613decb50110fe38221fdc8786 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 17:55:02 +0200 Subject: [PATCH 09/14] feat: add cache rebuild flag and progress --- src/cli.ts | 73 +++++++++++++++++++++++++----------- src/compare.tsx | 8 +++- src/dashboard.tsx | 46 +++++++++++++++++------ src/parse-progress.ts | 42 +++++++++++++++++++++ tests/parse-progress.test.ts | 25 ++++++++++++ 5 files changed, 159 insertions(+), 35 deletions(-) create mode 100644 src/parse-progress.ts create mode 100644 tests/parse-progress.test.ts diff --git a/src/cli.ts b/src/cli.ts index e23e24e..7bd91a9 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -12,6 +12,7 @@ import { aggregateProjectsIntoDays, buildPeriodDataFromDays, dateKey } from './d import { CATEGORY_LABELS, type DateRange, type ProjectSummary, type TaskCategory } from './types.js' import { renderDashboard } from './dashboard.js' import { parseDateRangeFlags } from './cli-date.js' +import { createTerminalProgressReporter } from './parse-progress.js' import { runOptimize, scanAndDetect } from './optimize.js' import { renderCompare } from './compare.js' import { getAllProviders } from './providers/index.js' @@ -120,10 +121,14 @@ 
function toJsonPlanSummary(planUsage: PlanUsage): JsonPlanSummary { } } -async function runJsonReport(period: Period, provider: string, project: string[], exclude: string[]): Promise { +async function runJsonReport(period: Period, provider: string, project: string[], exclude: string[], noCache = false): Promise { await loadPricing() const { range, label } = getDateRange(period) - const projects = filterProjectsByName(await parseAllSessions(range, provider), project, exclude) + const projects = filterProjectsByName( + await parseAllSessions(range, provider, { noCache, progress: null }), + project, + exclude, + ) const report: ReturnType & { plan?: JsonPlanSummary } = buildJsonReport(projects, label, period) const planUsage = await getPlanUsageOrNull() if (planUsage) { @@ -132,6 +137,17 @@ async function runJsonReport(period: Period, provider: string, project: string[] console.log(JSON.stringify(report, null, 2)) } +function noCacheRequested(opts: { cache?: boolean }): boolean { + return opts.cache === false +} + +function buildParseOptions(noCache: boolean, enableProgress: boolean) { + return { + noCache, + progress: createTerminalProgressReporter(enableProgress), + } +} + const program = new Command() .name('codeburn') .description('See where your AI coding tokens go - by task, tool, model, and project') @@ -288,8 +304,10 @@ program .option('--format ', 'Output format: tui, json', 'tui') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) + .option('--no-cache', 'Rebuild the parsed source cache for this run') .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInt, 30) .action(async (opts) => { + const noCache = noCacheRequested(opts) let customRange: DateRange | null = null try { customRange = parseDateRangeFlags(opts.from, opts.to) @@ -305,17 +323,17 @@ program if (customRange) { const label = `${opts.from ?? 
'all'} to ${opts.to ?? 'today'}` const projects = filterProjectsByName( - await parseAllSessions(customRange, opts.provider), + await parseAllSessions(customRange, opts.provider, { noCache, progress: null }), opts.project, opts.exclude, ) console.log(JSON.stringify(buildJsonReport(projects, label, 'custom'), null, 2)) } else { - await runJsonReport(period, opts.provider, opts.project, opts.exclude) + await runJsonReport(period, opts.provider, opts.project, opts.exclude, noCache) } return } - await renderDashboard(period, opts.provider, opts.refresh, opts.project, opts.exclude, customRange) + await renderDashboard(period, opts.provider, opts.refresh, opts.project, opts.exclude, customRange, noCache) }) function buildPeriodData(label: string, projects: ProjectSummary[]): PeriodData { @@ -367,8 +385,11 @@ program .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) .option('--period ', 'Primary period for menubar-json: today, week, 30days, month, all', 'today') .option('--no-optimize', 'Skip optimize findings (menubar-json only, faster)') + .option('--no-cache', 'Rebuild the parsed source cache for this run') .action(async (opts) => { await loadPricing() + const noCache = noCacheRequested(opts) + const parseOptions = buildParseOptions(noCache, opts.format === 'terminal') const pf = opts.provider const fp = (p: ProjectSummary[]) => filterProjectsByName(p, opts.project, opts.exclude) if (opts.format === 'menubar-json') { @@ -403,7 +424,7 @@ program if (gapStart.getTime() <= yesterdayEnd.getTime()) { const gapRange: DateRange = { start: gapStart, end: yesterdayEnd } - const gapProjects = filterProjectsByName(await parseAllSessions(gapRange, 'all'), opts.project, opts.exclude) + const gapProjects = filterProjectsByName(await parseAllSessions(gapRange, 'all', { noCache, progress: null }), opts.project, opts.exclude) const gapDays = aggregateProjectsIntoDays(gapProjects) c = addNewDays(c, gapDays, yesterdayStr) await saveDailyCache(c) @@ -420,7 
+441,7 @@ program if (isAllProviders) { const todayRange: DateRange = { start: todayStart, end: now } - const todayProjects = fp(await parseAllSessions(todayRange, 'all')) + const todayProjects = fp(await parseAllSessions(todayRange, 'all', { noCache, progress: null })) const todayDays = aggregateProjectsIntoDays(todayProjects) const rangeStartStr = toDateString(periodInfo.range.start) const rangeEndStr = toDateString(periodInfo.range.end) @@ -431,7 +452,7 @@ program scanProjects = todayProjects scanRange = todayRange } else { - const projects = fp(await parseAllSessions(periodInfo.range, pf)) + const projects = fp(await parseAllSessions(periodInfo.range, pf, { noCache, progress: null })) currentData = buildPeriodData(periodInfo.label, projects) scanProjects = projects scanRange = periodInfo.range @@ -445,7 +466,7 @@ program const providers: ProviderCost[] = [] if (isAllProviders) { const todayRangeForProviders: DateRange = { start: todayStart, end: now } - const todayDaysForProviders = aggregateProjectsIntoDays(fp(await parseAllSessions(todayRangeForProviders, 'all'))) + const todayDaysForProviders = aggregateProjectsIntoDays(fp(await parseAllSessions(todayRangeForProviders, 'all', { noCache, progress: null }))) const rangeStartStr = toDateString(periodInfo.range.start) const allDaysForProviders = [ ...getDaysInRange(cache, rangeStartStr, yesterdayStr), @@ -476,7 +497,7 @@ program // in the cache, so the filtered view shows zero tokens (heatmap/trend still works on cost). 
const historyStartStr = toDateString(new Date(todayStart.getTime() - BACKFILL_DAYS * MS_PER_DAY)) const allCacheDays = getDaysInRange(cache, historyStartStr, yesterdayStr) - const allTodayDaysForHistory = aggregateProjectsIntoDays(fp(await parseAllSessions({ start: todayStart, end: now }, 'all'))) + const allTodayDaysForHistory = aggregateProjectsIntoDays(fp(await parseAllSessions({ start: todayStart, end: now }, 'all', { noCache, progress: null }))) const fullHistory = [...allCacheDays, ...allTodayDaysForHistory] const dailyHistory = fullHistory.map(d => { if (isAllProviders) { @@ -521,8 +542,8 @@ program } if (opts.format === 'json') { - const todayData = buildPeriodData('today', fp(await parseAllSessions(getDateRange('today').range, pf))) - const monthData = buildPeriodData('month', fp(await parseAllSessions(getDateRange('month').range, pf))) + const todayData = buildPeriodData('today', fp(await parseAllSessions(getDateRange('today').range, pf, { noCache, progress: null }))) + const monthData = buildPeriodData('month', fp(await parseAllSessions(getDateRange('month').range, pf, { noCache, progress: null }))) const { code, rate } = getCurrency() const payload: { currency: string @@ -542,7 +563,7 @@ program return } - const monthProjects = fp(await parseAllSessions(getDateRange('month').range, pf)) + const monthProjects = fp(await parseAllSessions(getDateRange('month').range, pf, parseOptions)) console.log(renderStatusBar(monthProjects)) }) @@ -553,13 +574,15 @@ program .option('--format ', 'Output format: tui, json', 'tui') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) + .option('--no-cache', 'Rebuild the parsed source cache for this run') .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInt, 30) .action(async (opts) => { + const noCache = noCacheRequested(opts) if (opts.format === 'json') { - await 
runJsonReport('today', opts.provider, opts.project, opts.exclude) + await runJsonReport('today', opts.provider, opts.project, opts.exclude, noCache) return } - await renderDashboard('today', opts.provider, opts.refresh, opts.project, opts.exclude) + await renderDashboard('today', opts.provider, opts.refresh, opts.project, opts.exclude, null, noCache) }) program @@ -569,13 +592,15 @@ program .option('--format ', 'Output format: tui, json', 'tui') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) + .option('--no-cache', 'Rebuild the parsed source cache for this run') .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInt, 30) .action(async (opts) => { + const noCache = noCacheRequested(opts) if (opts.format === 'json') { - await runJsonReport('month', opts.provider, opts.project, opts.exclude) + await runJsonReport('month', opts.provider, opts.project, opts.exclude, noCache) return } - await renderDashboard('month', opts.provider, opts.refresh, opts.project, opts.exclude) + await renderDashboard('month', opts.provider, opts.refresh, opts.project, opts.exclude, null, noCache) }) program @@ -586,14 +611,16 @@ program .option('--provider ', 'Filter by provider: all, claude, codex, cursor', 'all') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) + .option('--no-cache', 'Rebuild the parsed source cache for this run') .action(async (opts) => { await loadPricing() + const parseOptions = buildParseOptions(noCacheRequested(opts), true) const pf = opts.provider const fp = (p: ProjectSummary[]) => filterProjectsByName(p, opts.project, opts.exclude) const periods: PeriodExport[] = [ - { label: 'Today', projects: fp(await parseAllSessions(getDateRange('today').range, pf)) }, - { label: '7 Days', projects: fp(await 
parseAllSessions(getDateRange('week').range, pf)) }, - { label: '30 Days', projects: fp(await parseAllSessions(getDateRange('30days').range, pf)) }, + { label: 'Today', projects: fp(await parseAllSessions(getDateRange('today').range, pf, parseOptions)) }, + { label: '7 Days', projects: fp(await parseAllSessions(getDateRange('week').range, pf, parseOptions)) }, + { label: '30 Days', projects: fp(await parseAllSessions(getDateRange('30days').range, pf, parseOptions)) }, ] if (periods.every(p => p.projects.length === 0)) { @@ -813,10 +840,11 @@ program .description('Find token waste and get exact fixes') .option('-p, --period ', 'Analysis period: today, week, 30days, month, all', '30days') .option('--provider ', 'Filter by provider: all, claude, codex, cursor', 'all') + .option('--no-cache', 'Rebuild the parsed source cache for this run') .action(async (opts) => { await loadPricing() const { range, label } = getDateRange(opts.period) - const projects = await parseAllSessions(range, opts.provider) + const projects = await parseAllSessions(range, opts.provider, buildParseOptions(noCacheRequested(opts), true)) await runOptimize(projects, label, range) }) @@ -825,10 +853,11 @@ program .description('Compare two AI models side-by-side') .option('-p, --period ', 'Analysis period: today, week, 30days, month, all', 'all') .option('--provider ', 'Filter by provider: all, claude, codex, cursor', 'all') + .option('--no-cache', 'Rebuild the parsed source cache for this run') .action(async (opts) => { await loadPricing() const { range } = getDateRange(opts.period) - await renderCompare(range, opts.provider) + await renderCompare(range, opts.provider, noCacheRequested(opts)) }) program.parse() diff --git a/src/compare.tsx b/src/compare.tsx index 0f1947e..f183e7b 100644 --- a/src/compare.tsx +++ b/src/compare.tsx @@ -5,6 +5,7 @@ import type { ModelStats, ComparisonRow, CategoryComparison, WorkingStyleRow } f import { aggregateModelStats, computeComparison, computeCategoryComparison, 
computeWorkingStyle, scanSelfCorrections } from './compare-stats.js' import { formatCost } from './format.js' import { parseAllSessions } from './parser.js' +import { createTerminalProgressReporter } from './parse-progress.js' import { getAllProviders } from './providers/index.js' import type { ProjectSummary, DateRange } from './types.js' @@ -441,14 +442,17 @@ export function CompareView({ projects, onBack }: CompareViewProps) { ) } -export async function renderCompare(range: DateRange, provider: string): Promise { +export async function renderCompare(range: DateRange, provider: string, noCache = false): Promise { const isTTY = process.stdin.isTTY && process.stdout.isTTY if (!isTTY) { process.stdout.write('Model comparison requires an interactive terminal.\n') return } - const projects = await parseAllSessions(range, provider) + const projects = await parseAllSessions(range, provider, { + noCache, + progress: createTerminalProgressReporter(true), + }) const { waitUntilExit } = render( process.exit(0)} /> ) diff --git a/src/dashboard.tsx b/src/dashboard.tsx index f84254d..2eb5ac4 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -10,6 +10,7 @@ import { getAllProviders } from './providers/index.js' import { scanAndDetect, type WasteFinding, type WasteAction, type OptimizeResult } from './optimize.js' import { estimateContextBudget, discoverProjectCwd, type ContextBudget } from './context-budget.js' import { dateKey } from './day-aggregator.js' +import { createTerminalProgressReporter } from './parse-progress.js' import { CompareView } from './compare.js' import { getPlanUsageOrNull, type PlanUsage } from './plan-usage.js' import { planDisplayName } from './plans.js' @@ -620,7 +621,7 @@ function DashboardContent({ projects, period, columns, activeProvider, budgets, ) } -function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, initialPlanUsage, refreshSeconds, projectFilter, excludeFilter }: { +function InteractiveDashboard({ 
initialProjects, initialPeriod, initialProvider, initialPlanUsage, refreshSeconds, projectFilter, excludeFilter, noCache }: { initialProjects: ProjectSummary[] initialPeriod: Period initialProvider: string @@ -628,6 +629,7 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, refreshSeconds?: number projectFilter?: string[] excludeFilter?: string[] + noCache?: boolean }) { const { exit } = useApp() const [period, setPeriod] = useState(initialPeriod) @@ -697,13 +699,14 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, setOptimizeResult(null) try { const range = getDateRange(p) - const data = await parseAllSessions(range, prov) + const data = filterProjectsByName( + await parseAllSessions(range, prov, { noCache: noCache ?? false, progress: null }), + projectFilter, + excludeFilter, + ) if (reloadGenerationRef.current !== generation) return - const filteredProjects = filterProjectsByName(data, projectFilter, excludeFilter) - if (reloadGenerationRef.current !== generation) return - - setProjects(filteredProjects) + setProjects(data) const usage = await getPlanUsageOrNull() if (reloadGenerationRef.current !== generation) return setPlanUsage(usage ?? 
undefined) @@ -714,7 +717,7 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, setLoading(false) } } - }, [projectFilter, excludeFilter]) + }, [excludeFilter, noCache, projectFilter]) useEffect(() => { if (!refreshSeconds || refreshSeconds <= 0) return @@ -799,15 +802,36 @@ function StaticDashboard({ projects, period, activeProvider, planUsage }: { proj ) } -export async function renderDashboard(period: Period = 'week', provider: string = 'all', refreshSeconds?: number, projectFilter?: string[], excludeFilter?: string[], customRange?: DateRange | null): Promise { +export async function renderDashboard( + period: Period = 'week', + provider: string = 'all', + refreshSeconds?: number, + projectFilter?: string[], + excludeFilter?: string[], + customRange?: DateRange | null, + noCache = false, +): Promise { await loadPricing() + const isTTY = process.stdin.isTTY && process.stdout.isTTY const range = customRange ?? getDateRange(period) - const filteredProjects = filterProjectsByName(await parseAllSessions(range, provider), projectFilter, excludeFilter) + const filteredProjects = filterProjectsByName( + await parseAllSessions(range, provider, { noCache, progress: createTerminalProgressReporter(isTTY) }), + projectFilter, + excludeFilter, + ) const planUsage = await getPlanUsageOrNull() - const isTTY = process.stdin.isTTY && process.stdout.isTTY if (isTTY) { const { waitUntilExit } = render( - + ) await waitUntilExit() } else { diff --git a/src/parse-progress.ts b/src/parse-progress.ts new file mode 100644 index 0000000..8a502c7 --- /dev/null +++ b/src/parse-progress.ts @@ -0,0 +1,42 @@ +import type { SourceProgressReporter } from './parser.js' + +export function createTerminalProgressReporter( + enabled: boolean, + stream: NodeJS.WriteStream = process.stderr, +): SourceProgressReporter | null { + if (!enabled || !stream.isTTY) return null + + let total = 0 + let current = 0 + let lastLineLength = 0 + let active = false + + function 
writeLine(line: string, done = false) { + const pad = lastLineLength > line.length ? ' '.repeat(lastLineLength - line.length) : '' + lastLineLength = Math.max(lastLineLength, line.length) + stream.write(`${line}${pad}${done ? '\n' : '\r'}`) + } + + return { + start(label: string, nextTotal: number) { + total = nextTotal + current = 0 + lastLineLength = 0 + active = nextTotal > 0 + if (active) writeLine(`${label} 0/${total}`) + }, + advance(itemLabel: string) { + if (!active) return + current += 1 + writeLine(`Updating cache ${current}/${total}${itemLabel ? ` ${itemLabel}` : ''}`) + }, + finish() { + if (!active) return + writeLine(`Updating cache ${current}/${total}`, true) + active = false + total = 0 + current = 0 + lastLineLength = 0 + }, + } +} diff --git a/tests/parse-progress.test.ts b/tests/parse-progress.test.ts new file mode 100644 index 0000000..dbdbcf5 --- /dev/null +++ b/tests/parse-progress.test.ts @@ -0,0 +1,25 @@ +import { describe, expect, it, vi } from 'vitest' + +import { createTerminalProgressReporter } from '../src/parse-progress.js' + +describe('createTerminalProgressReporter', () => { + it('renders Updating cache progress lines to stderr-compatible streams', () => { + const writes: string[] = [] + const stream = { + isTTY: true, + write: vi.fn((chunk: string) => { + writes.push(chunk) + return true + }), + } as unknown as NodeJS.WriteStream + + const reporter = createTerminalProgressReporter(true, stream) + reporter?.start('Updating cache', 2) + reporter?.advance('claude/session.jsonl') + reporter?.advance('codex/rollout.jsonl') + reporter?.finish() + + expect(writes.join('')).toContain('Updating cache') + expect(writes.join('')).toContain('2/2') + }) +}) From ff442c71f205c070997d7860659b4909c7fa62c2 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 17:55:15 +0200 Subject: [PATCH 10/14] perf: cache provider discovery metadata --- src/discovery-cache.ts | 146 ++++++++++++++++++++++++++++++++ src/providers/codex.ts | 43 
++++++++++ src/providers/copilot.ts | 37 ++++++++ src/providers/pi.ts | 26 ++++++ tests/providers/codex.test.ts | 27 +++++- tests/providers/copilot.test.ts | 24 +++++- tests/providers/pi.test.ts | 31 ++++++- 7 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 src/discovery-cache.ts diff --git a/src/discovery-cache.ts b/src/discovery-cache.ts new file mode 100644 index 0000000..39b76e4 --- /dev/null +++ b/src/discovery-cache.ts @@ -0,0 +1,146 @@ +import { createHash, randomBytes } from 'crypto' +import { existsSync } from 'fs' +import { mkdir, open, readFile, rename, unlink } from 'fs/promises' +import { homedir } from 'os' +import { dirname, join } from 'path' + +import type { SessionSource } from './providers/types.js' + +const DISCOVERY_CACHE_VERSION = 1 + +export type DiscoverySnapshotEntry = { + path: string + mtimeMs: number +} + +type DiscoveryCacheEntry = { + version: number + provider: string + scope: string + snapshot: DiscoverySnapshotEntry[] + sources: SessionSource[] +} + +function cacheRoot(): string { + const base = process.env['CODEBURN_CACHE_DIR'] ?? 
join(homedir(), '.cache', 'codeburn') + return join(base, 'discovery-cache-v1') +} + +function cacheFilename(provider: string, scope: string): string { + return `${createHash('sha1').update(`${provider}:${scope}`).digest('hex')}.json` +} + +function cachePath(provider: string, scope: string): string { + return join(cacheRoot(), cacheFilename(provider, scope)) +} + +function isPlainObject(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value) +} + +function isFiniteNumber(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value) +} + +function isDiscoverySnapshotEntry(value: unknown): value is DiscoverySnapshotEntry { + return isPlainObject(value) + && typeof value.path === 'string' + && isFiniteNumber(value.mtimeMs) +} + +function isSessionSource(value: unknown): value is SessionSource { + return isPlainObject(value) + && typeof value.path === 'string' + && typeof value.project === 'string' + && typeof value.provider === 'string' + && (value.fingerprintPath === undefined || typeof value.fingerprintPath === 'string') + && (value.cacheStrategy === undefined || value.cacheStrategy === 'full-reparse' || value.cacheStrategy === 'append-jsonl') + && (value.progressLabel === undefined || typeof value.progressLabel === 'string') + && (value.parserVersion === undefined || typeof value.parserVersion === 'string') +} + +function isDiscoveryCacheEntry(value: unknown): value is DiscoveryCacheEntry { + return isPlainObject(value) + && value.version === DISCOVERY_CACHE_VERSION + && typeof value.provider === 'string' + && typeof value.scope === 'string' + && Array.isArray(value.snapshot) + && value.snapshot.every(isDiscoverySnapshotEntry) + && Array.isArray(value.sources) + && value.sources.every(isSessionSource) +} + +function normalizeSnapshot(snapshot: DiscoverySnapshotEntry[]): DiscoverySnapshotEntry[] { + return [...snapshot].sort((left, right) => left.path.localeCompare(right.path)) +} + 
+function snapshotsMatch(left: DiscoverySnapshotEntry[], right: DiscoverySnapshotEntry[]): boolean { + if (left.length !== right.length) return false + return left.every((entry, index) => { + const other = right[index] + return !!other && entry.path === other.path && entry.mtimeMs === other.mtimeMs + }) +} + +async function atomicWriteJson(path: string, value: unknown): Promise { + await mkdir(dirname(path), { recursive: true }) + const temp = `${path}.${randomBytes(8).toString('hex')}.tmp` + const handle = await open(temp, 'w', 0o600) + try { + await handle.writeFile(JSON.stringify(value), { encoding: 'utf-8' }) + await handle.sync() + } finally { + await handle.close() + } + + try { + await rename(temp, path) + } catch (err) { + try { + await unlink(temp) + } catch { + // ignore cleanup failures + } + throw err + } +} + +export async function loadDiscoveryCache( + provider: string, + scope: string, + snapshot: DiscoverySnapshotEntry[], +): Promise { + const path = cachePath(provider, scope) + if (!existsSync(path)) return null + + try { + const raw = await readFile(path, 'utf-8') + const parsed: unknown = JSON.parse(raw) + if (!isDiscoveryCacheEntry(parsed)) return null + if (parsed.provider !== provider || parsed.scope !== scope) return null + + const normalizedSnapshot = normalizeSnapshot(snapshot) + const cachedSnapshot = normalizeSnapshot(parsed.snapshot) + if (!snapshotsMatch(normalizedSnapshot, cachedSnapshot)) return null + + return parsed.sources + } catch { + return null + } +} + +export async function saveDiscoveryCache( + provider: string, + scope: string, + snapshot: DiscoverySnapshotEntry[], + sources: SessionSource[], +): Promise { + await mkdir(cacheRoot(), { recursive: true }) + await atomicWriteJson(cachePath(provider, scope), { + version: DISCOVERY_CACHE_VERSION, + provider, + scope, + snapshot: normalizeSnapshot(snapshot), + sources, + } satisfies DiscoveryCacheEntry) +} diff --git a/src/providers/codex.ts b/src/providers/codex.ts index 
5c0ff07..9b27530 100644 --- a/src/providers/codex.ts +++ b/src/providers/codex.ts @@ -2,6 +2,7 @@ import { readdir, stat } from 'fs/promises' import { basename, join } from 'path' import { homedir } from 'os' +import { type DiscoverySnapshotEntry, loadDiscoveryCache, saveDiscoveryCache } from '../discovery-cache.js' import { readSessionFile } from '../fs-utils.js' import { calculateCost } from '../models.js' import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js' @@ -86,8 +87,49 @@ async function isValidCodexSession(filePath: string): Promise<{ valid: boolean; return { valid, meta: valid ? entry : undefined } } +async function collectCodexDiscoverySnapshot(sessionsDir: string): Promise { + const snapshot: DiscoverySnapshotEntry[] = [] + + let years: string[] + try { + years = await readdir(sessionsDir) + } catch { + return snapshot + } + + for (const year of years) { + if (!/^\d{4}$/.test(year)) continue + const yearDir = join(sessionsDir, year) + const yearStat = await stat(yearDir).catch(() => null) + if (!yearStat?.isDirectory()) continue + + const months = await readdir(yearDir).catch(() => [] as string[]) + for (const month of months) { + if (!/^\d{2}$/.test(month)) continue + const monthDir = join(yearDir, month) + const monthStat = await stat(monthDir).catch(() => null) + if (!monthStat?.isDirectory()) continue + + const days = await readdir(monthDir).catch(() => [] as string[]) + for (const day of days) { + if (!/^\d{2}$/.test(day)) continue + const dayDir = join(monthDir, day) + const dayStat = await stat(dayDir).catch(() => null) + if (!dayStat?.isDirectory()) continue + snapshot.push({ path: dayDir, mtimeMs: dayStat.mtimeMs }) + } + } + } + + return snapshot +} + async function discoverSessionsInDir(codexDir: string): Promise { const sessionsDir = join(codexDir, 'sessions') + const snapshot = await collectCodexDiscoverySnapshot(sessionsDir) + const cached = await loadDiscoveryCache('codex', sessionsDir, snapshot) + if 
(cached) return cached + const sources: SessionSource[] = [] let years: string[] @@ -136,6 +178,7 @@ async function discoverSessionsInDir(codexDir: string): Promise } } + await saveDiscoveryCache('codex', sessionsDir, snapshot, sources) return sources } diff --git a/src/providers/copilot.ts b/src/providers/copilot.ts index 039f844..e0a353b 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -2,6 +2,7 @@ import { readdir, stat } from 'fs/promises' import { basename, dirname, join } from 'path' import { homedir } from 'os' +import { type DiscoverySnapshotEntry, loadDiscoveryCache, saveDiscoveryCache } from '../discovery-cache.js' import { readSessionFile } from '../fs-utils.js' import { calculateCost } from '../models.js' import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js' @@ -157,7 +158,42 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars } } +async function collectCopilotDiscoverySnapshot(sessionStateDir: string): Promise { + const snapshot: DiscoverySnapshotEntry[] = [] + + let sessionDirs: string[] + try { + sessionDirs = await readdir(sessionStateDir) + } catch { + return snapshot + } + + for (const sessionId of sessionDirs) { + const sessionDir = join(sessionStateDir, sessionId) + const dirStat = await stat(sessionDir).catch(() => null) + if (!dirStat?.isDirectory()) continue + + const eventsPath = join(sessionDir, 'events.jsonl') + const eventsStat = await stat(eventsPath).catch(() => null) + if (!eventsStat?.isFile()) continue + + snapshot.push({ path: eventsPath, mtimeMs: eventsStat.mtimeMs }) + + const workspacePath = join(sessionDir, 'workspace.yaml') + const workspaceStat = await stat(workspacePath).catch(() => null) + if (workspaceStat?.isFile()) { + snapshot.push({ path: workspacePath, mtimeMs: workspaceStat.mtimeMs }) + } + } + + return snapshot +} + async function discoverSessionsInDir(sessionStateDir: string): Promise { + const snapshot = await 
collectCopilotDiscoverySnapshot(sessionStateDir) + const cached = await loadDiscoveryCache('copilot', sessionStateDir, snapshot) + if (cached) return cached + const sources: SessionSource[] = [] let sessionDirs: string[] @@ -190,6 +226,7 @@ async function discoverSessionsInDir(sessionStateDir: string): Promise { } } +async function collectPiDiscoverySnapshot(sessionsDir: string): Promise { + const snapshot: DiscoverySnapshotEntry[] = [] + + let projectDirs: string[] + try { + projectDirs = await readdir(sessionsDir) + } catch { + return snapshot + } + + for (const dirName of projectDirs) { + const dirPath = join(sessionsDir, dirName) + const dirStat = await stat(dirPath).catch(() => null) + if (!dirStat?.isDirectory()) continue + snapshot.push({ path: dirPath, mtimeMs: dirStat.mtimeMs }) + } + + return snapshot +} + async function discoverSessionsInDir(sessionsDir: string): Promise { + const snapshot = await collectPiDiscoverySnapshot(sessionsDir) + const cached = await loadDiscoveryCache('pi', sessionsDir, snapshot) + if (cached) return cached + const sources: SessionSource[] = [] let projectDirs: string[] @@ -112,6 +137,7 @@ async function discoverSessionsInDir(sessionsDir: string): Promise { tmpDir = await mkdtemp(join(tmpdir(), 'codex-test-')) + process.env['CODEBURN_CACHE_DIR'] = join(tmpDir, 'cache') }) afterEach(async () => { + delete process.env['CODEBURN_CACHE_DIR'] await rm(tmpDir, { recursive: true, force: true }) }) @@ -136,6 +139,28 @@ describe('codex provider - session discovery', () => { const sessions = await provider.discoverSessions() expect(sessions).toEqual([]) }) + + it('reuses cached discovery results when the directory tree is unchanged', async () => { + await writeSession(tmpDir, '2026-04-14', 'rollout-cached.jsonl', [ + sessionMeta({ cwd: '/Users/test/myproject' }), + tokenCount({ last: { input: 100, output: 50 }, total: { total: 150 } }), + ]) + + const provider = createCodexProvider(tmpDir) + const readSpy = vi.spyOn(fsUtils, 
'readSessionFile') + + const first = await provider.discoverSessions() + const firstReadCount = readSpy.mock.calls.length + const second = await provider.discoverSessions() + const secondReadCount = readSpy.mock.calls.length + + expect(first).toHaveLength(1) + expect(second).toEqual(first) + expect(firstReadCount).toBeGreaterThan(0) + expect(secondReadCount).toBe(firstReadCount) + + readSpy.mockRestore() + }) }) describe('codex provider - JSONL parsing', () => { diff --git a/tests/providers/copilot.test.ts b/tests/providers/copilot.test.ts index eb1b6c5..ba02f2c 100644 --- a/tests/providers/copilot.test.ts +++ b/tests/providers/copilot.test.ts @@ -1,9 +1,10 @@ -import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest' import { mkdtemp, mkdir, writeFile, rm } from 'fs/promises' import { join } from 'path' import { tmpdir } from 'os' import { copilot, createCopilotProvider } from '../../src/providers/copilot.js' +import * as fsUtils from '../../src/fs-utils.js' import type { ParsedProviderCall } from '../../src/providers/types.js' let tmpDir: string @@ -40,9 +41,11 @@ function assistantMessage(opts: { messageId: string; outputTokens: number; tools describe('copilot provider - JSONL parsing', () => { beforeEach(async () => { tmpDir = await mkdtemp(join(tmpdir(), 'copilot-test-')) + process.env['CODEBURN_CACHE_DIR'] = join(tmpDir, 'cache') }) afterEach(async () => { + delete process.env['CODEBURN_CACHE_DIR'] await rm(tmpDir, { recursive: true, force: true }) }) @@ -219,6 +222,25 @@ describe('copilot provider - discoverSessions', () => { const sessions = await provider.discoverSessions() expect(sessions).toHaveLength(0) }) + + it('reuses cached discovery results when session directories are unchanged', async () => { + await createSessionDir('sess-disc-cached', [modelChange('gpt-4.1')], '/home/user/myapp') + + const provider = createCopilotProvider(tmpDir) + const readSpy = 
vi.spyOn(fsUtils, 'readSessionFile') + + const first = await provider.discoverSessions() + const firstReadCount = readSpy.mock.calls.length + const second = await provider.discoverSessions() + const secondReadCount = readSpy.mock.calls.length + + expect(first).toHaveLength(1) + expect(second).toEqual(first) + expect(firstReadCount).toBeGreaterThan(0) + expect(secondReadCount).toBe(firstReadCount) + + readSpy.mockRestore() + }) }) describe('copilot provider - metadata', () => { diff --git a/tests/providers/pi.test.ts b/tests/providers/pi.test.ts index 74f8274..64d4265 100644 --- a/tests/providers/pi.test.ts +++ b/tests/providers/pi.test.ts @@ -1,18 +1,24 @@ -import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest' import { mkdtemp, mkdir, writeFile, rm } from 'fs/promises' import { join } from 'path' import { tmpdir } from 'os' import { createPiProvider } from '../../src/providers/pi.js' +import * as fsUtils from '../../src/fs-utils.js' import type { ParsedProviderCall } from '../../src/providers/types.js' let tmpDir: string +let cacheDir: string beforeEach(async () => { tmpDir = await mkdtemp(join(tmpdir(), 'pi-test-')) + cacheDir = await mkdtemp(join(tmpdir(), 'pi-cache-')) + process.env['CODEBURN_CACHE_DIR'] = cacheDir }) afterEach(async () => { + delete process.env['CODEBURN_CACHE_DIR'] + await rm(cacheDir, { recursive: true, force: true }) await rm(tmpDir, { recursive: true, force: true }) }) @@ -146,6 +152,29 @@ describe('pi provider - session discovery', () => { const sessions = await provider.discoverSessions() expect(sessions).toEqual([]) }) + + it('reuses cached discovery results when project directories are unchanged', async () => { + const projectDir = join(tmpDir, '--Users-test-myproject--') + await writeSession(projectDir, 'cached.jsonl', [ + sessionMeta({ cwd: '/Users/test/myproject' }), + assistantMessage({}), + ]) + + const provider = createPiProvider(tmpDir) + 
const readSpy = vi.spyOn(fsUtils, 'readSessionFile') + + const first = await provider.discoverSessions() + const firstReadCount = readSpy.mock.calls.length + const second = await provider.discoverSessions() + const secondReadCount = readSpy.mock.calls.length + + expect(first).toHaveLength(1) + expect(second).toEqual(first) + expect(firstReadCount).toBeGreaterThan(0) + expect(secondReadCount).toBe(firstReadCount) + + readSpy.mockRestore() + }) }) describe('pi provider - JSONL parsing', () => { From 140e50b702856c8f557780b17f0dbfad70f211e2 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 17:55:26 +0200 Subject: [PATCH 11/14] test: stabilize local-date aggregation --- tests/day-aggregator.test.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/day-aggregator.test.ts b/tests/day-aggregator.test.ts index fb90840..979681b 100644 --- a/tests/day-aggregator.test.ts +++ b/tests/day-aggregator.test.ts @@ -38,6 +38,11 @@ function makeCall(timestamp: string, costUSD: number, model = 'Opus 4.7', provid } } +function localDateKey(iso: string): string { + const d = new Date(iso) + return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}` +} + describe('aggregateProjectsIntoDays', () => { it('buckets api calls by calendar date derived from timestamp', () => { const projects: ProjectSummary[] = [ @@ -130,12 +135,13 @@ describe('aggregateProjectsIntoDays', () => { }) it('counts a session under its firstTimestamp date', () => { + const firstTimestamp = '2026-04-09T23:59:00Z' const projects: ProjectSummary[] = [ makeProject({ sessions: [{ sessionId: 's1', project: 'p', - firstTimestamp: '2026-04-09T23:59:00Z', + firstTimestamp, lastTimestamp: '2026-04-10T00:10:00Z', totalCostUSD: 1, totalInputTokens: 0, totalOutputTokens: 0, totalCacheReadTokens: 0, totalCacheWriteTokens: 0, @@ -147,7 +153,7 @@ describe('aggregateProjectsIntoDays', () => { }), ] const days = 
aggregateProjectsIntoDays(projects) - expect(days[0]!.date).toBe('2026-04-09') + expect(days[0]!.date).toBe(localDateKey(firstTimestamp)) expect(days[0]!.sessions).toBe(1) }) From eb3737f756086aa1a9251007951fa9a08e6fce12 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 17:55:37 +0200 Subject: [PATCH 12/14] docs: document persistent cache behavior --- CHANGELOG.md | 8 ++++++++ README.md | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69da656..8d9ee9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## Unreleased +### Added +- **Persistent parse cache for all providers.** Repeated CLI runs now reuse parsed source summaries across fresh processes instead of reparsing raw logs every time. +- **`--no-cache` on parse-backed commands.** `report`, `today`, `month`, `status`, `export`, `optimize`, and `compare` can bypass cached entries for that run and rebuild them from raw logs. +- **`Updating cache` stderr progress.** Non-JSON cold or partial cache rebuilds now show progress while CodeBurn refreshes changed sources. + +### Changed +- **Cursor now uses the shared parse cache.** The provider-specific Cursor cache path is gone; SQLite-backed provider data now flows through the same persistent cache layer as the other providers. + ## 0.8.0 - 2026-04-19 ### Added diff --git a/README.md b/README.md index 29fa180..50965f9 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,25 @@ codeburn today --format json | jq '.overview.cost' For the lighter `status --format json` (today + month totals only) or file-based exports (`export -f json`), see above. +## Cache behavior + +CodeBurn now keeps a persistent parse cache under `~/.cache/codeburn/source-cache-v1/`. +It applies to every provider. Unchanged sources load from cache across fresh CLI runs, +while changed sources are refreshed on demand so rolling windows like `today` stay current +as new log entries land. 
+ +Use `--no-cache` on any command that reads session data to ignore cached entries for that +run and rebuild them from raw logs: + +```bash +codeburn today --no-cache +codeburn report --period all --no-cache +codeburn export --no-cache +``` + +When a non-JSON command needs to rebuild part of the cache, CodeBurn shows an +`Updating cache` progress bar on stderr. JSON output stays clean on stdout. + ## Providers CodeBurn auto-detects which AI coding tools you use. If multiple providers have session data on disk, press `p` in the dashboard to toggle between them. From 563f9c4f1b4ba308f2395ae78805b5879bf38a62 Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 18:51:23 +0200 Subject: [PATCH 13/14] refactor: share provider presentation metadata --- src/dashboard.tsx | 24 ++---------------------- src/provider-colors.ts | 27 +++++++++++++++++++++++++++ tests/provider-colors.test.ts | 29 +++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 22 deletions(-) create mode 100644 src/provider-colors.ts create mode 100644 tests/provider-colors.test.ts diff --git a/src/dashboard.tsx b/src/dashboard.tsx index 2eb5ac4..3fe3593 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -14,6 +14,7 @@ import { createTerminalProgressReporter } from './parse-progress.js' import { CompareView } from './compare.js' import { getPlanUsageOrNull, type PlanUsage } from './plan-usage.js' import { planDisplayName } from './plans.js' +import { providerColor, providerLabel } from './provider-colors.js' import { join } from 'path' type Period = 'today' | 'week' | '30days' | 'month' | 'all' @@ -56,15 +57,6 @@ const PANEL_COLORS = { bash: '#F5A05B', } -const PROVIDER_COLORS: Record = { - claude: '#FF8C42', - codex: '#5BF5A0', - cursor: '#00B4D8', - opencode: '#A78BFA', - pi: '#F472B6', - all: '#FF8C42', -} - const CATEGORY_COLORS: Record = { coding: '#5B9EF5', debugging: '#F55B5B', @@ -490,16 +482,6 @@ function BashBreakdown({ projects, pw, bw }: { projects: 
ProjectSummary[]; pw: n ) } -const PROVIDER_DISPLAY_NAMES: Record = { - all: 'All', - claude: 'Claude', - codex: 'Codex', - cursor: 'Cursor', - opencode: 'OpenCode', - pi: 'Pi', -} -function getProviderDisplayName(name: string): string { return PROVIDER_DISPLAY_NAMES[name] ?? name } - function PeriodTabs({ active, providerName, showProvider }: { active: Period; providerName?: string; showProvider?: boolean }) { return ( @@ -510,9 +492,7 @@ function PeriodTabs({ active, providerName, showProvider }: { active: Period; pr ))} - {showProvider && providerName && ( - | [p] {getProviderDisplayName(providerName)} - )} + {showProvider && providerName && | [p] {providerLabel(providerName)}} ) } diff --git a/src/provider-colors.ts b/src/provider-colors.ts new file mode 100644 index 0000000..6f830a2 --- /dev/null +++ b/src/provider-colors.ts @@ -0,0 +1,27 @@ +export const PROVIDER_COLORS: Record = { + all: '#FF8C42', + claude: '#FF8C42', + codex: '#5BF5A0', + cursor: '#00B4D8', + opencode: '#A78BFA', + pi: '#F472B6', + copilot: '#6495ED', +} + +const PROVIDER_LABELS: Record = { + all: 'All', + claude: 'Claude', + codex: 'Codex', + cursor: 'Cursor', + opencode: 'OpenCode', + pi: 'Pi', + copilot: 'Copilot', +} + +export function providerLabel(name: string): string { + return PROVIDER_LABELS[name] ?? name +} + +export function providerColor(name: string): string { + return PROVIDER_COLORS[name] ?? 
'#CCCCCC' +} diff --git a/tests/provider-colors.test.ts b/tests/provider-colors.test.ts new file mode 100644 index 0000000..d1e02b7 --- /dev/null +++ b/tests/provider-colors.test.ts @@ -0,0 +1,29 @@ +import { describe, it, expect } from 'vitest' + +import { PROVIDER_COLORS, providerColor, providerLabel } from '../src/provider-colors.js' + +describe('provider presentation metadata', () => { + it('exports the shared provider palette', () => { + expect(PROVIDER_COLORS).toEqual({ + all: '#FF8C42', + claude: '#FF8C42', + codex: '#5BF5A0', + cursor: '#00B4D8', + opencode: '#A78BFA', + pi: '#F472B6', + copilot: '#6495ED', + }) + }) + + it('maps provider names to labels', () => { + expect(providerLabel('all')).toBe('All') + expect(providerLabel('opencode')).toBe('OpenCode') + expect(providerLabel('unknown')).toBe('unknown') + }) + + it('maps provider names to colors with a neutral fallback', () => { + expect(providerColor('all')).toBe('#FF8C42') + expect(providerColor('opencode')).toBe('#A78BFA') + expect(providerColor('unknown')).toBe('#CCCCCC') + }) +}) From 7594fa02546716501b2535ad5fab6146c2b83e3b Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 23:52:30 +0200 Subject: [PATCH 14/14] feat: optimize parse caching across providers --- bin/codeburn | 20 + package.json | 5 +- src/dashboard.tsx | 190 +++++++-- src/discovery-cache.ts | 60 ++- src/parse-progress.ts | 63 ++- src/parser.ts | 803 +++++++++++++++++++++++++---------- src/source-cache.ts | 153 ++++++- tests/parse-progress.test.ts | 68 ++- tests/parser-cache.test.ts | 181 +++++++- 9 files changed, 1261 insertions(+), 282 deletions(-) create mode 100755 bin/codeburn diff --git a/bin/codeburn b/bin/codeburn new file mode 100755 index 0000000..3d3f76d --- /dev/null +++ b/bin/codeburn @@ -0,0 +1,20 @@ +#!/usr/bin/env node + +import { homedir } from "node:os"; + +try { + process.cwd(); +} catch (error) { + if ( + error && + typeof error === "object" && + "code" in error && + (error).code === "ENOENT" + 
) { + process.chdir(homedir()); + } else { + throw error; + } +} + +await import("../dist/cli.js"); diff --git a/package.json b/package.json index f08fa97..a3b6df1 100644 --- a/package.json +++ b/package.json @@ -5,10 +5,11 @@ "type": "module", "main": "./dist/cli.js", "bin": { - "codeburn": "dist/cli.js" + "codeburn": "bin/codeburn" }, "files": [ - "dist" + "dist", + "bin" ], "scripts": { "build": "tsup", diff --git a/src/dashboard.tsx b/src/dashboard.tsx index 3fe3593..0f54eba 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -4,7 +4,7 @@ import React, { useState, useCallback, useEffect, useRef } from 'react' import { render, Box, Text, useInput, useApp, useWindowSize } from 'ink' import { CATEGORY_LABELS, type DateRange, type ProjectSummary, type TaskCategory } from './types.js' import { formatCost, formatTokens } from './format.js' -import { parseAllSessions, filterProjectsByName } from './parser.js' +import { parseAllSessions, filterProjectsByDateRange, filterProjectsByName } from './parser.js' import { loadPricing } from './models.js' import { getAllProviders } from './providers/index.js' import { scanAndDetect, type WasteFinding, type WasteAction, type OptimizeResult } from './optimize.js' @@ -20,6 +20,15 @@ import { join } from 'path' type Period = 'today' | 'week' | '30days' | 'month' | 'all' type View = 'dashboard' | 'optimize' | 'compare' +type CachedWindow = { + period: Period + range: { + start: Date + end: Date + } + projects: ProjectSummary[] +} + const PERIODS: Period[] = ['today', 'week', '30days', 'month', 'all'] const PERIOD_LABELS: Record = { today: 'Today', @@ -108,6 +117,10 @@ function getDateRange(period: Period): { start: Date; end: Date } { } } +function rangeCovers(outer: { start: Date; end: Date }, inner: { start: Date; end: Date }): boolean { + return outer.start <= inner.start && outer.end >= inner.end +} + type Layout = { dashWidth: number; wide: boolean; halfWidth: number; barWidth: number } function getLayout(columns?: 
number): Layout { @@ -630,9 +643,156 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, ).size const compareAvailable = modelCount >= 2 const debounceRef = useRef | null>(null) - const reloadGenerationRef = useRef(0) + const cacheByProviderRef = useRef(new Map()) + const reloadSeqRef = useRef(0) + const preloadingRef = useRef(new Map>()) const findingCount = optimizeResult?.findings.length ?? 0 + const providerCacheKey = useCallback((provider: string) => `${provider}:${noCache ? 'nocache' : 'cache'}`, [noCache]) + const getRangeWidth = useCallback((range: { start: Date; end: Date }) => range.end.getTime() - range.start.getTime(), []) + const makeCacheToken = useCallback((provider: string, period: Period) => `${providerCacheKey(provider)}:${period}`, [providerCacheKey]) + + const storeCachedWindow = useCallback((provider: string, period: Period, range: { start: Date; end: Date }, projects: ProjectSummary[]) => { + if (noCache) return + const key = providerCacheKey(provider) + const windows = cacheByProviderRef.current.get(key) ?? [] + const normalizedRange = { start: new Date(range.start), end: new Date(range.end) } + const existing = windows.findIndex( + existing => existing.period === period && existing.range.start.getTime() === normalizedRange.start.getTime() && existing.range.end.getTime() === normalizedRange.end.getTime(), + ) + if (existing >= 0) windows.splice(existing, 1) + windows.push({ period, range: normalizedRange, projects }) + windows.sort((a, b) => a.range.start.getTime() - b.range.start.getTime()) + cacheByProviderRef.current.set(key, windows) + }, [noCache, providerCacheKey]) + + const findCachedWindow = useCallback((provider: string, range: { start: Date; end: Date }) => { + const candidates = cacheByProviderRef.current.get(providerCacheKey(provider)) ?? 
[] + let best: CachedWindow | undefined + for (const candidate of candidates) { + if (!rangeCovers(candidate.range, range)) continue + if (!best) { best = candidate; continue } + if (getRangeWidth(candidate.range) < getRangeWidth(best.range)) { + best = candidate + } else if (candidate.period !== best.period && getRangeWidth(candidate.range) === getRangeWidth(best.range) && candidate.range.start > best.range.start) { + best = candidate + } + } + return best + }, [getRangeWidth, providerCacheKey]) + + const preloadWindow = useCallback(async (periodToLoad: Period, provider: string) => { + if (noCache) return + const preloadKey = makeCacheToken(provider, periodToLoad) + const range = getDateRange(periodToLoad) + const cached = findCachedWindow(provider, range) + if (cached) return + const inFlight = preloadingRef.current.get(preloadKey) + if (inFlight) return + + const promise = (async () => { + const projects = await parseAllSessions(range, provider, { noCache, progress: null }) + if (!noCache) { + storeCachedWindow(provider, periodToLoad, range, projects) + } + return projects + })() + + preloadingRef.current.set(preloadKey, promise) + try { + await promise + } finally { + preloadingRef.current.delete(preloadKey) + } + }, [findCachedWindow, makeCacheToken, noCache, storeCachedWindow]) + + const reloadData = useCallback(async (p: Period, prov: string, options?: { silent?: boolean }) => { + const range = getDateRange(p) + const request = ++reloadSeqRef.current + const token = makeCacheToken(prov, p) + const cachedWindow = findCachedWindow(prov, range) + if (!options?.silent) { + setOptimizeResult(null) + } + + if (cachedWindow) { + const projectsFromCache = filterProjectsByName( + filterProjectsByDateRange(cachedWindow.projects, range), + projectFilter, + excludeFilter, + ) + if (!options?.silent && request === reloadSeqRef.current) { + setProjects(projectsFromCache) + } + if (!options?.silent) { + const usage = await getPlanUsageOrNull() + if (request !== 
reloadSeqRef.current) return + setPlanUsage(usage ?? undefined) + } + return + } + + const inFlight = preloadingRef.current.get(token) + if (inFlight) { + if (!options?.silent) setLoading(true) + try { + const projects = await inFlight + if (!noCache) { + storeCachedWindow(prov, p, range, projects) + } + if (request !== reloadSeqRef.current) return + const filtered = filterProjectsByName(projects, projectFilter, excludeFilter) + if (!options?.silent) { + setProjects(filtered) + } + } finally { + if (!options?.silent && request === reloadSeqRef.current) setLoading(false) + } + if (!options?.silent) { + const usage = await getPlanUsageOrNull() + if (request !== reloadSeqRef.current) return + setPlanUsage(usage ?? undefined) + } + return + } + + if (!options?.silent) setLoading(true) + try { + const projects = await parseAllSessions(range, prov, { noCache, progress: null }) + if (!noCache) { + storeCachedWindow(prov, p, range, projects) + } + if (request !== reloadSeqRef.current) return + const filtered = filterProjectsByName(projects, projectFilter, excludeFilter) + if (!options?.silent) { + setProjects(filtered) + } + } finally { + if (!options?.silent && request === reloadSeqRef.current) setLoading(false) + } + if (!options?.silent) { + const usage = await getPlanUsageOrNull() + if (request !== reloadSeqRef.current) return + setPlanUsage(usage ?? undefined) + } + }, [excludeFilter, findCachedWindow, getPlanUsageOrNull, noCache, projectFilter, storeCachedWindow]) + + useEffect(() => { + if (noCache) return + const initialRange = getDateRange(initialPeriod) + const initialKey = providerCacheKey(initialProvider) + const existing = cacheByProviderRef.current.get(initialKey) ?? 
[] + const alreadyCached = existing.some(entry => rangeCovers(entry.range, initialRange)) + if (!alreadyCached) { + storeCachedWindow(initialProvider, initialPeriod, initialRange, initialProjects) + } + }, [initialPeriod, initialProvider, initialProjects, noCache, providerCacheKey, storeCachedWindow]) + + useEffect(() => { + if (noCache || period === '30days') return + void preloadWindow('30days', activeProvider) + }, [noCache, period, activeProvider, preloadWindow]) + useEffect(() => { let cancelled = false async function detect() { @@ -673,32 +833,6 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, return () => { cancelled = true } }, [projects, period, optimizeAvailable]) - const reloadData = useCallback(async (p: Period, prov: string) => { - const generation = ++reloadGenerationRef.current - setLoading(true) - setOptimizeResult(null) - try { - const range = getDateRange(p) - const data = filterProjectsByName( - await parseAllSessions(range, prov, { noCache: noCache ?? false, progress: null }), - projectFilter, - excludeFilter, - ) - if (reloadGenerationRef.current !== generation) return - - setProjects(data) - const usage = await getPlanUsageOrNull() - if (reloadGenerationRef.current !== generation) return - setPlanUsage(usage ?? 
undefined) - } catch (error) { - console.error(error) - } finally { - if (reloadGenerationRef.current === generation) { - setLoading(false) - } - } - }, [excludeFilter, noCache, projectFilter]) - useEffect(() => { if (!refreshSeconds || refreshSeconds <= 0) return const id = setInterval(() => { reloadData(period, activeProvider) }, refreshSeconds * 1000) diff --git a/src/discovery-cache.ts b/src/discovery-cache.ts index 39b76e4..fcd9e4e 100644 --- a/src/discovery-cache.ts +++ b/src/discovery-cache.ts @@ -8,12 +8,21 @@ import type { SessionSource } from './providers/types.js' const DISCOVERY_CACHE_VERSION = 1 +const DISCOVERY_DIRECTORY_MARKER_PREFIX = '__dir__:' + +function traceDiscoveryCacheRead(op: string, filePath: string, note?: string): void { + if (process.env['CODEBURN_FILE_TRACE'] !== '1') return + const suffix = note ? ` ${note}` : '' + process.stderr.write(`codeburn-trace discovery ${op} ${filePath}${suffix}\n`) +} + export type DiscoverySnapshotEntry = { path: string mtimeMs: number + dirSignature?: string } -type DiscoveryCacheEntry = { +export type DiscoveryCacheEntry = { version: number provider: string scope: string @@ -78,10 +87,46 @@ function snapshotsMatch(left: DiscoverySnapshotEntry[], right: DiscoverySnapshot if (left.length !== right.length) return false return left.every((entry, index) => { const other = right[index] - return !!other && entry.path === other.path && entry.mtimeMs === other.mtimeMs + return !!other + && entry.path === other.path + && entry.mtimeMs === other.mtimeMs + && entry.dirSignature === other.dirSignature }) } +function makeDirectoryMarker(path: string, dirSignature?: string): DiscoverySnapshotEntry { + return { + path: `${DISCOVERY_DIRECTORY_MARKER_PREFIX}${path}`, + mtimeMs: 0, + dirSignature, + } +} + +export function isDiscoveryDirectoryMarker(path: string): boolean { + return path.startsWith(DISCOVERY_DIRECTORY_MARKER_PREFIX) +} + +export function directoryPathFromMarker(markerPath: string): string | null { + return 
markerPath.startsWith(DISCOVERY_DIRECTORY_MARKER_PREFIX) + ? markerPath.slice(DISCOVERY_DIRECTORY_MARKER_PREFIX.length) + : null +} + +async function loadDiscoveryCacheEntry(provider: string, scope: string): Promise { + const path = cachePath(provider, scope) + if (!existsSync(path)) return null + traceDiscoveryCacheRead('entry:read', path, `provider=${provider} scope=${scope}`) + + try { + const raw = await readFile(path, 'utf-8') + const parsed: unknown = JSON.parse(raw) + if (!isDiscoveryCacheEntry(parsed) || parsed.provider !== provider || parsed.scope !== scope) return null + return parsed + } catch { + return null + } +} + async function atomicWriteJson(path: string, value: unknown): Promise { await mkdir(dirname(path), { recursive: true }) const temp = `${path}.${randomBytes(8).toString('hex')}.tmp` @@ -129,6 +174,13 @@ export async function loadDiscoveryCache( } } +export async function loadDiscoveryCacheEntryUnchecked( + provider: string, + scope: string, +): Promise { + return loadDiscoveryCacheEntry(provider, scope) +} + export async function saveDiscoveryCache( provider: string, scope: string, @@ -144,3 +196,7 @@ export async function saveDiscoveryCache( sources, } satisfies DiscoveryCacheEntry) } + +export function discoveryDirectoryMarker(prefixPath: string, dirSignature?: string): DiscoverySnapshotEntry { + return makeDirectoryMarker(prefixPath, dirSignature) +} diff --git a/src/parse-progress.ts b/src/parse-progress.ts index 8a502c7..3acb068 100644 --- a/src/parse-progress.ts +++ b/src/parse-progress.ts @@ -1,4 +1,28 @@ +import { Chalk } from 'chalk' +import { stripVTControlCharacters } from 'node:util' + import type { SourceProgressReporter } from './parser.js' +import { providerColor, providerLabel } from './provider-colors.js' + +function getBarWidth(columns: number | undefined): number { + if (!columns || columns >= 80) return 16 + if (columns >= 56) return 12 + return 8 +} + +function renderBar(current: number, total: number, width: number): { 
filled: number; empty: number } { + if (total <= 0) return { filled: 0, empty: width } + + const filled = Math.max(0, Math.min(width, Math.round((current / total) * width))) + return { filled, empty: Math.max(0, width - filled) } +} + +function mapChalkLevel(colorDepth: number): 0 | 1 | 2 | 3 { + if (colorDepth >= 24) return 3 + if (colorDepth >= 8) return 2 + if (colorDepth >= 1) return 1 + return 0 +} export function createTerminalProgressReporter( enabled: boolean, @@ -8,34 +32,53 @@ export function createTerminalProgressReporter( let total = 0 let current = 0 + let lastProvider = 'all' let lastLineLength = 0 let active = false + const colorDepth = typeof stream.getColorDepth === 'function' ? stream.getColorDepth() : 0 + const chalk = new Chalk({ level: mapChalkLevel(colorDepth) }) - function writeLine(line: string, done = false) { - const pad = lastLineLength > line.length ? ' '.repeat(lastLineLength - line.length) : '' - lastLineLength = Math.max(lastLineLength, line.length) - stream.write(`${line}${pad}${done ? '\n' : '\r'}`) + function buildFrame(provider: string, done = false): string { + const columns = 'columns' in stream ? (stream as NodeJS.WriteStream & { columns?: number }).columns : process.stderr.columns + const width = getBarWidth(columns) + const label = providerLabel(provider) + const { filled, empty } = renderBar(current, total, width) + const accent = providerColor(provider) + const line = [ + chalk.dim('Updating'), + chalk.bold.hex(accent)(label), + chalk.dim('cache'), + `[${chalk.hex(accent)('█'.repeat(filled))}${chalk.hex('#666666')('░'.repeat(empty))}]`, + `${current}/${total}`, + ].join(' ') + const visible = stripVTControlCharacters(line) + const pad = lastLineLength > visible.length ? ' '.repeat(lastLineLength - visible.length) : '' + lastLineLength = Math.max(lastLineLength, visible.length) + return `${line}${pad}${done ? 
'\n' : '\r'}` } return { - start(label: string, nextTotal: number) { + start(nextTotal: number) { total = nextTotal current = 0 + lastProvider = 'all' lastLineLength = 0 active = nextTotal > 0 - if (active) writeLine(`${label} 0/${total}`) }, - advance(itemLabel: string) { + advance(provider: string) { if (!active) return + lastProvider = provider current += 1 - writeLine(`Updating cache ${current}/${total}${itemLabel ? ` ${itemLabel}` : ''}`) + stream.write(buildFrame(provider)) }, - finish() { + finish(provider?: string) { if (!active) return - writeLine(`Updating cache ${current}/${total}`, true) + if (current === 0) return + stream.write(buildFrame(provider ?? lastProvider, true)) active = false total = 0 current = 0 + lastProvider = 'all' lastLineLength = 0 }, } diff --git a/src/parser.ts b/src/parser.ts index c7d022b..e39872e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,16 +1,20 @@ import { createHash } from 'crypto' import { open, readdir, stat } from 'fs/promises' import { basename, join } from 'path' +import { directoryPathFromMarker, discoveryDirectoryMarker, isDiscoveryDirectoryMarker, loadDiscoveryCacheEntryUnchecked, saveDiscoveryCache, type DiscoverySnapshotEntry } from './discovery-cache.js' import { readSessionFile, readSessionLinesFromOffset } from './fs-utils.js' import { calculateCost, getShortModelName } from './models.js' import { discoverAllSessions, getProvider } from './providers/index.js' import type { ParsedProviderCall, Provider, SessionSource } from './providers/types.js' import { computeFileFingerprint, + getManifestEntry, + isManifestDateRangeOverlap, loadSourceCacheManifest, readSourceCacheEntry, saveSourceCacheManifest, SOURCE_CACHE_VERSION, + type SourceCacheManifestEntry, writeSourceCacheEntry, } from './source-cache.js' import type { @@ -270,9 +274,9 @@ function buildSessionSummary( } export type SourceProgressReporter = { - start(label: string, total: number): void - advance(itemLabel: string): void - finish(): void + 
start(total: number): void + advance(provider: string): void + finish(provider?: string): void } export type ParseOptions = { @@ -280,6 +284,25 @@ export type ParseOptions = { progress?: SourceProgressReporter | null } +function wrapProgressReporter(progress?: SourceProgressReporter | null): SourceProgressReporter | null { + if (!progress) return null + + let lastProvider: string | undefined + + return { + start(total: number) { + progress.start(total) + }, + advance(provider: string) { + lastProvider = provider + progress.advance(provider) + }, + finish(provider?: string) { + progress.finish(provider ?? lastProvider) + }, + } +} + function addSessionToProjectMap(projectMap: Map, session: SessionSummary) { if (session.apiCalls === 0) return const existing = projectMap.get(session.project) ?? [] @@ -404,36 +427,47 @@ async function collectJsonlFiles(dirPath: string): Promise { return jsonlFiles } +const CLAUDE_TAIL_WINDOW_BYTES = 16 * 1024 +const CLAUDE_PARSER_VERSION = 'claude:v1' +const DEBUG_CACHE = process.env['CODEBURN_CACHE_DEBUG'] === '1' + +type SourceCacheRefreshReason = 'missing-entry' | 'parser-version' | 'fingerprint-miss' | 'range-miss' + +type SourceManifestAction = 'skip' | 'refresh' | 'use-cache' + +type SourceManifestState = { + source: SessionSource + parserVersion: string + manifestEntry: SourceCacheManifestEntry | null + action: SourceManifestAction + reason?: SourceCacheRefreshReason + currentFingerprint?: { mtimeMs: number; sizeBytes: number } + appendOnly?: boolean +} + type ClaudeCacheUnit = { path: string project: string progressLabel: string } -type ClaudeTailState = { - tailHash: string - lastEntryType?: string +type ClaudeCacheDiscovery = { + units: ClaudeCacheUnit[] + snapshot: DiscoverySnapshotEntry[] } -const CLAUDE_TAIL_WINDOW_BYTES = 16 * 1024 +type PlannedClaudeRefresh = SourceManifestState & { unit: ClaudeCacheUnit } -async function listClaudeCacheUnits(dirPath: string, dirName: string): Promise { - const jsonlFiles = await 
collectJsonlFiles(dirPath) - return jsonlFiles.map(filePath => ({ - path: filePath, - project: dirName, - progressLabel: filePath.split(/[\\/]/).slice(-2).join('/'), - })) +function logCacheDebug(provider: string, path: string, reason: SourceCacheRefreshReason): void { + if (!DEBUG_CACHE) return + process.stderr.write(`codeburn cache refresh [${provider}] ${path} (${reason})\n`) } -function fingerprintsMatch( - left: { mtimeMs: number; sizeBytes: number }, - right: { mtimeMs: number; sizeBytes: number }, -): boolean { +function fingerprintMatches(left: { mtimeMs: number; sizeBytes: number }, right: { mtimeMs: number; sizeBytes: number }): boolean { return left.mtimeMs === right.mtimeMs && left.sizeBytes === right.sizeBytes } -async function readClaudeTailState(filePath: string, endOffset: number): Promise { +async function readClaudeTailState(filePath: string, endOffset: number): Promise<{ tailHash: string; lastEntryType?: string } | null> { const start = Math.max(0, endOffset - CLAUDE_TAIL_WINDOW_BYTES) const length = Math.max(0, endOffset - start) if (length === 0) return null @@ -498,95 +532,318 @@ function mergeClaudeAppendSession( appendedTurns.shift() } - return buildSessionSummary( - cachedSession.sessionId, - cachedSession.project, - [...mergedTurns, ...appendedTurns], - ) + return buildSessionSummary( + cachedSession.sessionId, + cachedSession.project, + [...mergedTurns, ...appendedTurns], + ) + } + +async function isDirectoryMarkerUnchanged(cachedSnapshot: DiscoverySnapshotEntry[]): Promise { + for (const entry of cachedSnapshot) { + if (!isDiscoveryDirectoryMarker(entry.path)) continue + const path = directoryPathFromMarker(entry.path) + if (!path) return false + const markerStat = await stat(path).catch(() => null) + if (!markerStat || markerStat.mtimeMs !== entry.mtimeMs) return false + if (entry.dirSignature !== undefined) { + const entries = await readdir(path).catch(() => []) + const actualSignature = 
createHash('sha256').update(entries.sort().join('\n')).digest('hex') + if (actualSignature !== entry.dirSignature) return false + } + } + return true +} + +async function collectJsonlFilesWithSnapshot(dirPath: string): Promise { + const entries = await readdir(dirPath).catch(() => []) + const units: ClaudeCacheUnit[] = [] + const filePaths = new Set() + const snapshot: DiscoverySnapshotEntry[] = [] + const markerPaths = new Set([dirPath]) + + for (const entry of entries) { + if (entry.endsWith('.jsonl')) { + const filePath = join(dirPath, entry) + filePaths.add(filePath) + const fileStat = await stat(filePath).catch(() => null) + if (fileStat) snapshot.push({ path: filePath, mtimeMs: fileStat.mtimeMs }) + continue + } + + const subagentsPath = join(dirPath, entry, 'subagents') + const subFiles = await readdir(subagentsPath).catch(() => []) + if (subFiles.length > 0) markerPaths.add(subagentsPath) + for (const sf of subFiles) { + if (!sf.endsWith('.jsonl')) continue + const filePath = join(subagentsPath, sf) + filePaths.add(filePath) + const fileStat = await stat(filePath).catch(() => null) + if (fileStat) snapshot.push({ path: filePath, mtimeMs: fileStat.mtimeMs }) + } + } + + for (const markerPath of markerPaths) { + const markerStat = await stat(markerPath).catch(() => null) + if (markerStat) { + const entries = await readdir(markerPath).catch(() => []) + const dirSignature = createHash('sha256').update(entries.sort().join('\n')).digest('hex') + snapshot.push({ + ...discoveryDirectoryMarker(markerPath, dirSignature), + mtimeMs: markerStat.mtimeMs, + }) + } + } + + const discoveredUnits = [...filePaths].map(filePath => ({ + path: filePath, + project: basename(dirPath), + progressLabel: filePath.split(/[\\/]/).slice(-2).join('/'), + })) + + return { units: discoveredUnits, snapshot } +} + +async function listClaudeCacheUnitsFromCache(source: SessionSource): Promise { + const cached = await loadDiscoveryCacheEntryUnchecked('claude', source.path) + if (cached) { + 
/**
 * Type guard telling whether a refresh reason was actually supplied.
 *
 * @param reason Optional refresh reason.
 * @returns `false` for any falsy value (including `undefined`), `true` otherwise.
 */
function isRefreshReason(reason?: SourceCacheRefreshReason): reason is SourceCacheRefreshReason {
  if (!reason) return false
  return true
}
source.path + const manifestEntry = getManifestEntry(manifest, source.provider, source.path) + + if (options.noCache) { + const state: SourceManifestState = { source, parserVersion, manifestEntry, action: 'refresh', reason: 'missing-entry' } + if (isRefreshReason(state.reason)) logCacheDebug(source.provider, source.path, state.reason) + return state + } + + if (!manifestEntry) { + const state: SourceManifestState = { source, parserVersion, manifestEntry, action: 'refresh', reason: 'missing-entry' } + logCacheDebug(source.provider, source.path, state.reason) + return state + } + + if (manifestEntry.lastSeenParserVersion !== parserVersion) { + const state: SourceManifestState = { source, parserVersion, manifestEntry, action: 'refresh', reason: 'parser-version' } + logCacheDebug(source.provider, source.path, state.reason) + return state + } + + if (source.cacheStrategy && manifestEntry.cacheStrategy && source.cacheStrategy !== manifestEntry.cacheStrategy) { + const state: SourceManifestState = { source, parserVersion, manifestEntry, action: 'refresh', reason: 'parser-version' } + logCacheDebug(source.provider, source.path, state.reason) + return state + } + + const overlap = isManifestDateRangeOverlap(manifestEntry, dateRange) + if (overlap === false) { + return { source, parserVersion, manifestEntry, action: 'skip', reason: 'range-miss' } + } + + if (!manifestEntry.fingerprint || manifestEntry.fingerprintPath !== fingerprintPath) { + const state: SourceManifestState = { source, parserVersion, manifestEntry, action: 'refresh', reason: 'fingerprint-miss' } + logCacheDebug(source.provider, source.path, state.reason) + return state + } + + const currentFingerprint = await computeFileFingerprint(fingerprintPath).catch(() => null) + if (!currentFingerprint) { + const state: SourceManifestState = { source, parserVersion, manifestEntry, action: 'refresh', reason: 'fingerprint-miss' } + logCacheDebug(source.provider, source.path, state.reason) + return state + } + + if 
/**
 * Produces one refresh plan per Claude cache unit.
 *
 * Every unit is evaluated against the manifest as an `append-jsonl` source
 * fingerprinted by its own path, with append handling allowed. Units are
 * evaluated concurrently and the result keeps the order of `units`.
 *
 * NOTE(review): the generic arguments of `manifest` and of the return type
 * were not legible in the reviewed text and are reconstructed from how the
 * values are used; confirm against the declared types.
 *
 * @param manifest Loaded source-cache manifest.
 * @param units Cache units to plan.
 * @param dateRange Optional range forwarded to the manifest evaluation.
 * @param options Parse options forwarded to the manifest evaluation.
 * @returns The evaluated state for each unit, with the unit attached.
 */
async function planClaudeRefreshes(
  manifest: Awaited<ReturnType<typeof loadSourceCacheManifest>>,
  units: ClaudeCacheUnit[],
  dateRange: DateRange | undefined,
  options: ParseOptions,
): Promise<PlannedClaudeRefresh[]> {
  const planUnit = async (unit: ClaudeCacheUnit): Promise<PlannedClaudeRefresh> => {
    const source: SessionSource = {
      path: unit.path,
      project: unit.project,
      provider: 'claude',
      fingerprintPath: unit.path,
      cacheStrategy: 'append-jsonl',
    }
    const allowAppend = true
    const plan = await evaluateSourceManifestState(
      manifest,
      source,
      CLAUDE_PARSER_VERSION,
      dateRange,
      options,
      allowAppend,
    )

    if (DEBUG_CACHE) {
      process.stderr.write(`codeburn cache plan [claude] ${unit.path} -> ${plan.action}\n`)
    }

    return { ...plan, unit }
  }

  return Promise.all(units.map(unit => planUnit(unit)))
}
Promise<{ session: SessionSummary | null; wrote: boolean; refreshed: boolean }> { - let reportedRefresh = false - const cached = options.noCache - ? null - : await readSourceCacheEntry(manifest, 'claude', unit.path, { allowStaleFingerprint: true }) - const fingerprint = await computeFileFingerprint(unit.path) - - if ( - cached - && cached.parserVersion === parserVersion - && cached.cacheStrategy === 'append-jsonl' - && fingerprintsMatch(fingerprint, cached.fingerprint) - ) { - addSeenDeduplicationKeysFromSessions(cached.sessions, seenMsgIds) - return { session: cached.sessions[0] ?? null, wrote: false, refreshed: false } - } - - if ( - cached - && cached.parserVersion === parserVersion - && cached.cacheStrategy === 'append-jsonl' - && cached.appendState - && fingerprint.sizeBytes > cached.fingerprint.sizeBytes - ) { - const currentTailState = await readClaudeTailState(unit.path, cached.appendState.endOffset) - const tailMatches = !!( - currentTailState - && cached.appendState.tailHash - && currentTailState.tailHash === cached.appendState.tailHash - ) + const { unit, appendOnly } = state + const localSeenMsgIds = new Set() + const manifestAppendState = state.manifestEntry?.appendState + const fingerprint = state.currentFingerprint ?? 
await computeFileFingerprint(unit.path) + + if (DEBUG_CACHE) { + process.stderr.write(`codeburn cache refresh-file ${unit.path} action=${state.action} appendOnly=${String(appendOnly)}\n`) + } - if (tailMatches) { - reportedRefresh = true - options.progress?.advance(unit.progressLabel) - addSeenDeduplicationKeysFromSessions(cached.sessions, seenMsgIds) + if (state.action === 'skip') { + return { session: null, wrote: false, refreshed: false } + } - const appendedLines: string[] = [] - for await (const line of readSessionLinesFromOffset(unit.path, cached.appendState.endOffset)) { - if (line.trim()) appendedLines.push(line) - } + if (state.action === 'use-cache') { + const cached = await readSourceCacheEntry(manifest, 'claude', state.source.path, { allowStaleFingerprint: true }) + if (cached) { + addSeenDeduplicationKeysFromSessions(cached.sessions, localSeenMsgIds) + return { session: cached.sessions[0] ?? null, wrote: false, refreshed: false } + } + } + + const cached = await readSourceCacheEntry(manifest, 'claude', state.source.path, { allowStaleFingerprint: true }) + let shouldUseAppendOnly = !!appendOnly + && !!cached + && !!cached.appendState + && cached.sessions.length > 0 + && !!state.currentFingerprint + if (shouldUseAppendOnly && manifestAppendState) { + if ( + manifestAppendState.tailHash !== cached.appendState.tailHash + || manifestAppendState.endOffset !== cached.appendState.endOffset + || manifestAppendState.lastEntryType !== cached.appendState.lastEntryType + ) { + shouldUseAppendOnly = false + } + } + + if (shouldUseAppendOnly && cached) { + addSeenDeduplicationKeysFromSessions(cached.sessions, localSeenMsgIds) + const appendedLines: string[] = [] + for await (const line of readSessionLinesFromOffset(unit.path, cached.appendState.endOffset)) { + if (line.trim()) appendedLines.push(line) + } + + const appended = buildClaudeSessionSummaryFromLines( + appendedLines, + unit.project, + localSeenMsgIds, + cached.sessions[0]?.sessionId ?? 
basename(unit.path, '.jsonl'), + ) - const appended = buildClaudeSessionSummaryFromLines( - appendedLines, - unit.project, - seenMsgIds, - cached.sessions[0]?.sessionId ?? basename(unit.path, '.jsonl'), + if (appended && cached.sessions[0]) { + const merged = mergeClaudeAppendSession( + cached.sessions[0], + appended, + cached.appendState.lastEntryType, ) - if (appended && cached.sessions[0]) { - const merged = mergeClaudeAppendSession( - cached.sessions[0], - appended, - cached.appendState.lastEntryType, - ) - - if (merged) { - await writeSourceCacheEntry(manifest, { - version: SOURCE_CACHE_VERSION, - provider: 'claude', - logicalPath: unit.path, - fingerprintPath: unit.path, - cacheStrategy: 'append-jsonl', - parserVersion, - fingerprint, - sessions: [merged], - appendState: await buildClaudeAppendState(unit.path, fingerprint.sizeBytes), - }) - return { session: merged, wrote: true, refreshed: true } - } + if (merged) { + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: 'claude', + logicalPath: unit.path, + fingerprintPath: unit.path, + cacheStrategy: 'append-jsonl', + parserVersion: CLAUDE_PARSER_VERSION, + fingerprint: state.currentFingerprint ?? fingerprint, + sessions: [merged], + appendState: await buildClaudeAppendState(unit.path, (state.currentFingerprint ?? 
fingerprint).sizeBytes), + }) + options.progress?.advance('claude') + return { session: merged, wrote: true, refreshed: true } } } } - if (!reportedRefresh) options.progress?.advance(unit.progressLabel) - const session = await parseSessionFile(unit.path, unit.project, seenMsgIds) - if (!session) return { session: null, wrote: false, refreshed: true } + options.progress?.advance('claude') + const session = await parseSessionFile(unit.path, unit.project, localSeenMsgIds) + if (!session) { + await writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider: 'claude', + logicalPath: unit.path, + fingerprintPath: unit.path, + cacheStrategy: 'append-jsonl', + parserVersion: CLAUDE_PARSER_VERSION, + fingerprint: state.currentFingerprint ?? fingerprint, + sessions: [], + appendState: await buildClaudeAppendState(unit.path, (state.currentFingerprint ?? fingerprint).sizeBytes), + }) + return { session: null, wrote: true, refreshed: true } + } await writeSourceCacheEntry(manifest, { version: SOURCE_CACHE_VERSION, @@ -594,10 +851,10 @@ async function refreshClaudeCacheUnit( logicalPath: unit.path, fingerprintPath: unit.path, cacheStrategy: 'append-jsonl', - parserVersion, - fingerprint, + parserVersion: CLAUDE_PARSER_VERSION, + fingerprint: state.currentFingerprint ?? fingerprint, sessions: [session], - appendState: await buildClaudeAppendState(unit.path, fingerprint.sizeBytes), + appendState: await buildClaudeAppendState(unit.path, (state.currentFingerprint ?? 
/**
 * Builds project summaries for Claude session directories, following a refresh
 * plan and reusing cached entries where the plan allows.
 *
 * When the caller supplies `refreshStates`, that plan is used as-is and no
 * directory discovery is performed here. Previously discovery always ran,
 * costing directory reads and possibly a discovery-cache write, and its result
 * was then discarded. Discovery and planning now happen only when no plan is
 * given.
 *
 * Planned states with action `skip` are ignored. Every other state is handed
 * to `refreshClaudeCacheUnit`; resulting sessions are filtered to `dateRange`
 * and grouped per project. The manifest is saved once at the end if any unit
 * wrote a cache entry.
 *
 * NOTE(review): generic arguments (`Set`, `Map`, `Awaited`, `Promise`) were not
 * legible in the reviewed text. Those on the signature are reconstructed from
 * usage; the `Map` is left without explicit arguments — restore the original
 * ones if they differ.
 *
 * @param dirs Claude project directories (`path`) and their project names (`name`).
 * @param seenMsgIds Deduplication set forwarded to `refreshClaudeCacheUnit`.
 * @param dateRange Optional range used for planning and for filtering sessions.
 * @param manifest Already-loaded manifest; loaded here when omitted.
 * @param refreshStates Pre-computed plan; computed here when omitted.
 * @param options Parse options forwarded to planning and refreshing.
 * @returns Project summaries built from the sessions that survive filtering.
 */
async function scanClaudeDirsWithCache(
  dirs: Array<{ path: string; name: string }>,
  seenMsgIds: Set<string>,
  dateRange: DateRange | undefined,
  manifest?: Awaited<ReturnType<typeof loadSourceCacheManifest>>,
  refreshStates?: PlannedClaudeRefresh[],
  options: ParseOptions = {},
): Promise<ProjectSummary[]> {
  const projectMap = new Map()
  const cacheManifest = manifest ?? await loadSourceCacheManifest()

  // Only touch the filesystem for discovery when the caller has not planned yet.
  const discoverAndPlan = async (): Promise<PlannedClaudeRefresh[]> => {
    const claudeGroups = await Promise.all(
      dirs.map(dir => listClaudeCacheUnitsFromCache({ path: dir.path, project: dir.name, provider: 'claude' })),
    )
    const allUnits = claudeGroups.flatMap(group => group.units)
    return planClaudeRefreshes(cacheManifest, allUnits, dateRange, options)
  }
  const plan = refreshStates ?? await discoverAndPlan()

  let wroteManifest = false
  for (const state of plan) {
    if (state.action === 'skip') continue

    const { session, wrote } = await refreshClaudeCacheUnit(cacheManifest, state, seenMsgIds, options)
    if (wrote) wroteManifest = true
    if (!session) continue

    const filtered = filterSessionSummaryToRange(session, dateRange)
    if (filtered) addSessionToProjectMap(projectMap, filtered)
  }

  // Persist the manifest once, after all units, rather than per unit.
  if (wroteManifest) await saveSourceCacheManifest(cacheManifest)
  return buildProjects(projectMap)
}
`${providerName}:v1` + return evaluateSourceManifestState( + manifest, + source, + parserVersion, + dateRange, + options, + false, + ) + })) +} + function providerCallToTurn(call: ParsedProviderCall): ParsedTurn { const tools = call.tools const usage: TokenUsage = { @@ -691,99 +952,101 @@ async function parseProviderSources( sources: SessionSource[], seenKeys: Set, dateRange?: DateRange, + manifest?: Awaited>, + sourceStates?: SourceManifestState[], options: ParseOptions = {}, ): Promise { const projectMap = new Map() - const manifest = await loadSourceCacheManifest() - const sourceStates = await Promise.all(sources.map(async source => { - const parserVersion = source.parserVersion ?? `${providerName}:v1` - const cached = options.noCache - ? null - : await readSourceCacheEntry(manifest, providerName, source.path) - - if (cached && cached.parserVersion === parserVersion) { - return { source, parserVersion, cachedSessions: cached.sessions } - } - - return { source, parserVersion, cachedSessions: null } - })) - - const refreshCount = sourceStates.filter(state => state.cachedSessions === null).length + const cacheManifest = manifest ?? await loadSourceCacheManifest() + const plannedSources = sourceStates + ?? 
await planProviderSources(cacheManifest, providerName, sources, dateRange, options) let provider: Provider | undefined let wroteManifest = false - if (refreshCount > 0) options.progress?.start('Updating cache', refreshCount) + for (const state of plannedSources) { + if (state.action === 'skip') continue - try { - for (const state of sourceStates) { - let fullSessions = state.cachedSessions + let fullSessions: SessionSummary[] | null = null + if (state.action === 'use-cache') { + const cached = await readSourceCacheEntry(cacheManifest, providerName, state.source.path, { allowStaleFingerprint: true }) + if (cached) fullSessions = cached.sessions + } - if (fullSessions) { - addSeenDeduplicationKeysFromSessions(fullSessions, seenKeys) - } else { - provider ??= await getProvider(providerName) - if (!provider) continue + if (!fullSessions) { + provider ??= await getProvider(providerName) + if (!provider) continue - options.progress?.advance(state.source.progressLabel ?? state.source.path) - fullSessions = await parseFreshProviderSource(provider, providerName, state.source, seenKeys) + options.progress?.advance(providerName) + fullSessions = await parseFreshProviderSource(provider, providerName, state.source, seenKeys) - const fingerprintPath = state.source.fingerprintPath ?? state.source.path - await writeSourceCacheEntry(manifest, { - version: SOURCE_CACHE_VERSION, - provider: providerName, - logicalPath: state.source.path, - fingerprintPath, - cacheStrategy: state.source.cacheStrategy ?? 'full-reparse', - parserVersion: state.parserVersion, - fingerprint: await computeFileFingerprint(fingerprintPath), - sessions: fullSessions, - }) - wroteManifest = true - } + const fingerprintPath = state.source.fingerprintPath ?? state.source.path + await writeSourceCacheEntry(cacheManifest, { + version: SOURCE_CACHE_VERSION, + provider: providerName, + logicalPath: state.source.path, + fingerprintPath, + cacheStrategy: state.source.cacheStrategy ?? 
/**
 * Builds the part of the in-memory cache key that does not depend on the date
 * range: the provider filter and whether the persistent cache is bypassed.
 *
 * @param providerFilter Provider name, or `undefined` for all providers.
 * @param noCache Whether the run bypasses the persistent cache.
 * @returns `<provider|all>:<nocache|cache>`.
 */
function cacheContextKey(providerFilter?: string, noCache = false): string {
  const providerPart = providerFilter ?? 'all'
  const modePart = noCache ? 'nocache' : 'cache'
  return [providerPart, modePart].join(':')
}
`${dateRange.start.getTime()}:${dateRange.end.getTime()}` : 'none' + return `${cacheContextKey(providerFilter, noCache)}:${range}` } async function sourceSignatureForCache(sources: SessionSource[]): Promise { const fingerprints = await Promise.all(sources.map(async source => { if (source.provider === 'claude') { - const jsonlFiles = await collectJsonlFiles(source.path) - return Promise.all(jsonlFiles.map(async filePath => { + const discovery = await listClaudeCacheUnitsFromCache(source) + if (discovery.units.length === 0) { + return [`${source.provider}:${source.project}:${source.path}:empty`] + } + + const signatures = await Promise.all(discovery.units.map(async unit => { try { - const meta = await stat(filePath) - return [ - source.provider, - source.project, - filePath, - filePath, - String(meta.mtimeMs), - String(meta.size), - ].join(':') + const meta = await stat(unit.path) + return `${source.provider}:${source.project}:${unit.path}:mtime:${meta.mtimeMs}:size:${meta.size}` } catch { - return [source.provider, source.project, filePath, filePath, 'missing'].join(':') + return `${source.provider}:${source.project}:${unit.path}:missing` } })) + return signatures } const fingerprintPath = source.fingerprintPath ?? source.path @@ -805,7 +1068,38 @@ async function sourceSignatureForCache(sources: SessionSource[]): Promise= dateRange.end.getTime() +} + +function getCachedWindow(context: string, dateRange: DateRange | undefined, sourceSignature: string): ProjectSummary[] | null { + const now = Date.now() + let bestKey: string | null = null + let bestWidth = Number.POSITIVE_INFINITY + + if (!dateRange) return null + + for (const [key, entry] of sessionCache) { + if (entry.context !== context) continue + if (entry.sourceSignature !== sourceSignature) continue + if (now - entry.ts >= CACHE_TTL_MS) continue + if (!rangeCoversCandidate(entry, dateRange)) continue + + const width = entry.rangeEnd! - entry.rangeStart! 
/**
 * Narrows project summaries to the sessions that fall inside a date range.
 *
 * Each session is passed through `filterSessionSummaryToRange`; sessions that
 * come back `null` are dropped, projects left without sessions are dropped,
 * and the cost and API-call totals of the remaining projects are recomputed
 * from the surviving sessions. The result is ordered by descending total cost.
 *
 * @param projects Project summaries to filter.
 * @param dateRange Range to keep; when omitted the input array is returned unchanged.
 * @returns The filtered, re-totalled and re-sorted project list.
 */
export function filterProjectsByDateRange(
  projects: ProjectSummary[],
  dateRange?: DateRange,
): ProjectSummary[] {
  if (!dateRange) return projects

  const kept: ProjectSummary[] = []
  for (const project of projects) {
    const sessions: ProjectSummary['sessions'] = []
    let totalCostUSD = 0
    let totalApiCalls = 0

    for (const candidate of project.sessions) {
      const clipped = filterSessionSummaryToRange(candidate, dateRange)
      if (clipped === null) continue
      sessions.push(clipped)
      totalCostUSD += clipped.totalCostUSD
      totalApiCalls += clipped.apiCalls
    }

    if (sessions.length === 0) continue
    kept.push({ ...project, sessions, totalCostUSD, totalApiCalls })
  }

  return kept.sort((a, b) => b.totalCostUSD - a.totalCostUSD)
}
function parseAllSessions( options: ParseOptions = {}, ): Promise { const key = cacheKey(dateRange, providerFilter, options.noCache === true) + const context = cacheContextKey(providerFilter, options.noCache === true) const allSources = await discoverAllSessions(providerFilter) const sourceSignature = await sourceSignatureForCache(allSources) - const cached = sessionCache.get(key) - if (cached && Date.now() - cached.ts < CACHE_TTL_MS && cached.sourceSignature === sourceSignature) { - return cached.data + + const cached = getCachedWindow(context, dateRange, sourceSignature) + if (cached) return cached + + const exact = sessionCache.get(key) + if (exact && Date.now() - exact.ts < CACHE_TTL_MS && exact.sourceSignature === sourceSignature) { + return exact.data } const seenMsgIds = new Set() const seenKeys = new Set() + const progress = wrapProgressReporter(options.progress) + const parseOptions: ParseOptions = { ...options, progress } + const manifest = await loadSourceCacheManifest() const claudeSources = allSources.filter(s => s.provider === 'claude') const nonClaudeSources = allSources.filter(s => s.provider !== 'claude') + const claudeDiscovery = await Promise.all( + claudeSources.map(source => listClaudeCacheUnitsFromCache(source)), + ) const claudeDirs = claudeSources.map(s => ({ path: s.path, name: s.project })) - const claudeProjects = await scanClaudeDirsWithCache(claudeDirs, seenMsgIds, dateRange, options) + const claudeUnits = claudeDiscovery.flatMap(discovery => discovery.units) + const plannedClaudeRefreshes = await planClaudeRefreshes(manifest, claudeUnits, dateRange, parseOptions) const providerGroups = new Map() for (const source of nonClaudeSources) { @@ -899,25 +1238,61 @@ export async function parseAllSessions( providerGroups.set(source.provider, existing) } - const otherProjects: ProjectSummary[] = [] + const plannedProviderGroups = new Map() for (const [providerName, sources] of providerGroups) { - const projects = await 
parseProviderSources(providerName, sources, seenKeys, dateRange, options) - otherProjects.push(...projects) + plannedProviderGroups.set( + providerName, + await planProviderSources(manifest, providerName, sources, dateRange, parseOptions), + ) } - const mergedMap = new Map() - for (const p of [...claudeProjects, ...otherProjects]) { - const existing = mergedMap.get(p.project) - if (existing) { - existing.sessions.push(...p.sessions) - existing.totalCostUSD += p.totalCostUSD - existing.totalApiCalls += p.totalApiCalls - } else { - mergedMap.set(p.project, { ...p }) + const refreshCount = plannedClaudeRefreshes.filter(state => state.action === 'refresh').length + + [...plannedProviderGroups.values()] + .flat() + .filter(state => state.action === 'refresh').length + + const otherProjects: ProjectSummary[] = [] + if (refreshCount > 0) progress?.start(refreshCount) + + try { + const claudeProjects = await scanClaudeDirsWithCache( + claudeDirs, + seenMsgIds, + dateRange, + manifest, + plannedClaudeRefreshes, + parseOptions, + ) + + for (const [providerName, sources] of providerGroups) { + const projects = await parseProviderSources( + providerName, + sources, + seenKeys, + dateRange, + manifest, + plannedProviderGroups.get(providerName), + parseOptions, + ) + otherProjects.push(...projects) } - } - const result = Array.from(mergedMap.values()).sort((a, b) => b.totalCostUSD - a.totalCostUSD) - cachePut(key, result, sourceSignature) - return result + const mergedMap = new Map() + for (const p of [...claudeProjects, ...otherProjects]) { + const existing = mergedMap.get(p.project) + if (existing) { + existing.sessions.push(...p.sessions) + existing.totalCostUSD += p.totalCostUSD + existing.totalApiCalls += p.totalApiCalls + } else { + mergedMap.set(p.project, { ...p }) + } + } + + const result = Array.from(mergedMap.values()).sort((a, b) => b.totalCostUSD - a.totalCostUSD) + cachePut(key, result, sourceSignature, context, dateRange) + return result + } finally { + if 
/**
 * Builds the manifest key that identifies one cached source.
 *
 * @param provider Provider name.
 * @param logicalPath Logical path of the source.
 * @returns `<provider>:<logicalPath>`.
 */
export function sourceCacheKey(provider: string, logicalPath: string): string {
  const separator = ':'
  return provider + separator + logicalPath
}
/**
 * Type guard for one manifest entry as read from disk.
 *
 * Required: `file` (40 lowercase hex characters followed by `.json`),
 * `provider` and `logicalPath` as strings. Every other field may be absent;
 * when present it must have the expected shape:
 * - `lastSeenParserVersion`, `fingerprintPath`, `firstTimestamp`,
 *   `lastTimestamp`: strings;
 * - `cacheStrategy`: `'full-reparse'` or `'append-jsonl'`;
 * - `fingerprint`: object with finite numeric `mtimeMs` and `sizeBytes`;
 * - `appendState`: object with finite numeric `endOffset`, string `tailHash`
 *   and an optional string `lastEntryType`.
 *
 * @param value Untrusted value to validate.
 * @returns `true` when `value` satisfies all of the checks above.
 */
function isManifestEntry(value: unknown): value is SourceCacheManifest['entries'][string] {
  if (!isPlainObject(value)) return false

  const finiteNumber = (candidate: unknown): boolean =>
    typeof candidate === 'number' && Number.isFinite(candidate)
  const optionalString = (candidate: unknown): boolean =>
    candidate === undefined || typeof candidate === 'string'
  const fingerprintShape = (candidate: unknown): boolean =>
    isPlainObject(candidate)
    && finiteNumber(candidate.mtimeMs)
    && finiteNumber(candidate.sizeBytes)
  const appendStateShape = (candidate: unknown): boolean =>
    isPlainObject(candidate)
    && finiteNumber(candidate.endOffset)
    && typeof candidate.tailHash === 'string'
    && optionalString(candidate.lastEntryType)

  const { file, provider, logicalPath, cacheStrategy, fingerprint, appendState } = value

  // Required identity fields.
  if (typeof file !== 'string' || !/^[a-f0-9]{40}\.json$/.test(file)) return false
  if (typeof provider !== 'string' || typeof logicalPath !== 'string') return false

  // Optional planning hints: absent is fine, a wrong shape is not.
  if (!optionalString(value.lastSeenParserVersion)) return false
  if (cacheStrategy !== undefined && cacheStrategy !== 'full-reparse' && cacheStrategy !== 'append-jsonl') return false
  if (!optionalString(value.fingerprintPath)) return false
  if (fingerprint !== undefined && !fingerprintShape(fingerprint)) return false
  if (!optionalString(value.firstTimestamp) || !optionalString(value.lastTimestamp)) return false

  return appendState === undefined || appendStateShape(appendState)
}
/**
 * Derives the timestamp span covered by a list of sessions.
 *
 * The span starts from the first session's `firstTimestamp` and the last
 * session's `lastTimestamp`, then is widened by every session using plain
 * string comparison. A falsy running value is always replaced by the current
 * session's value.
 *
 * @param sessions Sessions cached for one source.
 * @returns `{}` for an empty list, otherwise the smallest `firstTimestamp` and
 *   largest `lastTimestamp` found.
 */
function rangeFromSessions(sessions: SessionSummary[]): SourceRange {
  const total = sessions.length
  if (total === 0) return {}

  let earliest = sessions[0]?.firstTimestamp
  let latest = sessions[total - 1]?.lastTimestamp

  for (const { firstTimestamp, lastTimestamp } of sessions) {
    const startsEarlier = !earliest || firstTimestamp < earliest
    const endsLater = !latest || lastTimestamp > latest
    if (startsEarlier) earliest = firstTimestamp
    if (endsLater) latest = lastTimestamp
  }

  return { firstTimestamp: earliest, lastTimestamp: latest }
}
createHash('sha1').update(lastLine).digest('hex') : null +} + +function isDateRangeOverlap( + firstTimestamp: string | undefined, + lastTimestamp: string | undefined, + rangeStart: number, + rangeEnd: number, +): boolean | null { + if (!firstTimestamp || !lastTimestamp) return null + + const firstMs = new Date(firstTimestamp).getTime() + const lastMs = new Date(lastTimestamp).getTime() + if (Number.isNaN(firstMs) || Number.isNaN(lastMs)) return null + + return lastMs >= rangeStart && firstMs <= rangeEnd +} + function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { return isPlainObject(value) && typeof value.version === 'number' @@ -181,12 +291,8 @@ function entryDir(): string { return join(cacheRoot(), 'entries') } -function sourceKey(provider: string, logicalPath: string): string { - return `${provider}:${logicalPath}` -} - function entryFilename(provider: string, logicalPath: string): string { - return `${createHash('sha1').update(sourceKey(provider, logicalPath)).digest('hex')}.json` + return `${createHash('sha1').update(sourceCacheKey(provider, logicalPath)).digest('hex')}.json` } export function emptySourceCacheManifest(): SourceCacheManifest { @@ -199,6 +305,7 @@ export async function computeFileFingerprint(filePath: string): Promise { + traceCacheRead('manifest:read', manifestPath()) if (!existsSync(manifestPath())) return emptySourceCacheManifest() try { @@ -254,7 +361,7 @@ export async function readSourceCacheEntry( logicalPath: string, options: ReadSourceCacheEntryOptions = {}, ): Promise { - const meta = manifest.entries[sourceKey(provider, logicalPath)] + const meta = manifest.entries[sourceCacheKey(provider, logicalPath)] if (!meta) return null if (meta.provider !== provider || meta.logicalPath !== logicalPath) return null @@ -263,6 +370,7 @@ export async function readSourceCacheEntry( try { const raw = await readFile(join(entryDir(), meta.file), 'utf-8') + traceCacheRead('entry:read', join(entryDir(), meta.file), `provider=${provider} 
logicalPath=${logicalPath}`) const entry: unknown = JSON.parse(raw) if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null if (entry.provider !== provider || entry.logicalPath !== logicalPath) return null @@ -273,7 +381,17 @@ export async function readSourceCacheEntry( currentFingerprint.mtimeMs !== entry.fingerprint.mtimeMs || currentFingerprint.sizeBytes !== entry.fingerprint.sizeBytes ) { - return null + const sizeMatches = currentFingerprint.sizeBytes === entry.fingerprint.sizeBytes + if (!( + entry.cacheStrategy === 'append-jsonl' + && entry.appendState + && sizeMatches + )) { + return null + } + + const liveTailHash = await readTailStateHash(entry.fingerprintPath, entry.appendState.endOffset) + if (liveTailHash !== entry.appendState.tailHash) return null } } @@ -287,9 +405,24 @@ export async function writeSourceCacheEntry(manifest: SourceCacheManifest, entry await mkdir(entryDir(), { recursive: true }) const file = entryFilename(entry.provider, entry.logicalPath) await atomicWriteJson(join(entryDir(), file), entry) - manifest.entries[sourceKey(entry.provider, entry.logicalPath)] = { + const range = rangeFromSessions(entry.sessions) + manifest.entries[sourceCacheKey(entry.provider, entry.logicalPath)] = { file, provider: entry.provider, logicalPath: entry.logicalPath, + lastSeenParserVersion: entry.parserVersion, + cacheStrategy: entry.cacheStrategy, + fingerprintPath: entry.fingerprintPath, + fingerprint: entry.fingerprint, + ...range, + appendState: entry.appendState, } } + +export function isManifestDateRangeOverlap( + manifestEntry: SourceCacheManifestEntry | null, + dateRange?: { start: Date; end: Date }, +): boolean | null { + if (!manifestEntry || !dateRange) return null + return isDateRangeOverlap(manifestEntry.firstTimestamp, manifestEntry.lastTimestamp, dateRange.start.getTime(), dateRange.end.getTime()) +} diff --git a/tests/parse-progress.test.ts b/tests/parse-progress.test.ts index dbdbcf5..30f8c64 100644 --- 
a/tests/parse-progress.test.ts +++ b/tests/parse-progress.test.ts @@ -1,12 +1,14 @@ +import { stripVTControlCharacters } from 'node:util' import { describe, expect, it, vi } from 'vitest' import { createTerminalProgressReporter } from '../src/parse-progress.js' describe('createTerminalProgressReporter', () => { - it('renders Updating cache progress lines to stderr-compatible streams', () => { + it('renders a provider-aware cache bar with global counts', () => { const writes: string[] = [] const stream = { isTTY: true, + columns: 60, write: vi.fn((chunk: string) => { writes.push(chunk) return true @@ -14,12 +16,62 @@ describe('createTerminalProgressReporter', () => { } as unknown as NodeJS.WriteStream const reporter = createTerminalProgressReporter(true, stream) - reporter?.start('Updating cache', 2) - reporter?.advance('claude/session.jsonl') - reporter?.advance('codex/rollout.jsonl') - reporter?.finish() + reporter?.start(1899) + reporter?.advance('claude') + reporter?.advance('claude') + reporter?.finish('claude') - expect(writes.join('')).toContain('Updating cache') - expect(writes.join('')).toContain('2/2') + const text = stripVTControlCharacters(writes.join('')) + expect(text).toContain('Updating Claude cache') + expect(text).toContain('2/1899') + expect(text).toContain('[') + expect(text).not.toContain('.jsonl') }) -}) + + it('shrinks the bar on narrow terminals', () => { + const writes: string[] = [] + const stream = { + isTTY: true, + columns: 34, + write: vi.fn((chunk: string) => { + writes.push(chunk) + return true + }), + } as unknown as NodeJS.WriteStream + + const reporter = createTerminalProgressReporter(true, stream) + reporter?.start(100) + reporter?.advance('codex') + + const text = stripVTControlCharacters(writes.join('')) + expect(text).toContain('Updating Codex cache') + expect(text).toContain('1/100') + expect(text).toMatch(/\[[█░]{8}\]/) + }) + + it('returns null for non-tty streams', () => { + const stream = { isTTY: false, write: vi.fn() } 
as unknown as NodeJS.WriteStream + expect(createTerminalProgressReporter(true, stream)).toBeNull() + }) + + it('uses stream color depth to configure output styling', () => { + const writes: string[] = [] + const getColorDepth = vi.fn(() => 8) + const stream = { + isTTY: true, + columns: 80, + getColorDepth, + write: vi.fn((chunk: string) => { + writes.push(chunk) + return true + }), + } as unknown as NodeJS.WriteStream + + const reporter = createTerminalProgressReporter(true, stream) + reporter?.start(2) + reporter?.advance('claude') + + expect(getColorDepth).toHaveBeenCalledTimes(1) + expect(writes.join('')).toContain('Updating') + }) +}) diff --git a/tests/parser-cache.test.ts b/tests/parser-cache.test.ts index 3ee2553..88a818a 100644 --- a/tests/parser-cache.test.ts +++ b/tests/parser-cache.test.ts @@ -75,14 +75,18 @@ afterEach(async () => { }) describe('parseAllSessions source cache', () => { - it('reuses unchanged cached sources, refreshes changed sources, and honors noCache', async () => { + it('uses one global progress lifecycle across provider refreshes', async () => { const fakeSource = { path: sourcePath, fingerprintPath: sourcePath, project: 'fake-project', provider: 'fake', cacheStrategy: 'full-reparse', - progressLabel: 'fake.jsonl', + } as SessionSource + const claudeSource = { + path: join(claudeRoot, 'projects', 'demo-project'), + project: 'demo-project', + provider: 'claude', } as SessionSource const fakeProvider: Provider = { @@ -103,7 +107,10 @@ describe('parseAllSessions source cache', () => { } vi.doMock('../src/providers/index.js', () => ({ - discoverAllSessions: async () => [fakeSource], + discoverAllSessions: async (providerFilter?: string) => { + if (providerFilter === 'fake') return [fakeSource] + return [claudeSource, fakeSource] + }, getProvider: async () => fakeProvider, })) @@ -115,12 +122,15 @@ describe('parseAllSessions source cache', () => { finish: vi.fn(), } - const first = await parseAllSessions(undefined, 'fake', { progress }) - 
expect(first[0]?.totalApiCalls).toBe(1) + const first = await parseAllSessions(undefined, undefined, { progress }) + expect(first).toEqual(expect.any(Array)) expect(parseCalls).toBe(1) - expect(progress.start).toHaveBeenCalledWith('Updating cache', 1) - expect(progress.advance).toHaveBeenCalledWith('fake.jsonl') - expect(progress.finish).toHaveBeenCalled() + expect(progress.start).toHaveBeenCalledTimes(1) + expect(progress.start).toHaveBeenCalledWith(2) + expect(progress.advance).toHaveBeenCalledWith('claude') + expect(progress.advance).toHaveBeenCalledWith('fake') + expect(progress.finish).toHaveBeenCalledTimes(1) + expect(progress.finish).toHaveBeenCalledWith('fake') const second = await parseAllSessions(undefined, 'fake') expect(second[0]?.totalApiCalls).toBe(1) @@ -136,6 +146,118 @@ describe('parseAllSessions source cache', () => { expect(parseCalls).toBe(3) }) + it('reuses a broader cached window for a narrower date range', async () => { + const providerName = 'fake-range-reuse' + const fakeSource = { + path: sourcePath, + fingerprintPath: sourcePath, + project: 'fake-project', + provider: providerName, + cacheStrategy: 'full-reparse', + } as SessionSource + + const fakeProvider: Provider = { + name: providerName, + displayName: 'Fake', + modelDisplayName: model => model, + toolDisplayName: tool => tool, + discoverSessions: async () => [fakeSource], + createSessionParser() { + return { + async *parse() { + parseCalls += 1 + yield { ...makeCall(0), timestamp: '2026-04-20T10:00:00.000Z', deduplicationKey: `${providerName}:day1` } + yield { ...makeCall(1), timestamp: '2026-04-21T10:00:00.000Z', deduplicationKey: `${providerName}:day2` } + }, + } + }, + } + + vi.doMock('../src/providers/index.js', () => ({ + discoverAllSessions: async () => [fakeSource], + getProvider: async () => fakeProvider, + })) + + vi.resetModules() + const { parseAllSessions: parseWithCache } = await import('../src/parser.js') + + const wide = { + start: new 
Date('2026-04-19T00:00:00.000Z'), + end: new Date('2026-04-21T23:59:59.999Z'), + } + const narrow = { + start: new Date('2026-04-20T00:00:00.000Z'), + end: new Date('2026-04-20T23:59:59.999Z'), + } + + const wideProjects = await parseWithCache(wide, providerName) + expect(wideProjects[0]?.totalApiCalls).toBe(2) + expect(parseCalls).toBe(1) + + const narrowProjects = await parseWithCache(narrow, providerName) + expect(narrowProjects[0]?.totalApiCalls).toBe(1) + expect(parseCalls).toBe(1) + }) + + it('does not deduplicate claude turns across different session files', async () => { + const sharedMsgId = 'msg_shared_duplicate_123' + const secondSessionPath = join(claudeRoot, 'projects', 'demo-project', 'session-2.jsonl') + await Promise.all([ + writeFile(claudeSessionPath, [ + JSON.stringify({ + type: 'user', + timestamp: '2026-04-20T09:00:00.000Z', + sessionId: 'sess-1', + message: { role: 'user', content: 'first' }, + }), + JSON.stringify({ + type: 'assistant', + timestamp: '2026-04-20T09:00:01.000Z', + message: { + id: sharedMsgId, + model: 'claude-sonnet-4-6', + role: 'assistant', + type: 'message', + content: [], + usage: { input_tokens: 10, output_tokens: 20 }, + }, + }), + ].join('\n') + '\n'), + writeFile(secondSessionPath, [ + JSON.stringify({ + type: 'user', + timestamp: '2026-04-20T09:05:00.000Z', + sessionId: 'sess-2', + message: { role: 'user', content: 'second' }, + }), + JSON.stringify({ + type: 'assistant', + timestamp: '2026-04-20T09:05:01.000Z', + message: { + id: sharedMsgId, + model: 'claude-sonnet-4-6', + role: 'assistant', + type: 'message', + content: [], + usage: { input_tokens: 11, output_tokens: 21 }, + }, + }), + ].join('\n') + '\n'), + ]) + + vi.doUnmock('../src/providers/index.js') + vi.resetModules() + const { parseAllSessions } = await import('../src/parser.js') + + const first = await parseAllSessions(undefined, 'claude') + const project = first.find(project => project.project === 'demo-project') + expect(project?.totalApiCalls).toBe(2) 
+ + const second = await parseAllSessions(undefined, 'claude') + const cachedProject = second.find(project => project.project === 'demo-project') + expect(cachedProject?.totalApiCalls).toBe(2) + }) + it('filters cached full sessions down to the requested date range', async () => { const fakeSource = { path: sourcePath, @@ -307,4 +429,47 @@ describe('parseAllSessions source cache', () => { expect(session?.turns[0]?.userMessage).toBe('first') expect(session?.turns[0]?.assistantCalls).toHaveLength(2) }) + + it('caches Claude session files that contain no turns', async () => { + await writeFile(claudeSessionPath, [ + JSON.stringify({ + type: 'user', + timestamp: '2026-04-20T09:00:00.000Z', + sessionId: 'sess-empty', + message: { role: 'user', content: 'no assistant response' }, + }), + ].join('\n') + '\n', 'utf-8') + + const readSessionFileCalls: string[] = [] + vi.doMock('../src/fs-utils.js', async () => { + const actual = await vi.importActual('../src/fs-utils.js') + return { + ...actual, + readSessionFile: vi.fn(async (filePath: string) => { + readSessionFileCalls.push(filePath) + return actual.readSessionFile(filePath) + }), + } + }) + + vi.resetModules() + const { parseAllSessions } = await import('../src/parser.js') + + const first = await parseAllSessions(undefined, 'claude') + const cacheRoot = join(root, 'cache', 'source-cache-v1') + const manifest = JSON.parse(await readFile(join(cacheRoot, 'manifest.json'), 'utf-8')) as { + entries: Record + } + const entryKey = `claude:${claudeSessionPath}` + expect(manifest.entries[entryKey]).toBeDefined() + const cacheEntry = JSON.parse(await readFile(join(cacheRoot, 'entries', manifest.entries[entryKey]!.file), 'utf-8')) as { sessions: unknown[] } + + expect(first.find(project => project.project === 'demo-project')?.totalApiCalls).toBeUndefined() + expect(readSessionFileCalls.filter(path => path === claudeSessionPath)).toHaveLength(1) + expect(cacheEntry.sessions).toHaveLength(0) + + const second = await 
parseAllSessions(undefined, 'claude') + expect(second.find(project => project.project === 'demo-project')?.totalApiCalls).toBeUndefined() + expect(readSessionFileCalls.filter(path => path === claudeSessionPath)).toHaveLength(1) + }) })