From f0c4cf432c3a9d57c1d323ec75c6d8bced7891bd Mon Sep 17 00:00:00 2001 From: "zuber Khan(zuberikea)" Date: Wed, 3 Jun 2026 14:15:22 +0200 Subject: [PATCH 1/2] feat(copilot): add JetBrains (IntelliJ/DataGrip) session discovery and parsing The Copilot provider previously only discovered sessions from: - ~/.copilot/session-state/ (legacy VS Code format) - VS Code workspace storage directories IntelliJ and other JetBrains IDEs store Copilot chat sessions in ~/.copilot/jb/<session-id>/partition-*.jsonl using a slightly different event format. This commit adds: - discoverJetBrainsSessions(): finds all .jsonl files under ~/.copilot/jb/ - isJetBrainsFormat(): detects JB event format (no session.start header) - parseJetBrainsEvents(): parses JB events (assistant.message with text/ thinking, tool.execution_start for tool names, tool call ID prefixes for model inference) - inferJBProjectName(): extracts project name from tool execution paths Closes #N/A --- src/providers/copilot.ts | 244 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 239 insertions(+), 5 deletions(-) diff --git a/src/providers/copilot.ts b/src/providers/copilot.ts index 00034740..15c66650 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -334,6 +334,155 @@ function isTranscriptFormat(content: string): boolean { } } +function isJetBrainsFormat(content: string): boolean { + const firstLine = content.split('\n')[0] ??
'' + try { + const event = JSON.parse(firstLine) + // JB format starts with user.message_rendered or user.message (no session.start) + // and has events with turnId / iterationNumber patterns + return ( + event.type === 'user.message_rendered' || + event.type === 'user.message' || + event.type === 'partition.created' + ) + } catch { + return false + } +} + +// --- JetBrains (IntelliJ/DataGrip) format parser --- + +type JBEvent = { + type: string + timestamp?: string + id?: string + data: Record +} + +function parseJetBrainsEvents(content: string, sessionId: string, seenKeys: Set): ParsedProviderCall[] { + const results: ParsedProviderCall[] = [] + const lines = content.split('\n').filter(l => l.trim()) + const events: JBEvent[] = [] + + for (const line of lines) { + try { + events.push(JSON.parse(line)) + } catch { + continue + } + } + + // Infer model from tool call IDs (same heuristic as transcript format) + const modelCounts = new Map() + for (const e of events) { + if (e.type === 'tool.execution_start' || e.type === 'tool.execution_complete') { + const toolCallId = (e.data.toolCallId as string) ?? '' + for (const hint of transcriptToolCallModelHints) { + if (!toolCallId.startsWith(hint.prefix)) continue + modelCounts.set(hint.model, (modelCounts.get(hint.model) ?? 0) + 1) + break + } + } + } + const model = modelCounts.size > 0 + ? [...modelCounts.entries()].sort((a, b) => b[1] - a[1])[0]![0] + : 'copilot-auto' + + // Collect tool names per turn (messageId) + const toolsByTurn = new Map() + let currentTurnId = '' + + // First pass: gather user message text + let pendingUserMessage = '' + for (const e of events) { + if (e.type === 'user.message_rendered') { + const msg = (e.data.renderedMessage as string) ?? '' + pendingUserMessage = msg.slice(0, 500) + } + if (e.type === 'user.message') { + const msg = (e.data.content as string) ?? 
'' + if (msg) pendingUserMessage = msg.slice(0, 500) + } + } + + // Reset for second pass + let userMsg = '' + for (const e of events) { + if (e.type === 'user.message_rendered') { + userMsg = ((e.data.renderedMessage as string) ?? '').slice(0, 500) + } + if (e.type === 'user.message') { + const msg = (e.data.content as string) ?? '' + if (msg) userMsg = msg.slice(0, 500) + } + + if (e.type === 'assistant.turn_start') { + currentTurnId = (e.data.turnId as string) ?? '' + } + + if (e.type === 'tool.execution_start') { + const toolName = (e.data.toolName as string) ?? '' + const normalized = normalizeToolName(toolName) + if (normalized) { + const msgId = currentTurnId || 'unknown' + const existing = toolsByTurn.get(msgId) ?? [] + existing.push(normalized) + toolsByTurn.set(msgId, existing) + } + } + + if (e.type === 'assistant.message') { + const data = e.data as { messageId?: string; content?: string; text?: string; reasoningText?: string; thinking?: { text?: string }; iterationNumber?: number; outputTokens?: number } + const contentText = data.text ?? data.content ?? '' + const reasoningText = data.reasoningText ?? data.thinking?.text ?? '' + + // Skip empty messages (streaming placeholders) + if (contentText.length === 0 && reasoningText.length === 0) continue + + const messageId = data.messageId ?? e.id ?? '' + const dedupKey = `copilot:jb:${sessionId}:${messageId}:${data.iterationNumber ?? 0}` + if (seenKeys.has(dedupKey)) continue + seenKeys.add(dedupKey) + + let outputTokens = data.outputTokens ?? 0 + let reasoningTokens = 0 + if (outputTokens === 0) { + outputTokens = Math.ceil(contentText.length / CHARS_PER_TOKEN) + reasoningTokens = Math.ceil(reasoningText.length / CHARS_PER_TOKEN) + } + + const inputTokens = Math.ceil(userMsg.length / CHARS_PER_TOKEN) + const tools = toolsByTurn.get(currentTurnId || messageId) ?? 
[] + const costUSD = calculateCost(model, inputTokens, outputTokens + reasoningTokens, 0, 0, 0) + + results.push({ + provider: 'copilot', + model, + inputTokens, + outputTokens, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens, + webSearchRequests: 0, + costUSD, + tools, + bashCommands: [], + timestamp: e.timestamp ?? '', + speed: 'standard', + deduplicationKey: dedupKey, + userMessage: userMsg, + sessionId, + }) + + // Only count user message once per assistant turn + userMsg = '' + } + } + + return results +} + function createParser(source: SessionSource, seenKeys: Set): SessionParser { return { async *parse(): AsyncGenerator { @@ -343,9 +492,14 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars ? basename(source.path, '.jsonl') : basename(dirname(source.path)) - const calls = isTranscriptFormat(content) - ? parseTranscriptEvents(content, sessionId, seenKeys) - : parseLegacyEvents(content, sessionId, seenKeys) + let calls: ParsedProviderCall[] + if (isTranscriptFormat(content)) { + calls = parseTranscriptEvents(content, sessionId, seenKeys) + } else if (isJetBrainsFormat(content)) { + calls = parseJetBrainsEvents(content, sessionId, seenKeys) + } else { + calls = parseLegacyEvents(content, sessionId, seenKeys) + } for (const call of calls) { yield call @@ -409,6 +563,84 @@ async function readWorkspaceProject(workspaceDir: string): Promise { return basename(workspaceDir) } +function getJetBrainsSessionDir(override?: string): string { + return override ?? 
join(homedir(), '.copilot', 'jb') +} + +async function discoverJetBrainsSessions(jbDir: string): Promise { + const sources: SessionSource[] = [] + + let sessionDirs: string[] + try { + sessionDirs = await readdir(jbDir) + } catch { + return sources + } + + for (const sessionId of sessionDirs) { + const sessionPath = join(jbDir, sessionId) + const s = await stat(sessionPath).catch(() => null) + if (!s?.isDirectory()) continue + + let partitions: string[] + try { + partitions = await readdir(sessionPath) + } catch { + continue + } + + for (const file of partitions) { + if (!file.endsWith('.jsonl')) continue + const filePath = join(sessionPath, file) + const fs = await stat(filePath).catch(() => null) + if (!fs?.isFile()) continue + + // Try to infer project name from tool execution paths in the first few lines + const project = await inferJBProjectName(filePath) ?? sessionId + sources.push({ path: filePath, project, provider: 'copilot' }) + } + } + + return sources +} + +async function inferJBProjectName(filePath: string): Promise { + try { + const content = await readFile(filePath, 'utf-8') + const lines = content.split('\n').slice(0, 100) // Only scan first 100 lines + const homeParts = homedir().split(sep) + const homeDepth = homeParts.length + + for (const line of lines) { + try { + const e = JSON.parse(line) + if (e.type === 'tool.execution_start') { + const args = e.data?.arguments + if (typeof args === 'object' && args !== null && typeof args.path === 'string') { + const pathVal: string = args.path + const parts = pathVal.split('/') + // Pick the first meaningful directory after home (the project root) + if (parts.length > homeDepth + 1) { + // Skip common intermediate dirs like "IKEA", "projects", "repos", "src", "work" + // Try to return the first unique project-level folder + const afterHome = parts.slice(homeDepth) + // Return up to 2 levels deep for context (e.g. 
"01_SPE/spe-price-data-service") + if (afterHome.length >= 2) { + return basename(afterHome.slice(0, afterHome.length > 2 ? 2 : afterHome.length).join('/')) + || afterHome[0] || null + } + return afterHome[0] || null + } + } + } + } catch { + continue + } + } + } catch {} + return null +} + async function discoverLegacySessions(sessionStateDir: string): Promise { const sources: SessionSource[] = [] @@ -475,6 +707,7 @@ async function discoverVSCodeTranscripts(workspaceStorageDir: string): Promise { - const [legacy, ...vscodeResults] = await Promise.all([ + const [legacy, jb, ...vscodeResults] = await Promise.all([ discoverLegacySessions(legacyDir), + discoverJetBrainsSessions(jbDir), ...vscodeDirs.map(discoverVSCodeTranscripts), ]) - return [...legacy, ...vscodeResults.flat()] + return [...legacy, ...jb, ...vscodeResults.flat()] }, createSessionParser(source: SessionSource, seenKeys: Set): SessionParser { From 45172837aa1773d58710214e4b9ec74e6ad85f61 Mon Sep 17 00:00:00 2001 From: "zuber Khan(zuberikea)" Date: Thu, 4 Jun 2026 09:30:08 +0200 Subject: [PATCH 2/2] fix: address PR review feedback for JetBrains Copilot support - Add 'sep' to path import (fixes ReferenceError in project inference) - Use incrementing index as dedup key fallback when messageId is absent to prevent undercounting tokens/cost - Move project inference to parse time using already-loaded content (avoids bypassing readSessionFile's 128MB safety cap) - Tighten isJetBrainsFormat to only match user.message_rendered and partition.created (avoids false-positive on legacy files starting with user.message) - Add dedicated inferModelFromEvents for JB format that checks data.model (100x weight) and tool call ID prefixes on tool.execution_start/complete events - Remove dead first-pass loop that was never read - Add jbDirOverride param to createCopilotProvider for test fixtures --- src/providers/copilot.ts | 150 +++++++++++++++++++++------------------ 1 file changed, 79 insertions(+), 71 deletions(-) diff 
--git a/src/providers/copilot.ts b/src/providers/copilot.ts index 15c66650..eda483ed 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -1,6 +1,6 @@ import { existsSync } from 'fs' import { readdir, readFile, stat } from 'fs/promises' -import { basename, dirname, join, posix, win32 } from 'path' +import { basename, dirname, join, posix, sep, win32 } from 'path' import { homedir } from 'os' import { readSessionFile } from '../fs-utils.js' @@ -243,6 +243,34 @@ function inferModelFromToolCallIds(events: TranscriptEvent[]): string { return 'copilot-auto' } +/** Model inference for JB events — checks data.model (100x weight) and tool call + * ID prefixes on tool.execution_start/complete events. */ +function inferModelFromEvents(events: JBEvent[]): string { + const modelCounts = new Map() + + for (const e of events) { + const data = e.data as { model?: string; toolCallId?: string } + if (typeof data.model === 'string' && data.model) { + modelCounts.set(data.model, (modelCounts.get(data.model) ?? 0) + 100) + } + + if (e.type === 'tool.execution_start' || e.type === 'tool.execution_complete') { + const toolCallId = data.toolCallId ?? '' + for (const hint of transcriptToolCallModelHints) { + if (!toolCallId.startsWith(hint.prefix)) continue + modelCounts.set(hint.model, (modelCounts.get(hint.model) ?? 0) + 1) + break + } + } + } + + if (modelCounts.size > 0) { + return [...modelCounts.entries()].sort((a, b) => b[1] - a[1])[0]![0] + } + + return 'copilot-auto' +} + function parseTranscriptEvents(content: string, sessionId: string, seenKeys: Set): ParsedProviderCall[] { const results: ParsedProviderCall[] = [] const lines = content.split('\n').filter(l => l.trim()) @@ -338,11 +366,11 @@ function isJetBrainsFormat(content: string): boolean { const firstLine = content.split('\n')[0] ?? 
'' try { const event = JSON.parse(firstLine) - // JB format starts with user.message_rendered or user.message (no session.start) - // and has events with turnId / iterationNumber patterns + // JB format starts with user.message_rendered or partition.created (JB-specific). + // We intentionally exclude user.message here since legacy files can also start + // with that event type — routing them to the JB parser would misparse them. return ( event.type === 'user.message_rendered' || - event.type === 'user.message' || event.type === 'partition.created' ) } catch { @@ -372,41 +400,17 @@ function parseJetBrainsEvents(content: string, sessionId: string, seenKeys: Set< } } - // Infer model from tool call IDs (same heuristic as transcript format) - const modelCounts = new Map() - for (const e of events) { - if (e.type === 'tool.execution_start' || e.type === 'tool.execution_complete') { - const toolCallId = (e.data.toolCallId as string) ?? '' - for (const hint of transcriptToolCallModelHints) { - if (!toolCallId.startsWith(hint.prefix)) continue - modelCounts.set(hint.model, (modelCounts.get(hint.model) ?? 0) + 1) - break - } - } - } - const model = modelCounts.size > 0 - ? [...modelCounts.entries()].sort((a, b) => b[1] - a[1])[0]![0] - : 'copilot-auto' + // Reuse the shared model inference logic: check explicit data.model fields + // (weighted 100x) and tool call ID prefix heuristics from both assistant.message + // toolRequests and tool.execution_start/complete events (JB-specific). + const model = inferModelFromEvents(events) - // Collect tool names per turn (messageId) + // Collect tool names per turn const toolsByTurn = new Map() let currentTurnId = '' - - // First pass: gather user message text - let pendingUserMessage = '' - for (const e of events) { - if (e.type === 'user.message_rendered') { - const msg = (e.data.renderedMessage as string) ?? 
'' - pendingUserMessage = msg.slice(0, 500) - } - if (e.type === 'user.message') { - const msg = (e.data.content as string) ?? '' - if (msg) pendingUserMessage = msg.slice(0, 500) - } - } - - // Reset for second pass let userMsg = '' + let msgIndex = 0 + for (const e of events) { if (e.type === 'user.message_rendered') { userMsg = ((e.data.renderedMessage as string) ?? '').slice(0, 500) @@ -439,8 +443,11 @@ function parseJetBrainsEvents(content: string, sessionId: string, seenKeys: Set< // Skip empty messages (streaming placeholders) if (contentText.length === 0 && reasoningText.length === 0) continue + // Use messageId if available, otherwise fall back to an incrementing index + // to avoid dedup collisions when messageId is absent. const messageId = data.messageId ?? e.id ?? '' - const dedupKey = `copilot:jb:${sessionId}:${messageId}:${data.iterationNumber ?? 0}` + const dedupId = messageId || String(msgIndex++) + const dedupKey = `copilot:jb:${sessionId}:${dedupId}:${data.iterationNumber ?? 0}` if (seenKeys.has(dedupKey)) continue seenKeys.add(dedupKey) @@ -497,6 +504,13 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars calls = parseTranscriptEvents(content, sessionId, seenKeys) } else if (isJetBrainsFormat(content)) { calls = parseJetBrainsEvents(content, sessionId, seenKeys) + // Infer project name from tool paths now that content is loaded + const inferredProject = inferJBProjectFromContent(content) + if (inferredProject) { + for (const call of calls) { + call.project = inferredProject + } + } } else { calls = parseLegacyEvents(content, sessionId, seenKeys) } @@ -594,50 +608,44 @@ async function discoverJetBrainsSessions(jbDir: string): Promise null) if (!fs?.isFile()) continue - - // Try to infer project name from tool execution paths in the first few lines - const project = await inferJBProjectName(filePath) ?? 
sessionId - sources.push({ path: filePath, project, provider: 'copilot' }) + sources.push({ path: filePath, project: sessionId, provider: 'copilot' }) } } return sources } -async function inferJBProjectName(filePath: string): Promise { - try { - const content = await readFile(filePath, 'utf-8') - const lines = content.split('\n').slice(0, 100) // Only scan first 100 lines - const homeParts = homedir().split(sep) - const homeDepth = homeParts.length - - for (const line of lines) { - try { - const e = JSON.parse(line) - if (e.type === 'tool.execution_start') { - const args = e.data?.arguments - if (typeof args === 'object' && args !== null && typeof args.path === 'string') { - const pathVal: string = args.path - const parts = pathVal.split('/') - // Pick the first meaningful directory after home (the project root) - if (parts.length > homeDepth + 1) { - // Skip common intermediate dirs like "IKEA", "projects", "repos", "src", "work" - // Try to return the first unique project-level folder - const afterHome = parts.slice(homeDepth) - // Return up to 2 levels deep for context (e.g. "01_SPE/spe-price-data-service") - if (afterHome.length >= 2) { - return basename(afterHome.slice(0, afterHome.length > 2 ? 2 : afterHome.length).join('/')) - || afterHome[0] || null - } - return afterHome[0] || null +/** Infer a project name from tool execution paths in already-loaded content. 
*/ +function inferJBProjectFromContent(content: string): string | null { + const homeParts = homedir().split(sep) + const homeDepth = homeParts.length + const lines = content.split('\n') + const limit = Math.min(lines.length, 200) + + for (let i = 0; i < limit; i++) { + const line = lines[i] + if (!line) continue + try { + const e = JSON.parse(line) + if (e.type === 'tool.execution_start') { + const args = e.data?.arguments + if (typeof args === 'object' && args !== null && typeof args.path === 'string') { + const pathVal: string = args.path + const parts = pathVal.split('/') + if (parts.length > homeDepth + 1) { + const afterHome = parts.slice(homeDepth) + if (afterHome.length >= 2) { + return basename(afterHome.slice(0, afterHome.length > 2 ? 2 : afterHome.length).join('/')) + || afterHome[0] || null } + return afterHome[0] || null } } - } catch { - continue } + } catch { + continue } - } catch {} + } return null } @@ -704,10 +712,10 @@ async function discoverVSCodeTranscripts(workspaceStorageDir: string): Promise