Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
256 changes: 249 additions & 7 deletions src/providers/copilot.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { existsSync } from 'fs'
import { readdir, readFile, stat } from 'fs/promises'
import { basename, dirname, join, posix, win32 } from 'path'
import { basename, dirname, join, posix, sep, win32 } from 'path'
import { homedir } from 'os'

import { readSessionFile } from '../fs-utils.js'
Expand Down Expand Up @@ -243,6 +243,34 @@ function inferModelFromToolCallIds(events: TranscriptEvent[]): string {
return 'copilot-auto'
}

/** Model inference for JB events — checks data.model (100x weight) and tool call
* ID prefixes on tool.execution_start/complete events. */
function inferModelFromEvents(events: JBEvent[]): string {
const modelCounts = new Map<string, number>()

for (const e of events) {
const data = e.data as { model?: string; toolCallId?: string }
if (typeof data.model === 'string' && data.model) {
modelCounts.set(data.model, (modelCounts.get(data.model) ?? 0) + 100)
}

if (e.type === 'tool.execution_start' || e.type === 'tool.execution_complete') {
const toolCallId = data.toolCallId ?? ''
for (const hint of transcriptToolCallModelHints) {
if (!toolCallId.startsWith(hint.prefix)) continue
modelCounts.set(hint.model, (modelCounts.get(hint.model) ?? 0) + 1)
break
}
}
}

if (modelCounts.size > 0) {
return [...modelCounts.entries()].sort((a, b) => b[1] - a[1])[0]![0]
}

return 'copilot-auto'
}

function parseTranscriptEvents(content: string, sessionId: string, seenKeys: Set<string>): ParsedProviderCall[] {
const results: ParsedProviderCall[] = []
const lines = content.split('\n').filter(l => l.trim())
Expand Down Expand Up @@ -334,6 +362,134 @@ function isTranscriptFormat(content: string): boolean {
}
}

function isJetBrainsFormat(content: string): boolean {
const firstLine = content.split('\n')[0] ?? ''
try {
const event = JSON.parse(firstLine)
// JB format starts with user.message_rendered or partition.created (JB-specific).
// We intentionally exclude user.message here since legacy files can also start
// with that event type — routing them to the JB parser would misparse them.
return (
event.type === 'user.message_rendered' ||
event.type === 'partition.created'
)
} catch {
return false
}
}

// --- JetBrains (IntelliJ/DataGrip) format parser ---

type JBEvent = {
type: string
timestamp?: string
id?: string
data: Record<string, unknown>
}

function parseJetBrainsEvents(content: string, sessionId: string, seenKeys: Set<string>): ParsedProviderCall[] {
const results: ParsedProviderCall[] = []
const lines = content.split('\n').filter(l => l.trim())
const events: JBEvent[] = []

for (const line of lines) {
try {
events.push(JSON.parse(line))
} catch {
continue
}
}

// Reuse the shared model inference logic: check explicit data.model fields
// (weighted 100x) and tool call ID prefix heuristics from both assistant.message
// toolRequests and tool.execution_start/complete events (JB-specific).
const model = inferModelFromEvents(events)

// Collect tool names per turn
const toolsByTurn = new Map<string, string[]>()
let currentTurnId = ''
let userMsg = ''
let msgIndex = 0

for (const e of events) {
if (e.type === 'user.message_rendered') {
userMsg = ((e.data.renderedMessage as string) ?? '').slice(0, 500)
}
if (e.type === 'user.message') {
const msg = (e.data.content as string) ?? ''
if (msg) userMsg = msg.slice(0, 500)
}

if (e.type === 'assistant.turn_start') {
currentTurnId = (e.data.turnId as string) ?? ''
}

if (e.type === 'tool.execution_start') {
const toolName = (e.data.toolName as string) ?? ''
const normalized = normalizeToolName(toolName)
if (normalized) {
const msgId = currentTurnId || 'unknown'
const existing = toolsByTurn.get(msgId) ?? []
existing.push(normalized)
toolsByTurn.set(msgId, existing)
}
}

if (e.type === 'assistant.message') {
const data = e.data as { messageId?: string; content?: string; text?: string; reasoningText?: string; thinking?: { text?: string }; iterationNumber?: number; outputTokens?: number }
const contentText = data.text ?? data.content ?? ''
const reasoningText = data.reasoningText ?? data.thinking?.text ?? ''

// Skip empty messages (streaming placeholders)
if (contentText.length === 0 && reasoningText.length === 0) continue

// Use messageId if available, otherwise fall back to an incrementing index
// to avoid dedup collisions when messageId is absent.
const messageId = data.messageId ?? e.id ?? ''
const dedupId = messageId || String(msgIndex++)
const dedupKey = `copilot:jb:${sessionId}:${dedupId}:${data.iterationNumber ?? 0}`
if (seenKeys.has(dedupKey)) continue
seenKeys.add(dedupKey)

let outputTokens = data.outputTokens ?? 0
let reasoningTokens = 0
if (outputTokens === 0) {
outputTokens = Math.ceil(contentText.length / CHARS_PER_TOKEN)
reasoningTokens = Math.ceil(reasoningText.length / CHARS_PER_TOKEN)
}

const inputTokens = Math.ceil(userMsg.length / CHARS_PER_TOKEN)
const tools = toolsByTurn.get(currentTurnId || messageId) ?? []
const costUSD = calculateCost(model, inputTokens, outputTokens + reasoningTokens, 0, 0, 0)

results.push({
provider: 'copilot',
model,
inputTokens,
outputTokens,
cacheCreationInputTokens: 0,
cacheReadInputTokens: 0,
cachedInputTokens: 0,
reasoningTokens,
webSearchRequests: 0,
costUSD,
tools,
bashCommands: [],
timestamp: e.timestamp ?? '',
speed: 'standard',
deduplicationKey: dedupKey,
userMessage: userMsg,
sessionId,
})

// Only count user message once per assistant turn
userMsg = ''
}
}

return results
}

function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
return {
async *parse(): AsyncGenerator<ParsedProviderCall> {
Expand All @@ -343,9 +499,21 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
? basename(source.path, '.jsonl')
: basename(dirname(source.path))

const calls = isTranscriptFormat(content)
? parseTranscriptEvents(content, sessionId, seenKeys)
: parseLegacyEvents(content, sessionId, seenKeys)
let calls: ParsedProviderCall[]
if (isTranscriptFormat(content)) {
calls = parseTranscriptEvents(content, sessionId, seenKeys)
} else if (isJetBrainsFormat(content)) {
calls = parseJetBrainsEvents(content, sessionId, seenKeys)
// Infer project name from tool paths now that content is loaded
const inferredProject = inferJBProjectFromContent(content)
if (inferredProject) {
for (const call of calls) {
call.project = inferredProject
}
}
} else {
calls = parseLegacyEvents(content, sessionId, seenKeys)
}

for (const call of calls) {
yield call
Expand Down Expand Up @@ -409,6 +577,78 @@ async function readWorkspaceProject(workspaceDir: string): Promise<string> {
return basename(workspaceDir)
}

function getJetBrainsSessionDir(override?: string): string {
return override ?? join(homedir(), '.copilot', 'jb')
}

async function discoverJetBrainsSessions(jbDir: string): Promise<SessionSource[]> {
const sources: SessionSource[] = []

let sessionDirs: string[]
try {
sessionDirs = await readdir(jbDir)
} catch {
return sources
}

for (const sessionId of sessionDirs) {
const sessionPath = join(jbDir, sessionId)
const s = await stat(sessionPath).catch(() => null)
if (!s?.isDirectory()) continue

let partitions: string[]
try {
partitions = await readdir(sessionPath)
} catch {
continue
}

for (const file of partitions) {
if (!file.endsWith('.jsonl')) continue
const filePath = join(sessionPath, file)
const fs = await stat(filePath).catch(() => null)
if (!fs?.isFile()) continue
sources.push({ path: filePath, project: sessionId, provider: 'copilot' })
}
}

return sources
}

/** Infer a project name from tool execution paths in already-loaded content. */
function inferJBProjectFromContent(content: string): string | null {
const homeParts = homedir().split(sep)
const homeDepth = homeParts.length
const lines = content.split('\n')
const limit = Math.min(lines.length, 200)

for (let i = 0; i < limit; i++) {
const line = lines[i]
if (!line) continue
try {
const e = JSON.parse(line)
if (e.type === 'tool.execution_start') {
const args = e.data?.arguments
if (typeof args === 'object' && args !== null && typeof args.path === 'string') {
const pathVal: string = args.path
const parts = pathVal.split('/')
if (parts.length > homeDepth + 1) {
const afterHome = parts.slice(homeDepth)
if (afterHome.length >= 2) {
return basename(afterHome.slice(0, afterHome.length > 2 ? 2 : afterHome.length).join('/'))
|| afterHome[0] || null
}
return afterHome[0] || null
}
}
}
} catch {
continue
}
}
return null
}

async function discoverLegacySessions(sessionStateDir: string): Promise<SessionSource[]> {
const sources: SessionSource[] = []

Expand Down Expand Up @@ -472,9 +712,10 @@ async function discoverVSCodeTranscripts(workspaceStorageDir: string): Promise<S
return sources
}

export function createCopilotProvider(sessionStateDir?: string, workspaceStorageDirOverride?: string): Provider {
export function createCopilotProvider(sessionStateDir?: string, workspaceStorageDirOverride?: string, jbDirOverride?: string): Provider {
const legacyDir = getCopilotSessionStateDir(sessionStateDir)
const vscodeDirs = workspaceStorageDirOverride != null ? [workspaceStorageDirOverride] : getVSCodeWorkspaceStorageDirs()
const jbDir = getJetBrainsSessionDir(jbDirOverride)

return {
name: 'copilot',
Expand All @@ -495,11 +736,12 @@ export function createCopilotProvider(sessionStateDir?: string, workspaceStorage
},

async discoverSessions(): Promise<SessionSource[]> {
const [legacy, ...vscodeResults] = await Promise.all([
const [legacy, jb, ...vscodeResults] = await Promise.all([
discoverLegacySessions(legacyDir),
discoverJetBrainsSessions(jbDir),
...vscodeDirs.map(discoverVSCodeTranscripts),
])
return [...legacy, ...vscodeResults.flat()]
return [...legacy, ...jb, ...vscodeResults.flat()]
},

createSessionParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
Expand Down