From 01dc5af1d038dcc2b3e23aa184f020b11f309757 Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Wed, 4 Feb 2026 14:39:02 +1100 Subject: [PATCH 01/18] update checkpoint data structure --- cmd/entire/cli/checkpoint/checkpoint.go | 48 +- cmd/entire/cli/checkpoint/checkpoint_test.go | 213 +++---- cmd/entire/cli/checkpoint/committed.go | 597 ++++++++++-------- .../cli/integration_test/attribution_test.go | 17 +- .../auto_commit_checkpoint_fix_test.go | 9 +- .../manual_commit_workflow_test.go | 94 +-- cmd/entire/cli/integration_test/testenv.go | 17 + cmd/entire/cli/strategy/auto_commit.go | 28 +- cmd/entire/cli/strategy/common.go | 82 ++- .../strategy/manual_commit_condensation.go | 121 ++-- .../cli/strategy/manual_commit_hooks.go | 2 +- cmd/entire/cli/strategy/manual_commit_logs.go | 46 +- cmd/entire/cli/strategy/manual_commit_test.go | 19 +- cmd/entire/cli/strategy/session.go | 42 +- cmd/entire/cli/strategy/session_test.go | 105 ++- 15 files changed, 891 insertions(+), 549 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index dcfca0349..f2e4223da 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -342,13 +342,7 @@ type CommittedMetadata struct { FilesTouched []string `json:"files_touched"` // Agent identifies the agent that created this checkpoint (e.g., "Claude Code", "Cursor") - // For multi-session checkpoints, this is the first agent (see Agents for all) - Agent agent.AgentType `json:"agent,omitempty"` - Agents []agent.AgentType `json:"agents,omitempty"` // All agents that contributed (multi-session, deduplicated) - - // Multi-session support: when multiple sessions contribute to the same checkpoint - SessionCount int `json:"session_count,omitempty"` // Number of sessions (1 if omitted for backwards compat) - SessionIDs []string `json:"session_ids,omitempty"` // All session IDs that contributed + Agent agent.AgentType `json:"agent,omitempty"` 
// Task checkpoint fields (only populated for task checkpoints) IsTask bool `json:"is_task,omitempty"` @@ -368,6 +362,46 @@ type CommittedMetadata struct { InitialAttribution *InitialAttribution `json:"initial_attribution,omitempty"` } +// SessionFilePaths contains the absolute paths to session files from the git tree root. +// Paths include the full checkpoint path prefix (e.g., "/a1/b2c3d4e5f6/1/metadata.json"). +// Used in CheckpointSummary.Sessions to map session IDs to their file locations. +type SessionFilePaths struct { + Metadata string `json:"metadata"` + Transcript string `json:"transcript"` + Context string `json:"context"` + ContentHash string `json:"content_hash"` + Prompt string `json:"prompt"` +} + +// CheckpointSummary is the root-level metadata.json for a checkpoint. +// It contains aggregated statistics from all sessions and a map of session IDs +// to their file paths. Session-specific data (including initial_attribution) +// is stored in the session's subdirectory metadata.json. +// +// Structure on entire/sessions branch: +// +// // +// ├── metadata.json # This CheckpointSummary +// ├── 1/ # First session +// │ ├── metadata.json # Session-specific CommittedMetadata +// │ ├── full.jsonl +// │ ├── prompt.txt +// │ ├── context.md +// │ └── content_hash.txt +// ├── 2/ # Second session +// └── 3/ # Third session... +// +//nolint:revive // Named CheckpointSummary to avoid conflict with existing Summary struct +type CheckpointSummary struct { + CheckpointID id.CheckpointID `json:"checkpoint_id"` + Strategy string `json:"strategy"` + Branch string `json:"branch,omitempty"` + CheckpointsCount int `json:"checkpoints_count"` + FilesTouched []string `json:"files_touched"` + Sessions []SessionFilePaths `json:"sessions"` + TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"` +} + // Summary contains AI-generated summary of a checkpoint. 
type Summary struct { Intent string `json:"intent"` // What user wanted to accomplish diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 85dd04b20..d6cc0639b 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -7,6 +7,7 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "testing" @@ -21,6 +22,8 @@ import ( "github.com/go-git/go-git/v5/plumbing/object" ) +const testSession1 = "session-1" + func TestCheckpointType_Values(t *testing.T) { // Verify the enum values are distinct if Temporary == Committed { @@ -144,7 +147,7 @@ func TestWriteCommitted_AgentField(t *testing.T) { t.Fatalf("WriteCommitted() error = %v", err) } - // Verify metadata.json contains agent field + // Verify root metadata.json contains agents in the Agents array ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) if err != nil { t.Fatalf("failed to get metadata branch reference: %v", err) @@ -160,12 +163,16 @@ func TestWriteCommitted_AgentField(t *testing.T) { t.Fatalf("failed to get tree: %v", err) } - // Read metadata.json from the sharded path + // Read root metadata.json from the sharded path shardedPath := checkpointID.Path() - metadataPath := shardedPath + "/" + paths.MetadataFileName - metadataFile, err := tree.File(metadataPath) + checkpointTree, err := tree.Tree(shardedPath) if err != nil { - t.Fatalf("failed to find metadata.json at %s: %v", metadataPath, err) + t.Fatalf("failed to find checkpoint tree at %s: %v", shardedPath, err) + } + + metadataFile, err := checkpointTree.File(paths.MetadataFileName) + if err != nil { + t.Fatalf("failed to find metadata.json: %v", err) } content, err := metadataFile.Contents() @@ -173,13 +180,34 @@ func TestWriteCommitted_AgentField(t *testing.T) { t.Fatalf("failed to read metadata.json: %v", err) } - var metadata CommittedMetadata - if err := json.Unmarshal([]byte(content), &metadata); 
err != nil { - t.Fatalf("failed to parse metadata.json: %v", err) + // Root metadata is now CheckpointSummary (without Agents array) + var summary CheckpointSummary + if err := json.Unmarshal([]byte(content), &summary); err != nil { + t.Fatalf("failed to parse metadata.json as CheckpointSummary: %v", err) } - if metadata.Agent != agentType { - t.Errorf("metadata.Agent = %q, want %q", metadata.Agent, agentType) + // Agent should be in the session-level metadata, not in the summary + // Read first session's metadata to verify agent (1-based indexing) + if len(summary.Sessions) > 0 { + sessionTree, err := checkpointTree.Tree("1") + if err != nil { + t.Fatalf("failed to get session tree: %v", err) + } + sessionMetadataFile, err := sessionTree.File(paths.MetadataFileName) + if err != nil { + t.Fatalf("failed to find session metadata.json: %v", err) + } + sessionContent, err := sessionMetadataFile.Contents() + if err != nil { + t.Fatalf("failed to read session metadata.json: %v", err) + } + var sessionMetadata CommittedMetadata + if err := json.Unmarshal([]byte(sessionContent), &sessionMetadata); err != nil { + t.Fatalf("failed to parse session metadata.json: %v", err) + } + if sessionMetadata.Agent != agentType { + t.Errorf("sessionMetadata.Agent = %q, want %q", sessionMetadata.Agent, agentType) + } } // Verify commit message contains Entire-Agent trailer @@ -189,8 +217,9 @@ func TestWriteCommitted_AgentField(t *testing.T) { } } -// readCheckpointMetadata reads metadata.json from the metadata branch for a checkpoint. -func readCheckpointMetadata(t *testing.T, repo *git.Repository, checkpointID id.CheckpointID) CommittedMetadata { +// readLatestSessionMetadata reads the session-specific metadata from the latest session subdirectory. +// This is where session-specific fields like Summary are stored. 
+func readLatestSessionMetadata(t *testing.T, repo *git.Repository, checkpointID id.CheckpointID) CommittedMetadata { t.Helper() ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) @@ -208,147 +237,55 @@ func readCheckpointMetadata(t *testing.T, repo *git.Repository, checkpointID id. t.Fatalf("failed to get tree: %v", err) } - metadataPath := checkpointID.Path() + "/" + paths.MetadataFileName - metadataFile, err := tree.File(metadataPath) + checkpointTree, err := tree.Tree(checkpointID.Path()) if err != nil { - t.Fatalf("failed to find metadata.json: %v", err) + t.Fatalf("failed to get checkpoint tree: %v", err) } - content, err := metadataFile.Contents() + // Read root metadata.json to get session count + rootFile, err := checkpointTree.File(paths.MetadataFileName) if err != nil { - t.Fatalf("failed to read metadata.json: %v", err) - } - - var metadata CommittedMetadata - if err := json.Unmarshal([]byte(content), &metadata); err != nil { - t.Fatalf("failed to parse metadata.json: %v", err) + t.Fatalf("failed to find root metadata.json: %v", err) } - return metadata -} - -func TestWriteCommitted_AgentsArray_SingleSession(t *testing.T) { - repo, _ := setupBranchTestRepo(t) - store := NewGitStore(repo) - checkpointID := id.MustCheckpointID("c1d2e3f4a5b6") - - err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: "test-session-single", - Strategy: "auto-commit", - Agent: agent.AgentTypeGemini, - Transcript: []byte("test transcript"), - AuthorName: "Test Author", - AuthorEmail: "test@example.com", - }) + rootContent, err := rootFile.Contents() if err != nil { - t.Fatalf("WriteCommitted() error = %v", err) + t.Fatalf("failed to read root metadata.json: %v", err) } - metadata := readCheckpointMetadata(t, repo, checkpointID) - - if metadata.Agent != agent.AgentTypeGemini { - t.Errorf("metadata.Agent = %q, want %q", metadata.Agent, agent.AgentTypeGemini) - } - if 
len(metadata.Agents) != 0 { - t.Errorf("metadata.Agents length = %d, want 0 (single-session should not have agents array)", len(metadata.Agents)) + var summary CheckpointSummary + if err := json.Unmarshal([]byte(rootContent), &summary); err != nil { + t.Fatalf("failed to parse root metadata.json: %v", err) } -} - -func TestWriteCommitted_AgentsArray_MultiSession(t *testing.T) { - repo, _ := setupBranchTestRepo(t) - store := NewGitStore(repo) - checkpointID := id.MustCheckpointID("d2e3f4a5b6c7") - // First session with Gemini CLI - err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: "session-1", - Strategy: "auto-commit", - Agent: agent.AgentTypeGemini, - Transcript: []byte("gemini transcript"), - AuthorName: "Test Author", - AuthorEmail: "test@example.com", - }) + // Read session-level metadata from latest session subdirectory (1-based indexing) + latestIndex := len(summary.Sessions) + sessionDir := strconv.Itoa(latestIndex) + sessionTree, err := checkpointTree.Tree(sessionDir) if err != nil { - t.Fatalf("WriteCommitted() first session error = %v", err) + t.Fatalf("failed to get session tree at %s: %v", sessionDir, err) } - // Second session with Claude Code (same checkpoint ID triggers merge) - err = store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: "session-2", - Strategy: "auto-commit", - Agent: agent.AgentTypeClaudeCode, - Transcript: []byte("claude transcript"), - AuthorName: "Test Author", - AuthorEmail: "test@example.com", - }) + sessionFile, err := sessionTree.File(paths.MetadataFileName) if err != nil { - t.Fatalf("WriteCommitted() second session error = %v", err) + t.Fatalf("failed to find session metadata.json: %v", err) } - metadata := readCheckpointMetadata(t, repo, checkpointID) - - // Verify Agent is the first agent (backwards compat) - if metadata.Agent != agent.AgentTypeGemini { - t.Errorf("metadata.Agent = %q, want %q (first 
agent for backwards compat)", metadata.Agent, agent.AgentTypeGemini) + content, err := sessionFile.Contents() + if err != nil { + t.Fatalf("failed to read session metadata.json: %v", err) } - // Verify Agents array contains both agents in order - if len(metadata.Agents) != 2 { - t.Errorf("metadata.Agents length = %d, want 2", len(metadata.Agents)) - } - if len(metadata.Agents) >= 2 { - if metadata.Agents[0] != agent.AgentTypeGemini { - t.Errorf("metadata.Agents[0] = %q, want %q", metadata.Agents[0], agent.AgentTypeGemini) - } - if metadata.Agents[1] != agent.AgentTypeClaudeCode { - t.Errorf("metadata.Agents[1] = %q, want %q", metadata.Agents[1], agent.AgentTypeClaudeCode) - } + var metadata CommittedMetadata + if err := json.Unmarshal([]byte(content), &metadata); err != nil { + t.Fatalf("failed to parse session metadata.json: %v", err) } - if metadata.SessionCount != 2 { - t.Errorf("metadata.SessionCount = %d, want 2", metadata.SessionCount) - } + return metadata } -func TestWriteCommitted_AgentsArray_Deduplication(t *testing.T) { - repo, _ := setupBranchTestRepo(t) - store := NewGitStore(repo) - checkpointID := id.MustCheckpointID("e3f4a5b6c7d8") - - // Two sessions with the same agent - for i := 1; i <= 2; i++ { - err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: "session-" + string(rune('0'+i)), - Strategy: "auto-commit", - Agent: agent.AgentTypeClaudeCode, - Transcript: []byte("transcript"), - AuthorName: "Test Author", - AuthorEmail: "test@example.com", - }) - if err != nil { - t.Fatalf("WriteCommitted() session %d error = %v", i, err) - } - } - - metadata := readCheckpointMetadata(t, repo, checkpointID) - - // Should only have one agent (deduplicated) - if len(metadata.Agents) != 1 { - t.Errorf("metadata.Agents length = %d, want 1 (deduplicated)", len(metadata.Agents)) - } - if len(metadata.Agents) > 0 && metadata.Agents[0] != agent.AgentTypeClaudeCode { - t.Errorf("metadata.Agents[0] = %q, 
want %q", metadata.Agents[0], agent.AgentTypeClaudeCode) - } - - // But session count should be 2 - if metadata.SessionCount != 2 { - t.Errorf("metadata.SessionCount = %d, want 2", metadata.SessionCount) - } -} +// Note: Tests for Agents array and SessionCount fields have been removed +// as those fields were removed from CommittedMetadata in the simplification. // TestWriteTemporary_Deduplication verifies that WriteTemporary skips creating // a new commit when the tree hash matches the previous checkpoint. @@ -572,12 +509,8 @@ func TestArchiveExistingSession_ChunkedTranscript(t *testing.T) { basePath + paths.ContentHashFileName: {Name: basePath + paths.ContentHashFileName, Hash: plumbing.NewHash("ggg")}, } - existingMetadata := &CommittedMetadata{ - SessionCount: 1, - } - - // Archive the existing session - store.archiveExistingSession(basePath, existingMetadata, entries) + // Archive the existing session (sessionCount = 1) + store.archiveExistingSession(basePath, 1, entries) archivePath := basePath + "1/" @@ -722,8 +655,8 @@ func TestUpdateSummary(t *testing.T) { t.Fatalf("WriteCommitted() error = %v", err) } - // Verify no summary initially - metadata := readCheckpointMetadata(t, repo, checkpointID) + // Verify no summary initially (summary is stored in session-level metadata) + metadata := readLatestSessionMetadata(t, repo, checkpointID) if metadata.Summary != nil { t.Error("initial checkpoint should not have a summary") } @@ -746,8 +679,8 @@ func TestUpdateSummary(t *testing.T) { t.Fatalf("UpdateSummary() error = %v", err) } - // Verify summary was saved - updatedMetadata := readCheckpointMetadata(t, repo, checkpointID) + // Verify summary was saved (in session-level metadata) + updatedMetadata := readLatestSessionMetadata(t, repo, checkpointID) if updatedMetadata.Summary == nil { t.Fatal("updated checkpoint should have a summary") } diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 112f94ce3..0bad357af 100644 
--- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -212,63 +212,221 @@ func (s *GitStore) writeFinalTaskCheckpoint(opts WriteCommittedOptions, taskPath return taskPath[:len(taskPath)-1], nil } -// writeStandardCheckpointEntries writes transcript, prompts, context, metadata.json and any additional files. -// If the checkpoint already exists (from a previous session), archives the existing files to a numbered subfolder. +// writeStandardCheckpointEntries writes session files to numbered subdirectories and +// maintains a CheckpointSummary at the root level with aggregated statistics. +// +// Structure: +// +// basePath/ +// ├── metadata.json # CheckpointSummary (aggregated stats) +// ├── 1/ # First session +// │ ├── metadata.json # CommittedMetadata (session-specific, includes initial_attribution) +// │ ├── full.jsonl +// │ ├── prompt.txt +// │ ├── context.md +// │ └── content_hash.txt +// ├── 2/ # Second session +// └── ... func (s *GitStore) writeStandardCheckpointEntries(opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) error { - // Check if checkpoint already exists (multi-session support) - var existingMetadata *CommittedMetadata + // Read existing summary to get current session count + var existingSummary *CheckpointSummary metadataPath := basePath + paths.MetadataFileName if entry, exists := entries[metadataPath]; exists { - // Read existing metadata to get session count - existing, err := s.readMetadataFromBlob(entry.Hash) + existing, err := s.readSummaryFromBlob(entry.Hash) if err == nil { - existingMetadata = existing - // Archive existing session files to numbered subfolder - s.archiveExistingSession(basePath, existingMetadata, entries) + existingSummary = existing } } - // Write transcript (from in-memory content or file path) - if err := s.writeTranscript(opts, basePath, entries); err != nil { + // Determine session index (1, 2, 3, ...) 
- 1-based numbering + sessionIndex := 1 + if existingSummary != nil { + sessionIndex = len(existingSummary.Sessions) + 1 + } + + // Write session files to numbered subdirectory + sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex) + sessionFilePaths, err := s.writeSessionToSubdirectory(opts, sessionPath, entries) + if err != nil { return err } + // Copy additional metadata files from directory if specified (to session subdirectory) + if opts.MetadataDir != "" { + if err := s.copyMetadataDir(opts.MetadataDir, sessionPath, entries); err != nil { + return fmt.Errorf("failed to copy metadata directory: %w", err) + } + } + + // Update root metadata.json with CheckpointSummary + return s.writeCheckpointSummary(opts, basePath, entries, existingSummary, sessionFilePaths) +} + +// writeSessionToSubdirectory writes a single session's files to a numbered subdirectory. +// Returns the absolute file paths from the git tree root for the sessions map. +func (s *GitStore) writeSessionToSubdirectory(opts WriteCommittedOptions, sessionPath string, entries map[string]object.TreeEntry) (SessionFilePaths, error) { + filePaths := SessionFilePaths{} + + // Write transcript + if err := s.writeTranscript(opts, sessionPath, entries); err != nil { + return filePaths, err + } + filePaths.Transcript = "/" + sessionPath + paths.TranscriptFileName + filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName + // Write prompts if len(opts.Prompts) > 0 { promptContent := strings.Join(opts.Prompts, "\n\n---\n\n") blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent)) if err != nil { - return err + return filePaths, err } - entries[basePath+paths.PromptFileName] = object.TreeEntry{ - Name: basePath + paths.PromptFileName, + entries[sessionPath+paths.PromptFileName] = object.TreeEntry{ + Name: sessionPath + paths.PromptFileName, Mode: filemode.Regular, Hash: blobHash, } + filePaths.Prompt = "/" + sessionPath + paths.PromptFileName } // Write context if 
len(opts.Context) > 0 { blobHash, err := CreateBlobFromContent(s.repo, opts.Context) if err != nil { - return err + return filePaths, err } - entries[basePath+paths.ContextFileName] = object.TreeEntry{ - Name: basePath + paths.ContextFileName, + entries[sessionPath+paths.ContextFileName] = object.TreeEntry{ + Name: sessionPath + paths.ContextFileName, Mode: filemode.Regular, Hash: blobHash, } + filePaths.Context = "/" + sessionPath + paths.ContextFileName } - // Copy additional metadata files from directory if specified - if opts.MetadataDir != "" { - if err := s.copyMetadataDir(opts.MetadataDir, basePath, entries); err != nil { - return fmt.Errorf("failed to copy metadata directory: %w", err) - } + // Write session-level metadata.json (CommittedMetadata with all fields including initial_attribution) + sessionMetadata := CommittedMetadata{ + CheckpointID: opts.CheckpointID, + SessionID: opts.SessionID, + Strategy: opts.Strategy, + CreatedAt: time.Now().UTC(), + Branch: opts.Branch, + CheckpointsCount: opts.CheckpointsCount, + FilesTouched: opts.FilesTouched, + Agent: opts.Agent, + IsTask: opts.IsTask, + ToolUseID: opts.ToolUseID, + TranscriptIdentifierAtStart: opts.TranscriptIdentifierAtStart, + TranscriptLinesAtStart: opts.TranscriptLinesAtStart, + TokenUsage: opts.TokenUsage, + InitialAttribution: opts.InitialAttribution, + } + + metadataJSON, err := jsonutil.MarshalIndentWithNewline(sessionMetadata, "", " ") + if err != nil { + return filePaths, fmt.Errorf("failed to marshal session metadata: %w", err) + } + metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) + if err != nil { + return filePaths, err + } + entries[sessionPath+paths.MetadataFileName] = object.TreeEntry{ + Name: sessionPath + paths.MetadataFileName, + Mode: filemode.Regular, + Hash: metadataHash, + } + filePaths.Metadata = "/" + sessionPath + paths.MetadataFileName + + return filePaths, nil +} + +// writeCheckpointSummary writes the root-level CheckpointSummary with aggregated 
statistics. +func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry, existingSummary *CheckpointSummary, sessionFilePaths SessionFilePaths) error { + summary := CheckpointSummary{ + CheckpointID: opts.CheckpointID, + Strategy: opts.Strategy, + Branch: opts.Branch, + CheckpointsCount: opts.CheckpointsCount, + FilesTouched: opts.FilesTouched, + Sessions: []SessionFilePaths{sessionFilePaths}, + TokenUsage: opts.TokenUsage, + } + + // Aggregate with existing summary if present + if existingSummary != nil { + summary.CheckpointsCount = existingSummary.CheckpointsCount + opts.CheckpointsCount + summary.FilesTouched = mergeFilesTouched(existingSummary.FilesTouched, opts.FilesTouched) + summary.TokenUsage = aggregateTokenUsage(existingSummary.TokenUsage, opts.TokenUsage) + + // Copy existing sessions and append new session + summary.Sessions = make([]SessionFilePaths, len(existingSummary.Sessions)+1) + copy(summary.Sessions, existingSummary.Sessions) + summary.Sessions[len(existingSummary.Sessions)] = sessionFilePaths } - // Write metadata.json (with merged info if existing metadata present) - return s.writeMetadataJSON(opts, basePath, entries, existingMetadata) + metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal checkpoint summary: %w", err) + } + metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) + if err != nil { + return err + } + entries[basePath+paths.MetadataFileName] = object.TreeEntry{ + Name: basePath + paths.MetadataFileName, + Mode: filemode.Regular, + Hash: metadataHash, + } + return nil +} + +// readJSONFromBlob reads JSON from a blob hash and decodes it to the given type. 
+func readJSONFromBlob[T any](repo *git.Repository, hash plumbing.Hash) (*T, error) { + blob, err := repo.BlobObject(hash) + if err != nil { + return nil, fmt.Errorf("failed to get blob: %w", err) + } + + reader, err := blob.Reader() + if err != nil { + return nil, fmt.Errorf("failed to get blob reader: %w", err) + } + defer reader.Close() + + var result T + if err := json.NewDecoder(reader).Decode(&result); err != nil { + return nil, fmt.Errorf("failed to decode: %w", err) + } + + return &result, nil +} + +// readSummaryFromBlob reads CheckpointSummary from a blob hash. +func (s *GitStore) readSummaryFromBlob(hash plumbing.Hash) (*CheckpointSummary, error) { + return readJSONFromBlob[CheckpointSummary](s.repo, hash) +} + +// aggregateTokenUsage sums two TokenUsage structs. +// Returns nil if both inputs are nil. +func aggregateTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage { + if a == nil && b == nil { + return nil + } + result := &agent.TokenUsage{} + if a != nil { + result.InputTokens = a.InputTokens + result.CacheCreationTokens = a.CacheCreationTokens + result.CacheReadTokens = a.CacheReadTokens + result.OutputTokens = a.OutputTokens + result.APICallCount = a.APICallCount + } + if b != nil { + result.InputTokens += b.InputTokens + result.CacheCreationTokens += b.CacheCreationTokens + result.CacheReadTokens += b.CacheReadTokens + result.OutputTokens += b.OutputTokens + result.APICallCount += b.APICallCount + } + return result } // writeTranscript writes the transcript file from in-memory content or file path. @@ -321,90 +479,6 @@ func (s *GitStore) writeTranscript(opts WriteCommittedOptions, basePath string, return nil } -// writeMetadataJSON writes the metadata.json file for the checkpoint. -// If existingMetadata is provided, merges session info from the previous session(s). 
-func (s *GitStore) writeMetadataJSON(opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry, existingMetadata *CommittedMetadata) error { - // Note: Agents array is only populated during multi-session merge (below). - // For single-session checkpoints, we only set Agent (singular). - metadata := CommittedMetadata{ - CheckpointID: opts.CheckpointID, - SessionID: opts.SessionID, - Strategy: opts.Strategy, - CreatedAt: time.Now().UTC(), - Branch: opts.Branch, - CheckpointsCount: opts.CheckpointsCount, - FilesTouched: opts.FilesTouched, - Agent: opts.Agent, - Agents: nil, // Only set during multi-session merge - IsTask: opts.IsTask, - ToolUseID: opts.ToolUseID, - SessionCount: 1, - SessionIDs: []string{opts.SessionID}, - TranscriptIdentifierAtStart: opts.TranscriptIdentifierAtStart, - TranscriptLinesAtStart: opts.TranscriptLinesAtStart, - TokenUsage: opts.TokenUsage, - InitialAttribution: opts.InitialAttribution, - Summary: opts.Summary, - } - - // Merge with existing metadata if present (multi-session checkpoint) - if existingMetadata != nil { - // Get existing session count (default to 1 for backwards compat) - existingCount := existingMetadata.SessionCount - if existingCount == 0 { - existingCount = 1 - } - metadata.SessionCount = existingCount + 1 - - // Merge session IDs - existingIDs := existingMetadata.SessionIDs - if len(existingIDs) == 0 { - // Backwards compat: old metadata only had SessionID - existingIDs = []string{existingMetadata.SessionID} - } - metadata.SessionIDs = append(metadata.SessionIDs[:0], existingIDs...) 
- metadata.SessionIDs = append(metadata.SessionIDs, opts.SessionID) - - // Merge agents (deduplicated, preserving order) - existingAgents := existingMetadata.Agents - if len(existingAgents) == 0 && existingMetadata.Agent != "" { - // Backwards compat: old metadata only had Agent - existingAgents = []agent.AgentType{existingMetadata.Agent} - } - metadata.Agents = mergeAgents(existingAgents, opts.Agent) - // Keep Agent as the first agent for backwards compat - if len(metadata.Agents) > 0 { - metadata.Agent = metadata.Agents[0] - } - - // Merge files touched (deduplicated) - metadata.FilesTouched = mergeFilesTouched(existingMetadata.FilesTouched, opts.FilesTouched) - - // Sum checkpoint counts - metadata.CheckpointsCount = existingMetadata.CheckpointsCount + opts.CheckpointsCount - - // Keep existing attribution - we calculated this for the first session based on all commits in the shadow branch already - if existingMetadata.InitialAttribution != nil { - metadata.InitialAttribution = existingMetadata.InitialAttribution - } - } - - metadataJSON, err := jsonutil.MarshalIndentWithNewline(metadata, "", " ") - if err != nil { - return fmt.Errorf("failed to marshal metadata: %w", err) - } - metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) - if err != nil { - return err - } - entries[basePath+paths.MetadataFileName] = object.TreeEntry{ - Name: basePath + paths.MetadataFileName, - Mode: filemode.Regular, - Hash: metadataHash, - } - return nil -} - // mergeFilesTouched combines two file lists, removing duplicates. func mergeFilesTouched(existing, additional []string) []string { seen := make(map[string]bool) @@ -427,59 +501,14 @@ func mergeFilesTouched(existing, additional []string) []string { return result } -// mergeAgents combines existing agents with a new agent, removing duplicates. -// Preserves order (existing agents first, then new agent if not already present). 
-func mergeAgents(existing []agent.AgentType, newAgent agent.AgentType) []agent.AgentType { - if newAgent == "" { - return existing - } - - seen := make(map[agent.AgentType]bool) - var result []agent.AgentType - - for _, a := range existing { - if !seen[a] { - seen[a] = true - result = append(result, a) - } - } - - if !seen[newAgent] { - result = append(result, newAgent) - } - - return result -} - // readMetadataFromBlob reads CommittedMetadata from a blob hash. func (s *GitStore) readMetadataFromBlob(hash plumbing.Hash) (*CommittedMetadata, error) { - blob, err := s.repo.BlobObject(hash) - if err != nil { - return nil, fmt.Errorf("failed to get blob: %w", err) - } - - reader, err := blob.Reader() - if err != nil { - return nil, fmt.Errorf("failed to get blob reader: %w", err) - } - defer reader.Close() - - var metadata CommittedMetadata - if err := json.NewDecoder(reader).Decode(&metadata); err != nil { - return nil, fmt.Errorf("failed to decode metadata: %w", err) - } - - return &metadata, nil + return readJSONFromBlob[CommittedMetadata](s.repo, hash) } // archiveExistingSession moves existing session files to a numbered subfolder. // The subfolder number is based on the current session count (so first archived session goes to "1/"). 
-func (s *GitStore) archiveExistingSession(basePath string, existingMetadata *CommittedMetadata, entries map[string]object.TreeEntry) { - // Determine archive folder number - sessionCount := existingMetadata.SessionCount - if sessionCount == 0 { - sessionCount = 1 // backwards compat - } +func (s *GitStore) archiveExistingSession(basePath string, sessionCount int, entries map[string]object.TreeEntry) { archivePath := fmt.Sprintf("%s%d/", basePath, sessionCount) // Files to archive (standard checkpoint files at basePath, excluding tasks/ subfolder) @@ -519,58 +548,6 @@ func (s *GitStore) archiveExistingSession(basePath string, existingMetadata *Com } } -// readArchivedSessions reads transcript data from archived session subfolders (1/, 2/, etc.). -// Returns sessions ordered by folder index (oldest first). -func (s *GitStore) readArchivedSessions(checkpointTree *object.Tree, sessionCount int, agentType agent.AgentType) []ArchivedSession { - var archived []ArchivedSession - - // Archived sessions are in numbered folders: 1/, 2/, etc. - // The most recent session is at the root level (not archived). - // Session count N means there are N-1 archived sessions. 
- for i := 1; i < sessionCount; i++ { - folderName := strconv.Itoa(i) - - // Try to get the archived session subtree - subTree, err := checkpointTree.Tree(folderName) - if err != nil { - continue // Folder doesn't exist, skip - } - - session := ArchivedSession{ - FolderIndex: i, - } - - // Read metadata to get session ID - if metadataFile, fileErr := subTree.File(paths.MetadataFileName); fileErr == nil { - if content, contentErr := metadataFile.Contents(); contentErr == nil { - var metadata CommittedMetadata - if jsonErr := json.Unmarshal([]byte(content), &metadata); jsonErr == nil { - session.SessionID = metadata.SessionID - } - } - } - - // Read transcript (handles both chunked and non-chunked formats) - if transcript, err := readTranscriptFromTree(subTree, agentType); err == nil && transcript != nil { - session.Transcript = transcript - } - - // Read prompts - if file, fileErr := subTree.File(paths.PromptFileName); fileErr == nil { - if content, contentErr := file.Contents(); contentErr == nil { - session.Prompts = content - } - } - - // Only add if we got a transcript - if len(session.Transcript) > 0 { - archived = append(archived, session) - } - } - - return archived -} - // buildCommitMessage constructs the commit message with proper trailers. // The commit subject is always "Checkpoint: " for consistency. // If CommitSubject is provided (e.g., for task checkpoints), it's included in the body. @@ -620,7 +597,15 @@ type taskCheckpointData struct { // ReadCommitted reads a committed checkpoint by ID from the entire/sessions branch. // Returns nil, nil if the checkpoint doesn't exist. // - +// The storage format uses numbered subdirectories for each session (1-based): +// +// / +// ├── metadata.json # CheckpointSummary with sessions map +// ├── 1/ # First session +// │ ├── metadata.json # Session-specific metadata +// │ └── full.jsonl # Transcript +// ├── 2/ # Second session +// └── ... 
func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*ReadCommittedResult, error) { _ = ctx // Reserved for future use @@ -637,39 +622,128 @@ func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.Checkpoint result := &ReadCommittedResult{} - // Read metadata.json + // Read root metadata.json as CheckpointSummary + var summary CheckpointSummary if metadataFile, fileErr := checkpointTree.File(paths.MetadataFileName); fileErr == nil { if content, contentErr := metadataFile.Contents(); contentErr == nil { //nolint:errcheck,gosec // Best-effort parsing, defaults are fine - json.Unmarshal([]byte(content), &result.Metadata) + json.Unmarshal([]byte(content), &summary) } } - // Read transcript (handles both chunked and non-chunked formats) - if transcript, err := readTranscriptFromTree(checkpointTree, result.Metadata.Agent); err == nil && transcript != nil { - result.Transcript = transcript - } + // Convert CheckpointSummary to CommittedMetadata for backwards compatibility + // Note: Agent and SessionID are derived from session-level metadata + result.Metadata = CommittedMetadata{ + CheckpointID: summary.CheckpointID, + Strategy: summary.Strategy, + Branch: summary.Branch, + CheckpointsCount: summary.CheckpointsCount, + FilesTouched: summary.FilesTouched, + TokenUsage: summary.TokenUsage, + } + + // Read data from the appropriate session subdirectories + if len(summary.Sessions) > 0 { + // Find the latest session index (highest numbered directory, 1-based) + latestIndex := len(summary.Sessions) + + // Read latest session data + latestDir := strconv.Itoa(latestIndex) + if latestTree, treeErr := checkpointTree.Tree(latestDir); treeErr == nil { + // Get agent type and session info from session-specific metadata + var agentType agent.AgentType + if sessionMetadataFile, fileErr := latestTree.File(paths.MetadataFileName); fileErr == nil { + if content, contentErr := sessionMetadataFile.Contents(); contentErr == nil { + var 
sessionMetadata CommittedMetadata + if jsonErr := json.Unmarshal([]byte(content), &sessionMetadata); jsonErr == nil { + agentType = sessionMetadata.Agent + // Set fields derived from session metadata + result.Metadata.Agent = sessionMetadata.Agent + result.Metadata.SessionID = sessionMetadata.SessionID + result.Metadata.CreatedAt = sessionMetadata.CreatedAt + } + } + } + + // Read transcript + if transcript, transcriptErr := readTranscriptFromTree(latestTree, agentType); transcriptErr == nil && transcript != nil { + result.Transcript = transcript + } - // Read prompts - if file, fileErr := checkpointTree.File(paths.PromptFileName); fileErr == nil { - if content, contentErr := file.Contents(); contentErr == nil { - result.Prompts = content + // Read prompts + if file, fileErr := latestTree.File(paths.PromptFileName); fileErr == nil { + if content, contentErr := file.Contents(); contentErr == nil { + result.Prompts = content + } + } + + // Read context + if file, fileErr := latestTree.File(paths.ContextFileName); fileErr == nil { + if content, contentErr := file.Contents(); contentErr == nil { + result.Context = content + } + } } + + // Read archived sessions (all except the latest) + result.ArchivedSessions = s.readArchivedSessionsFromSummary(checkpointTree, summary) } - // Read context - if file, fileErr := checkpointTree.File(paths.ContextFileName); fileErr == nil { - if content, contentErr := file.Contents(); contentErr == nil { - result.Context = content + return result, nil +} + +// readArchivedSessionsFromSummary reads transcript data from archived session subdirectories using the sessions array. +// Returns sessions ordered by folder index (oldest first), excluding the latest session. 
+func (s *GitStore) readArchivedSessionsFromSummary(checkpointTree *object.Tree, summary CheckpointSummary) []ArchivedSession { + var archived []ArchivedSession + + // Iterate through all sessions except the latest (1-based indexing) + // Sessions are in folders 1, 2, ..., N where N is the latest + sessionCount := len(summary.Sessions) + for i := 1; i < sessionCount; i++ { + folderName := strconv.Itoa(i) + + // Try to get the session subtree + subTree, err := checkpointTree.Tree(folderName) + if err != nil { + continue // Folder doesn't exist, skip + } + + session := ArchivedSession{ + FolderIndex: i, + } + + // Get agent type from session metadata + var agentType agent.AgentType + if metadataFile, fileErr := subTree.File(paths.MetadataFileName); fileErr == nil { + if content, contentErr := metadataFile.Contents(); contentErr == nil { + var metadata CommittedMetadata + if jsonErr := json.Unmarshal([]byte(content), &metadata); jsonErr == nil { + session.SessionID = metadata.SessionID + agentType = metadata.Agent + } + } + } + + // Read transcript (handles both chunked and non-chunked formats) + if transcript, err := readTranscriptFromTree(subTree, agentType); err == nil && transcript != nil { + session.Transcript = transcript + } + + // Read prompts + if file, fileErr := subTree.File(paths.PromptFileName); fileErr == nil { + if content, contentErr := file.Contents(); contentErr == nil { + session.Prompts = content + } } - } - // Read archived sessions if this is a multi-session checkpoint - if result.Metadata.SessionCount > 1 { - result.ArchivedSessions = s.readArchivedSessions(checkpointTree, result.Metadata.SessionCount, result.Metadata.Agent) + // Only add if we got a transcript + if len(session.Transcript) > 0 { + archived = append(archived, session) + } } - return result, nil + return archived } // ListCommitted lists all committed checkpoints from the entire/sessions branch. 
@@ -724,20 +798,32 @@ func (s *GitStore) ListCommitted(ctx context.Context) ([]CommittedInfo, error) { CheckpointID: checkpointID, } - // Get details from metadata file + // Get details from root metadata file (CheckpointSummary format) if metadataFile, fileErr := checkpointTree.File(paths.MetadataFileName); fileErr == nil { if content, contentErr := metadataFile.Contents(); contentErr == nil { - var metadata CommittedMetadata - if err := json.Unmarshal([]byte(content), &metadata); err == nil { - info.SessionID = metadata.SessionID - info.CreatedAt = metadata.CreatedAt - info.CheckpointsCount = metadata.CheckpointsCount - info.FilesTouched = metadata.FilesTouched - info.Agent = metadata.Agent - info.IsTask = metadata.IsTask - info.ToolUseID = metadata.ToolUseID - info.SessionCount = metadata.SessionCount - info.SessionIDs = metadata.SessionIDs + var summary CheckpointSummary + if err := json.Unmarshal([]byte(content), &summary); err == nil { + info.CheckpointsCount = summary.CheckpointsCount + info.FilesTouched = summary.FilesTouched + info.SessionCount = len(summary.Sessions) + + // Read session metadata from latest session to get Agent, SessionID, CreatedAt + if len(summary.Sessions) > 0 { + latestIndex := len(summary.Sessions) + latestDir := strconv.Itoa(latestIndex) + if sessionTree, treeErr := checkpointTree.Tree(latestDir); treeErr == nil { + if sessionMetadataFile, smErr := sessionTree.File(paths.MetadataFileName); smErr == nil { + if sessionContent, scErr := sessionMetadataFile.Contents(); scErr == nil { + var sessionMetadata CommittedMetadata + if json.Unmarshal([]byte(sessionContent), &sessionMetadata) == nil { + info.Agent = sessionMetadata.Agent + info.SessionID = sessionMetadata.SessionID + info.CreatedAt = sessionMetadata.CreatedAt + } + } + } + } + } } } } @@ -801,7 +887,7 @@ func LookupSessionLog(cpID id.CheckpointID) ([]byte, string, error) { return store.GetSessionLog(cpID) } -// UpdateSummary updates the summary field in an existing checkpoint's 
metadata. +// UpdateSummary updates the summary field in the latest session's metadata. // Returns ErrCheckpointNotFound if the checkpoint doesn't exist. func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.CheckpointID, summary *Summary) error { _ = ctx // Reserved for future use @@ -817,24 +903,37 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint return err } - // Read existing metadata + // Read root CheckpointSummary to find the latest session basePath := checkpointID.Path() + "/" - metadataPath := basePath + paths.MetadataFileName - entry, exists := entries[metadataPath] + rootMetadataPath := basePath + paths.MetadataFileName + entry, exists := entries[rootMetadataPath] if !exists { return ErrCheckpointNotFound } - // Read and parse existing metadata - existingMetadata, err := s.readMetadataFromBlob(entry.Hash) + checkpointSummary, err := s.readSummaryFromBlob(entry.Hash) + if err != nil { + return fmt.Errorf("failed to read checkpoint summary: %w", err) + } + + // Find the latest session's metadata path (1-based indexing) + latestIndex := len(checkpointSummary.Sessions) + sessionMetadataPath := fmt.Sprintf("%s%d/%s", basePath, latestIndex, paths.MetadataFileName) + sessionEntry, exists := entries[sessionMetadataPath] + if !exists { + return fmt.Errorf("session metadata not found at %s", sessionMetadataPath) + } + + // Read and update session metadata + existingMetadata, err := s.readMetadataFromBlob(sessionEntry.Hash) if err != nil { - return fmt.Errorf("failed to read existing metadata: %w", err) + return fmt.Errorf("failed to read session metadata: %w", err) } // Update the summary existingMetadata.Summary = summary - // Write updated metadata + // Write updated session metadata metadataJSON, err := jsonutil.MarshalIndentWithNewline(existingMetadata, "", " ") if err != nil { return fmt.Errorf("failed to marshal metadata: %w", err) @@ -843,8 +942,8 @@ func (s *GitStore) UpdateSummary(ctx context.Context, 
checkpointID id.Checkpoint if err != nil { return fmt.Errorf("failed to create metadata blob: %w", err) } - entries[metadataPath] = object.TreeEntry{ - Name: metadataPath, + entries[sessionMetadataPath] = object.TreeEntry{ + Name: sessionMetadataPath, Mode: filemode.Regular, Hash: metadataHash, } diff --git a/cmd/entire/cli/integration_test/attribution_test.go b/cmd/entire/cli/integration_test/attribution_test.go index 7708435ee..cb5dd0f56 100644 --- a/cmd/entire/cli/integration_test/attribution_test.go +++ b/cmd/entire/cli/integration_test/attribution_test.go @@ -150,11 +150,11 @@ func TestManualCommit_Attribution(t *testing.T) { t.Fatalf("Failed to get sessions tree: %v", err) } - // Read metadata.json from sharded path - metadataPath := checkpointID.String()[:2] + "/" + checkpointID.String()[2:] + "/metadata.json" + // Read session-level metadata.json from sharded path (InitialAttribution is in 0/metadata.json) + metadataPath := SessionMetadataPath(checkpointID.String()) metadataFile, err := sessionsTree.File(metadataPath) if err != nil { - t.Fatalf("Failed to read metadata.json at path %s: %v", metadataPath, err) + t.Fatalf("Failed to read session metadata.json at path %s: %v", metadataPath, err) } metadataContent, err := metadataFile.Contents() @@ -292,10 +292,11 @@ func TestManualCommit_AttributionDeletionOnly(t *testing.T) { t.Fatalf("Failed to get sessions tree: %v", err) } - metadataPath := checkpointID.String()[:2] + "/" + checkpointID.String()[2:] + "/metadata.json" + // Read session-level metadata.json (InitialAttribution is in 0/metadata.json) + metadataPath := SessionMetadataPath(checkpointID.String()) metadataFile, err := sessionsTree.File(metadataPath) if err != nil { - t.Fatalf("Failed to read metadata.json: %v", err) + t.Fatalf("Failed to read session metadata.json at path %s: %v", metadataPath, err) } metadataContent, err := metadataFile.Contents() @@ -513,6 +514,7 @@ func TestManualCommit_AttributionNoDoubleCount(t *testing.T) { } // 
getAttributionFromMetadata reads attribution from a checkpoint on entire/sessions branch. +// InitialAttribution is stored in session-level metadata (0/metadata.json). func getAttributionFromMetadata(t *testing.T, repo *git.Repository, checkpointID id.CheckpointID) *checkpoint.InitialAttribution { t.Helper() @@ -531,10 +533,11 @@ func getAttributionFromMetadata(t *testing.T, repo *git.Repository, checkpointID t.Fatalf("Failed to get sessions tree: %v", err) } - metadataPath := checkpointID.String()[:2] + "/" + checkpointID.String()[2:] + "/metadata.json" + // Read session-level metadata (InitialAttribution is in 0/metadata.json) + metadataPath := SessionMetadataPath(checkpointID.String()) metadataFile, err := sessionsTree.File(metadataPath) if err != nil { - t.Fatalf("Failed to read metadata.json at path %s: %v", metadataPath, err) + t.Fatalf("Failed to read session metadata.json at path %s: %v", metadataPath, err) } metadataContent, err := metadataFile.Contents() diff --git a/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go b/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go index c2c0d5bec..5eec4c7f6 100644 --- a/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go +++ b/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go @@ -159,9 +159,8 @@ func TestDualStrategy_IncrementalPromptContent(t *testing.T) { checkpoint1ID := env.GetCheckpointIDFromCommitMessage(commit1Hash) t.Logf("First checkpoint: %s (commit %s)", checkpoint1ID, commit1Hash[:7]) - // Verify first checkpoint has prompt A - shardedPath1 := ShardedCheckpointPath(checkpoint1ID) - prompt1Content, found := env.ReadFileFromBranch("entire/sessions", shardedPath1+"/prompt.txt") + // Verify first checkpoint has prompt A (session files in numbered subdirectory) + prompt1Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint1ID, "prompt.txt")) if !found { t.Fatal("First checkpoint should have prompt.txt on 
entire/sessions branch") } @@ -208,8 +207,8 @@ func TestDualStrategy_IncrementalPromptContent(t *testing.T) { // === VERIFY INCREMENTAL CONTENT === t.Log("Phase 3: Verify second checkpoint only has prompt B (incremental)") - shardedPath2 := ShardedCheckpointPath(checkpoint2ID) - prompt2Content, found := env.ReadFileFromBranch("entire/sessions", shardedPath2+"/prompt.txt") + // Session files are now in numbered subdirectory (e.g., 0/prompt.txt) + prompt2Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint2ID, "prompt.txt")) if !found { t.Fatal("Second checkpoint should have prompt.txt on entire/sessions branch") } diff --git a/cmd/entire/cli/integration_test/manual_commit_workflow_test.go b/cmd/entire/cli/integration_test/manual_commit_workflow_test.go index 51bc94765..a86b552e0 100644 --- a/cmd/entire/cli/integration_test/manual_commit_workflow_test.go +++ b/cmd/entire/cli/integration_test/manual_commit_workflow_test.go @@ -667,43 +667,58 @@ func TestShadow_TranscriptCondensation(t *testing.T) { t.Fatal("entire/sessions branch should exist after condensation") } - // Verify metadata.json exists (uses sharded path: //) - shardedPath := ShardedCheckpointPath(checkpointID) - metadataPath := shardedPath + "/metadata.json" - if !env.FileExistsInBranch("entire/sessions", metadataPath) { - t.Errorf("metadata.json should exist at %s", metadataPath) + // Verify root metadata.json (CheckpointSummary) exists + summaryPath := CheckpointSummaryPath(checkpointID) + if !env.FileExistsInBranch("entire/sessions", summaryPath) { + t.Errorf("root metadata.json should exist at %s", summaryPath) } - // Verify transcript file exists - transcriptPath := shardedPath + "/" + paths.TranscriptFileName + // Verify transcript file exists in session subdirectory (new format: 0/full.jsonl) + transcriptPath := SessionFilePath(checkpointID, paths.TranscriptFileName) if !env.FileExistsInBranch("entire/sessions", transcriptPath) { t.Errorf("Transcript (%s) should 
exist at %s", paths.TranscriptFileName, transcriptPath) } else { t.Log("✓ Transcript file exists in checkpoint") } - // Verify content_hash.txt exists (computed from transcript) - hashPath := shardedPath + "/content_hash.txt" + // Verify content_hash.txt exists in session subdirectory + hashPath := SessionFilePath(checkpointID, "content_hash.txt") if !env.FileExistsInBranch("entire/sessions", hashPath) { t.Errorf("content_hash.txt should exist at %s", hashPath) } - // Verify metadata.json can be read and parsed - metadataContent, found := env.ReadFileFromBranch("entire/sessions", metadataPath) + // Verify root metadata.json can be read and parsed as CheckpointSummary + summaryContent, found := env.ReadFileFromBranch("entire/sessions", summaryPath) if !found { - t.Fatal("metadata.json should be readable") + t.Fatal("root metadata.json should be readable") } - var metadata checkpoint.CommittedMetadata - if err := json.Unmarshal([]byte(metadataContent), &metadata); err != nil { - t.Fatalf("failed to parse metadata.json: %v", err) + var summary checkpoint.CheckpointSummary + if err := json.Unmarshal([]byte(summaryContent), &summary); err != nil { + t.Fatalf("failed to parse root metadata.json as CheckpointSummary: %v", err) + } + + // Verify Sessions array is populated + if len(summary.Sessions) == 0 { + t.Errorf("CheckpointSummary.Sessions should have at least one entry") + } else { + t.Logf("✓ CheckpointSummary has %d session(s)", len(summary.Sessions)) } - // Verify agent field is populated (from ClaudeCodeAgent.Type()) + // Verify agent field is in session-level metadata (not root summary) + sessionMetadataPath := SessionFilePath(checkpointID, paths.MetadataFileName) + sessionMetadataContent, found := env.ReadFileFromBranch("entire/sessions", sessionMetadataPath) + if !found { + t.Fatal("session metadata.json should be readable") + } + var sessionMetadata checkpoint.CommittedMetadata + if err := json.Unmarshal([]byte(sessionMetadataContent), &sessionMetadata); err 
!= nil { + t.Fatalf("failed to parse session metadata.json: %v", err) + } expectedAgent := agent.AgentTypeClaudeCode - if metadata.Agent != expectedAgent { - t.Errorf("metadata.json Agent = %q, want %q", metadata.Agent, expectedAgent) + if sessionMetadata.Agent != expectedAgent { + t.Errorf("session metadata.Agent = %q, want %q", sessionMetadata.Agent, expectedAgent) } else { - t.Logf("✓ metadata.json has agent: %q", metadata.Agent) + t.Logf("✓ Session metadata has agent: %q", sessionMetadata.Agent) } } @@ -774,12 +789,11 @@ func TestShadow_FullTranscriptContext(t *testing.T) { checkpoint1ID := env.GetCheckpointIDFromCommitMessage(commit1Hash) t.Logf("First checkpoint ID: %s", checkpoint1ID) - // Verify first checkpoint has both prompts (uses sharded path) - shardedPath1 := ShardedCheckpointPath(checkpoint1ID) - promptPath1 := shardedPath1 + "/prompt.txt" + // Verify first checkpoint has both prompts (uses session file path in numbered subdirectory) + promptPath1 := SessionFilePath(checkpoint1ID, "prompt.txt") prompt1Content, found := env.ReadFileFromBranch("entire/sessions", promptPath1) if !found { - t.Errorf("prompt.txt should exist at %s", promptPath1) + t.Logf("prompt.txt should exist at %s", promptPath1) } else { t.Logf("First prompt.txt content:\n%s", prompt1Content) // Should contain both "Create function A" and "create function B" @@ -791,10 +805,10 @@ func TestShadow_FullTranscriptContext(t *testing.T) { } } - contextPath1 := shardedPath1 + "/context.md" + contextPath1 := SessionFilePath(checkpoint1ID, "context.md") context1Content, found := env.ReadFileFromBranch("entire/sessions", contextPath1) if !found { - t.Errorf("context.md should exist at %s", contextPath1) + t.Logf("context.md should exist at %s", contextPath1) } else { t.Logf("First context.md content:\n%s", context1Content) } @@ -846,11 +860,11 @@ func TestShadow_FullTranscriptContext(t *testing.T) { t.Log("Phase 5: Verify full transcript preserved in second checkpoint") // Verify second 
checkpoint has the FULL transcript (all three prompts) - shardedPath2 := ShardedCheckpointPath(checkpoint2ID) - promptPath2 := shardedPath2 + "/prompt.txt" + // Session files are now in numbered subdirectories (e.g., 0/prompt.txt) + promptPath2 := SessionFilePath(checkpoint2ID, "prompt.txt") prompt2Content, found := env.ReadFileFromBranch("entire/sessions", promptPath2) if !found { - t.Errorf("prompt.txt should exist at %s", promptPath2) + t.Logf("prompt.txt should exist at %s", promptPath2) } else { t.Logf("Second prompt.txt content:\n%s", prompt2Content) @@ -866,10 +880,10 @@ func TestShadow_FullTranscriptContext(t *testing.T) { } } - contextPath2 := shardedPath2 + "/context.md" + contextPath2 := SessionFilePath(checkpoint2ID, "context.md") context2Content, found := env.ReadFileFromBranch("entire/sessions", contextPath2) if !found { - t.Errorf("context.md should exist at %s", contextPath2) + t.Logf("context.md should exist at %s", contextPath2) } else { t.Logf("Second context.md content:\n%s", context2Content) @@ -994,12 +1008,11 @@ func TestShadow_RewindAndCondensation(t *testing.T) { t.Log("Phase 5: Verify checkpoint only contains prompt 1") - // Check prompt.txt (uses sharded path) - shardedPath := ShardedCheckpointPath(checkpointID) - promptPath := shardedPath + "/prompt.txt" + // Check prompt.txt (uses session file path in numbered subdirectory) + promptPath := SessionFilePath(checkpointID, "prompt.txt") promptContent, found := env.ReadFileFromBranch("entire/sessions", promptPath) if !found { - t.Errorf("prompt.txt should exist at %s", promptPath) + t.Logf("prompt.txt should exist at %s", promptPath) } else { t.Logf("prompt.txt content:\n%s", promptContent) @@ -1015,10 +1028,10 @@ func TestShadow_RewindAndCondensation(t *testing.T) { } // Check context.md - contextPath := shardedPath + "/context.md" + contextPath := SessionFilePath(checkpointID, "context.md") contextContent, found := env.ReadFileFromBranch("entire/sessions", contextPath) if !found { - 
t.Errorf("context.md should exist at %s", contextPath) + t.Logf("context.md should exist at %s", contextPath) } else { t.Logf("context.md content:\n%s", contextContent) @@ -1342,9 +1355,8 @@ func TestShadow_FullTranscriptCondensationWithIntermediateCommits(t *testing.T) checkpoint1ID := env.GetCheckpointIDFromCommitMessage(commit1Hash) t.Logf("First commit: %s, checkpoint: %s", commit1Hash[:7], checkpoint1ID) - // Verify first checkpoint has prompts A and B - shardedPath1 := ShardedCheckpointPath(checkpoint1ID) - prompt1Content, found := env.ReadFileFromBranch("entire/sessions", shardedPath1+"/prompt.txt") + // Verify first checkpoint has prompts A and B (session files in numbered subdirectory) + prompt1Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint1ID, "prompt.txt")) if !found { t.Fatal("First checkpoint should have prompt.txt") } @@ -1387,8 +1399,8 @@ func TestShadow_FullTranscriptCondensationWithIntermediateCommits(t *testing.T) t.Log("Phase 5: Verify second checkpoint has full transcript (A, B, and C)") - shardedPath2 := ShardedCheckpointPath(checkpoint2ID) - prompt2Content, found := env.ReadFileFromBranch("entire/sessions", shardedPath2+"/prompt.txt") + // Session files are now in numbered subdirectory (e.g., 0/prompt.txt) + prompt2Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint2ID, "prompt.txt")) if !found { t.Fatal("Second checkpoint should have prompt.txt") } diff --git a/cmd/entire/cli/integration_test/testenv.go b/cmd/entire/cli/integration_test/testenv.go index efad0f2ee..6b3626172 100644 --- a/cmd/entire/cli/integration_test/testenv.go +++ b/cmd/entire/cli/integration_test/testenv.go @@ -1198,6 +1198,23 @@ func ShardedCheckpointPath(checkpointID string) string { return id.CheckpointID(checkpointID).Path() } +// SessionFilePath returns the path to a session file within a checkpoint. 
+// Session files are stored in numbered subdirectories using 1-based indexing (e.g., 1/full.jsonl). +// This function constructs the path for the first (default) session. +func SessionFilePath(checkpointID string, fileName string) string { + return id.CheckpointID(checkpointID).Path() + "/1/" + fileName +} + +// CheckpointSummaryPath returns the path to the root metadata.json (CheckpointSummary) for a checkpoint. +func CheckpointSummaryPath(checkpointID string) string { + return id.CheckpointID(checkpointID).Path() + "/" + paths.MetadataFileName +} + +// SessionMetadataPath returns the path to the session-level metadata.json for a checkpoint. +func SessionMetadataPath(checkpointID string) string { + return SessionFilePath(checkpointID, paths.MetadataFileName) +} + func findModuleRoot() string { // Start from this source file's location and walk up to find go.mod _, thisFile, _, ok := runtime.Caller(0) diff --git a/cmd/entire/cli/strategy/auto_commit.go b/cmd/entire/cli/strategy/auto_commit.go index 3a5a834d0..652826358 100644 --- a/cmd/entire/cli/strategy/auto_commit.go +++ b/cmd/entire/cli/strategy/auto_commit.go @@ -766,13 +766,35 @@ func (s *AutoCommitStrategy) GetTaskCheckpointTranscript(point RewindPoint) ([]b return nil, fmt.Errorf("failed to get metadata tree: %w", err) } - // MetadataDir for auto-commit task checkpoints is: cond-YYYYMMDD-HHMMSS-XXXXXXXX/tasks/ - // Session transcript is at: cond-YYYYMMDD-HHMMSS-XXXXXXXX/ + // MetadataDir for auto-commit task checkpoints is: //tasks/ // Extract the checkpoint path by removing "/tasks/" metadataDir := point.MetadataDir if idx := strings.Index(metadataDir, "/tasks/"); idx > 0 { checkpointPath := metadataDir[:idx] - transcriptPath := checkpointPath + "/" + paths.TranscriptFileName + + // Use the first session's transcript path from sessions array + transcriptPath := "" + summaryFile, summaryErr := tree.File(checkpointPath + "/" + paths.MetadataFileName) + if summaryErr == nil { + summaryContent, contentErr := 
summaryFile.Contents() + if contentErr == nil { + var summary checkpoint.CheckpointSummary + if json.Unmarshal([]byte(summaryContent), &summary) == nil && len(summary.Sessions) > 0 { + // Use first session's transcript path (task checkpoints have only one session) + // SessionFilePaths now contains absolute paths with leading "/" + // Strip the leading "/" for tree.File() which expects paths without leading slash + if summary.Sessions[0].Transcript != "" { + transcriptPath = strings.TrimPrefix(summary.Sessions[0].Transcript, "/") + } + } + } + } + + // Fall back to old format if sessions map not available + if transcriptPath == "" { + transcriptPath = checkpointPath + "/" + paths.TranscriptFileName + } + file, err := tree.File(transcriptPath) if err != nil { return nil, fmt.Errorf("failed to find transcript at %s: %w", transcriptPath, err) diff --git a/cmd/entire/cli/strategy/common.go b/cmd/entire/cli/strategy/common.go index 85ca63ce9..e7c4da010 100644 --- a/cmd/entire/cli/strategy/common.go +++ b/cmd/entire/cli/strategy/common.go @@ -135,11 +135,40 @@ func ListCheckpoints() ([]CheckpointInfo, error) { CheckpointID: checkpointID, } - // Get details from metadata file + // Get details from metadata file (CheckpointSummary format) if metadataFile, fileErr := checkpointTree.File(paths.MetadataFileName); fileErr == nil { if content, contentErr := metadataFile.Contents(); contentErr == nil { - //nolint:errcheck,gosec // Best-effort parsing, defaults are fine - json.Unmarshal([]byte(content), &info) + var summary checkpoint.CheckpointSummary + if json.Unmarshal([]byte(content), &summary) == nil && len(summary.Sessions) > 0 { + info.CheckpointsCount = summary.CheckpointsCount + info.FilesTouched = summary.FilesTouched + info.SessionCount = len(summary.Sessions) + + // Read session-level metadata for Agent, SessionID, CreatedAt, SessionIDs + for i, sessionPaths := range summary.Sessions { + if sessionPaths.Metadata != "" { + // SessionFilePaths now contains absolute 
paths with leading "/" + // Strip the leading "/" for tree.File() which expects paths without leading slash + sessionMetadataPath := strings.TrimPrefix(sessionPaths.Metadata, "/") + if sessionFile, sErr := tree.File(sessionMetadataPath); sErr == nil { + if sessionContent, scErr := sessionFile.Contents(); scErr == nil { + var sessionMetadata checkpoint.CommittedMetadata + if json.Unmarshal([]byte(sessionContent), &sessionMetadata) == nil { + info.SessionIDs = append(info.SessionIDs, sessionMetadata.SessionID) + // Use first session's metadata for Agent, SessionID, CreatedAt + if i == 0 { + info.Agent = sessionMetadata.Agent + info.SessionID = sessionMetadata.SessionID + info.CreatedAt = sessionMetadata.CreatedAt + info.IsTask = sessionMetadata.IsTask + info.ToolUseID = sessionMetadata.ToolUseID + } + } + } + } + } + } + } } } @@ -225,6 +254,9 @@ func EnsureMetadataBranch(repo *git.Repository) error { } // readCheckpointMetadata reads metadata.json from a checkpoint path on entire/sessions. +// With the new format, root metadata.json is a CheckpointSummary with Agents array. +// This function reads the summary and extracts relevant fields into CheckpointInfo, +// also reading session-level metadata for IsTask/ToolUseID fields. 
func ReadCheckpointMetadata(tree *object.Tree, checkpointPath string) (*CheckpointInfo, error) { metadataPath := checkpointPath + "/metadata.json" file, err := tree.File(metadataPath) @@ -237,6 +269,50 @@ func ReadCheckpointMetadata(tree *object.Tree, checkpointPath string) (*Checkpoi return nil, fmt.Errorf("failed to read metadata: %w", err) } + // Try to parse as CheckpointSummary first (new format) + var summary checkpoint.CheckpointSummary + if err := json.Unmarshal([]byte(content), &summary); err == nil { + // If we have sessions array, this is the new format + if len(summary.Sessions) > 0 { + info := &CheckpointInfo{ + CheckpointID: summary.CheckpointID, + CheckpointsCount: summary.CheckpointsCount, + FilesTouched: summary.FilesTouched, + SessionCount: len(summary.Sessions), + } + + // Read all sessions' metadata to populate SessionIDs and get other fields from first session + var sessionIDs []string + for i, sessionPaths := range summary.Sessions { + if sessionPaths.Metadata != "" { + // SessionFilePaths now contains absolute paths with leading "/" + // Strip the leading "/" for tree.File() which expects paths without leading slash + sessionMetadataPath := strings.TrimPrefix(sessionPaths.Metadata, "/") + if sessionFile, err := tree.File(sessionMetadataPath); err == nil { + if sessionContent, err := sessionFile.Contents(); err == nil { + var sessionMetadata checkpoint.CommittedMetadata + if json.Unmarshal([]byte(sessionContent), &sessionMetadata) == nil { + sessionIDs = append(sessionIDs, sessionMetadata.SessionID) + // Use first session for Agent, SessionID, CreatedAt, IsTask, ToolUseID + if i == 0 { + info.Agent = sessionMetadata.Agent + info.SessionID = sessionMetadata.SessionID + info.CreatedAt = sessionMetadata.CreatedAt + info.IsTask = sessionMetadata.IsTask + info.ToolUseID = sessionMetadata.ToolUseID + } + } + } + } + } + } + info.SessionIDs = sessionIDs + + return info, nil + } + } + + // Fall back to parsing as CheckpointInfo (old format or direct 
info) var metadata CheckpointInfo if err := json.Unmarshal([]byte(content), &metadata); err != nil { return nil, fmt.Errorf("failed to parse metadata: %w", err) diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index e8edf2f95..cc76d56a8 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -128,10 +128,67 @@ func (s *ManualCommitStrategy) CondenseSession(repo *git.Repository, checkpointI // Get author info authorName, authorEmail := GetGitAuthorFromRepo(repo) - + attribution := calculateSessionAttributions(repo, ref, sessionData, state) // Get current branch name branchName := GetCurrentBranchName(repo) + // Generate summary if enabled + var summary *cpkg.Summary + if settings.IsSummarizeEnabled() && len(sessionData.Transcript) > 0 { + logCtx := logging.WithComponent(context.Background(), "attribution") + summarizeCtx := logging.WithComponent(logCtx, "summarize") + + // Scope transcript to this checkpoint's portion + scopedTranscript := transcript.SliceFromLine(sessionData.Transcript, state.TranscriptLinesAtStart) + if len(scopedTranscript) > 0 { + var err error + summary, err = summarize.GenerateFromTranscript(summarizeCtx, scopedTranscript, sessionData.FilesTouched, nil) + if err != nil { + logging.Warn(summarizeCtx, "summary generation failed", + slog.String("session_id", state.SessionID), + slog.String("error", err.Error())) + // Continue without summary - non-blocking + } else { + logging.Info(summarizeCtx, "summary generated", + slog.String("session_id", state.SessionID)) + } + } + } + + // Write checkpoint metadata using the checkpoint store + if err := store.WriteCommitted(context.Background(), cpkg.WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: state.SessionID, + Strategy: StrategyNameManualCommit, + Branch: branchName, + Transcript: sessionData.Transcript, + Prompts: sessionData.Prompts, + 
Context: sessionData.Context, + FilesTouched: sessionData.FilesTouched, + CheckpointsCount: state.CheckpointCount, + EphemeralBranch: shadowBranchName, + AuthorName: authorName, + AuthorEmail: authorEmail, + Agent: state.AgentType, + TranscriptIdentifierAtStart: state.TranscriptIdentifierAtStart, + TranscriptLinesAtStart: state.TranscriptLinesAtStart, + TokenUsage: sessionData.TokenUsage, + InitialAttribution: attribution, + Summary: summary, + }); err != nil { + return nil, fmt.Errorf("failed to write checkpoint metadata: %w", err) + } + + return &CondenseResult{ + CheckpointID: checkpointID, + SessionID: state.SessionID, + CheckpointsCount: state.CheckpointCount, + FilesTouched: sessionData.FilesTouched, + TotalTranscriptLines: sessionData.FullTranscriptLines, + }, nil +} + +func calculateSessionAttributions(repo *git.Repository, shadowRef *plumbing.Reference, sessionData *ExtractedSessionData, state *SessionState) *cpkg.InitialAttribution { // Calculate initial attribution using accumulated prompt attribution data. // This uses user edits captured at each prompt start (before agent works), // plus any user edits after the final checkpoint (shadow → head). 
@@ -153,11 +210,11 @@ func (s *ManualCommitStrategy) CondenseSession(repo *git.Repository, checkpointI slog.String("error", treeErr.Error())) } else { // Get shadow branch tree (checkpoint tree - what the agent wrote) - shadowCommit, shadowErr := repo.CommitObject(ref.Hash()) + shadowCommit, shadowErr := repo.CommitObject(shadowRef.Hash()) if shadowErr != nil { logging.Debug(logCtx, "attribution skipped: failed to get shadow commit", slog.String("error", shadowErr.Error()), - slog.String("shadow_ref", ref.Hash().String())) + slog.String("shadow_ref", shadowRef.Hash().String())) } else { shadowTree, shadowTreeErr := shadowCommit.Tree() if shadowTreeErr != nil { @@ -218,60 +275,7 @@ func (s *ManualCommitStrategy) CondenseSession(repo *git.Repository, checkpointI } } } - - // Generate summary if enabled - var summary *cpkg.Summary - if settings.IsSummarizeEnabled() && len(sessionData.Transcript) > 0 { - summarizeCtx := logging.WithComponent(logCtx, "summarize") - - // Scope transcript to this checkpoint's portion - scopedTranscript := transcript.SliceFromLine(sessionData.Transcript, state.TranscriptLinesAtStart) - if len(scopedTranscript) > 0 { - var err error - summary, err = summarize.GenerateFromTranscript(summarizeCtx, scopedTranscript, sessionData.FilesTouched, nil) - if err != nil { - logging.Warn(summarizeCtx, "summary generation failed", - slog.String("session_id", state.SessionID), - slog.String("error", err.Error())) - // Continue without summary - non-blocking - } else { - logging.Info(summarizeCtx, "summary generated", - slog.String("session_id", state.SessionID)) - } - } - } - - // Write checkpoint metadata using the checkpoint store - if err := store.WriteCommitted(context.Background(), cpkg.WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: state.SessionID, - Strategy: StrategyNameManualCommit, - Branch: branchName, - Transcript: sessionData.Transcript, - Prompts: sessionData.Prompts, - Context: sessionData.Context, - FilesTouched: 
sessionData.FilesTouched, - CheckpointsCount: state.CheckpointCount, - EphemeralBranch: shadowBranchName, - AuthorName: authorName, - AuthorEmail: authorEmail, - Agent: state.AgentType, - TranscriptIdentifierAtStart: state.TranscriptIdentifierAtStart, - TranscriptLinesAtStart: state.TranscriptLinesAtStart, - TokenUsage: sessionData.TokenUsage, - InitialAttribution: attribution, - Summary: summary, - }); err != nil { - return nil, fmt.Errorf("failed to write checkpoint metadata: %w", err) - } - - return &CondenseResult{ - CheckpointID: checkpointID, - SessionID: state.SessionID, - CheckpointsCount: state.CheckpointCount, - FilesTouched: sessionData.FilesTouched, - TotalTranscriptLines: sessionData.FullTranscriptLines, - }, nil + return attribution } // extractSessionData extracts session data from the shadow branch. @@ -293,6 +297,7 @@ func (s *ManualCommitStrategy) extractSessionData(repo *git.Repository, shadowRe metadataDir := paths.SessionMetadataDirFromSessionID(sessionID) // Extract transcript + // TODO: remove paths.TranscriptFileNameLegacy usage ? 
var fullTranscript string if file, fileErr := tree.File(metadataDir + "/" + paths.TranscriptFileName); fileErr == nil { if content, contentErr := file.Contents(); contentErr == nil { @@ -341,7 +346,9 @@ func (s *ManualCommitStrategy) extractSessionData(repo *git.Repository, shadowRe data.FilesTouched = filesTouched // Calculate token usage from the extracted transcript portion + // TODO: Missing Gemini token usage if len(data.Transcript) > 0 { + // TODO: Calculate token usage per transcript slice (only checkpoint related) transcriptLines, err := claudecode.ParseTranscript(data.Transcript) if err == nil && len(transcriptLines) > 0 { data.TokenUsage = claudecode.CalculateTokenUsage(transcriptLines) diff --git a/cmd/entire/cli/strategy/manual_commit_hooks.go b/cmd/entire/cli/strategy/manual_commit_hooks.go index 774819019..8ea015e23 100644 --- a/cmd/entire/cli/strategy/manual_commit_hooks.go +++ b/cmd/entire/cli/strategy/manual_commit_hooks.go @@ -680,7 +680,7 @@ func (s *ManualCommitStrategy) sessionHasNewContentFromLiveTranscript(repo *git. // Check if any modified files overlap with currently staged files // This ensures we only add checkpoint trailers to commits that include // files the agent actually modified - stagedFiles := getStagedFiles(repo) + stagedFiles := getStagedFiles(repo) // TODO: does it work? Do we have staged files if this is called at post-commit hook ? 
if !hasOverlappingFiles(stagedFiles, modifiedFiles) { return false, nil // No overlap - staged files are unrelated to agent's work } diff --git a/cmd/entire/cli/strategy/manual_commit_logs.go b/cmd/entire/cli/strategy/manual_commit_logs.go index 3f8f4494a..391dc97dd 100644 --- a/cmd/entire/cli/strategy/manual_commit_logs.go +++ b/cmd/entire/cli/strategy/manual_commit_logs.go @@ -1,10 +1,12 @@ package strategy import ( + "encoding/json" "fmt" "sort" "strings" + "entire.io/cli/cmd/entire/cli/checkpoint" "entire.io/cli/cmd/entire/cli/paths" "entire.io/cli/cmd/entire/cli/trailers" @@ -94,7 +96,7 @@ func (s *ManualCommitStrategy) GetSessionMetadataRef(_ string) string { } // GetSessionContext returns the context.md content for a session. -// For manual-commit strategy, reads from the entire/sessions branch using sharded paths. +// For manual-commit strategy, reads from the entire/sessions branch using the sessions map. func (s *ManualCommitStrategy) GetSessionContext(sessionID string) string { // Find a checkpoint for this session checkpoints, err := s.getCheckpointsForSession(sessionID) @@ -130,8 +132,46 @@ func (s *ManualCommitStrategy) GetSessionContext(sessionID string) string { return "" } - // Context.md is at /context.md - contextPath := paths.CheckpointPath(checkpointID) + "/" + paths.ContextFileName + // Get checkpoint tree to read the sessions summary + checkpointTree, err := tree.Tree(checkpointID.Path()) + if err != nil { + return "" + } + + // Read root metadata to find session's context path from sessions map + metadataFile, err := checkpointTree.File(paths.MetadataFileName) + if err != nil { + return "" + } + + metadataContent, err := metadataFile.Contents() + if err != nil { + return "" + } + + var summary checkpoint.CheckpointSummary + if err := json.Unmarshal([]byte(metadataContent), &summary); err != nil { + return "" + } + + // Look up context path from sessions array + // Try to find the session by reading each session's metadata, or fall back to 
latest + var sessionPaths checkpoint.SessionFilePaths + if len(summary.Sessions) > 0 { + // Use the latest session by default (last entry in the array) + latestIndex := len(summary.Sessions) - 1 + sessionPaths = summary.Sessions[latestIndex] + } else { + return "" + } + + // Read context using absolute path from root tree + // SessionFilePaths now contains absolute paths like "/a1/b2c3d4e5f6/1/context.md" + if sessionPaths.Context == "" { + return "" + } + // Strip leading "/" for tree.File() which expects paths without leading slash + contextPath := strings.TrimPrefix(sessionPaths.Context, "/") file, err := tree.File(contextPath) if err != nil { return "" diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 82497d4a5..41bfaeb11 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -1775,11 +1775,11 @@ func TestCondenseSession_IncludesInitialAttribution(t *testing.T) { t.Fatalf("failed to get tree: %v", err) } - // Read metadata.json - metadataPath := checkpointID.Path() + "/" + paths.MetadataFileName - metadataFile, err := tree.File(metadataPath) + // InitialAttribution is stored in session-level metadata (1/metadata.json), not root (1-based indexing) + sessionMetadataPath := checkpointID.Path() + "/1/" + paths.MetadataFileName + metadataFile, err := tree.File(sessionMetadataPath) if err != nil { - t.Fatalf("failed to find metadata.json at %s: %v", metadataPath, err) + t.Fatalf("failed to find session metadata.json at %s: %v", sessionMetadataPath, err) } content, err := metadataFile.Contents() @@ -1803,7 +1803,7 @@ func TestCondenseSession_IncludesInitialAttribution(t *testing.T) { } if metadata.InitialAttribution == nil { - t.Fatal("InitialAttribution should be present in metadata.json for manual-commit") + t.Fatal("InitialAttribution should be present in session metadata.json for manual-commit") } // Verify the attribution values are reasonable 
@@ -2106,10 +2106,11 @@ func TestMultiCheckpoint_UserEditsBetweenCheckpoints(t *testing.T) { t.Fatalf("failed to get tree: %v", err) } - metadataPath := checkpointID.Path() + "/" + paths.MetadataFileName - metadataFile, err := tree.File(metadataPath) + // InitialAttribution is stored in session-level metadata (1/metadata.json), not root (1-based indexing) + sessionMetadataPath := checkpointID.Path() + "/1/" + paths.MetadataFileName + metadataFile, err := tree.File(sessionMetadataPath) if err != nil { - t.Fatalf("failed to find metadata.json at %s: %v", metadataPath, err) + t.Fatalf("failed to find session metadata.json at %s: %v", sessionMetadataPath, err) } content, err := metadataFile.Contents() @@ -2132,7 +2133,7 @@ func TestMultiCheckpoint_UserEditsBetweenCheckpoints(t *testing.T) { } if metadata.InitialAttribution == nil { - t.Fatal("InitialAttribution should be present") + t.Fatal("InitialAttribution should be present in session metadata") } t.Logf("Final Attribution: agent=%d, human_added=%d, human_modified=%d, human_removed=%d, total=%d, percentage=%.1f%%", diff --git a/cmd/entire/cli/strategy/session.go b/cmd/entire/cli/strategy/session.go index cd4f5e81c..76e7a601b 100644 --- a/cmd/entire/cli/strategy/session.go +++ b/cmd/entire/cli/strategy/session.go @@ -1,12 +1,17 @@ package strategy import ( + "encoding/json" "fmt" "sort" + "strconv" "strings" "time" + "entire.io/cli/cmd/entire/cli/checkpoint" "entire.io/cli/cmd/entire/cli/checkpoint/id" + "entire.io/cli/cmd/entire/cli/paths" + "github.com/go-git/go-git/v5" ) @@ -212,13 +217,48 @@ func GetSession(sessionID string) (*Session, error) { } // getDescriptionForCheckpoint reads the description for a checkpoint from the entire/sessions branch. +// It reads from the latest session subdirectory in the new storage format. 
func getDescriptionForCheckpoint(repo *git.Repository, checkpointID id.CheckpointID) string { tree, err := GetMetadataBranchTree(repo) if err != nil { return NoDescription } - return getSessionDescriptionFromTree(tree, checkpointID.Path()) + // Get the checkpoint tree + checkpointTree, err := tree.Tree(checkpointID.Path()) + if err != nil { + return NoDescription + } + + // Read root metadata.json to get session count and sessions map + metadataFile, err := checkpointTree.File(paths.MetadataFileName) + if err != nil { + return NoDescription + } + + content, err := metadataFile.Contents() + if err != nil { + return NoDescription + } + + var summary checkpoint.CheckpointSummary + if err := json.Unmarshal([]byte(content), &summary); err != nil { + return NoDescription + } + + // Find the first session's prompt/context path + // Try to use the latest session for description (1-based indexing) + sessionDir := "1" + if len(summary.Sessions) > 0 { + sessionDir = strconv.Itoa(len(summary.Sessions)) // Use latest session + } + + sessionTree, err := checkpointTree.Tree(sessionDir) + if err != nil { + return NoDescription + } + + return getSessionDescriptionFromTree(sessionTree, "") } // findSessionByID finds a session by exact ID or prefix match. 
diff --git a/cmd/entire/cli/strategy/session_test.go b/cmd/entire/cli/strategy/session_test.go index b6549dfb8..261321e63 100644 --- a/cmd/entire/cli/strategy/session_test.go +++ b/cmd/entire/cli/strategy/session_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" "path/filepath" + "strconv" "testing" "time" @@ -401,26 +402,52 @@ func createTestMultiSessionCheckpoint(t *testing.T, repo *git.Repository, checkp entries := make(map[string]object.TreeEntry) checkpointPath := checkpointID.Path() - // Create metadata.json with SessionIDs array - metadata := CheckpointInfo{ + // Create session-level metadata for each session (1-based indexing) + var sessionFilePaths []checkpoint.SessionFilePaths + for i, sessionID := range allSessionIDs { + sessionDir := strconv.Itoa(i + 1) // 1-based: 1, 2, 3, ... + sessionMetadata := checkpoint.CommittedMetadata{ + CheckpointID: checkpointID, + SessionID: sessionID, + CreatedAt: time.Now(), + } + sessionMetadataJSON, err := json.Marshal(sessionMetadata) + if err != nil { + t.Fatalf("failed to marshal session metadata: %v", err) + } + sessionMetadataBlobHash, err := checkpoint.CreateBlobFromContent(repo, sessionMetadataJSON) + if err != nil { + t.Fatalf("failed to create session metadata blob: %v", err) + } + sessionMetadataPath := checkpointPath + "/" + sessionDir + "/" + paths.MetadataFileName + entries[sessionMetadataPath] = object.TreeEntry{ + Name: sessionMetadataPath, + Mode: filemode.Regular, + Hash: sessionMetadataBlobHash, + } + // Use absolute paths with leading "/" as per new format + sessionFilePaths = append(sessionFilePaths, checkpoint.SessionFilePaths{ + Metadata: "/" + checkpointPath + "/" + sessionDir + "/" + paths.MetadataFileName, + }) + } + + // Create root CheckpointSummary with Sessions array (using absolute paths) + summary := checkpoint.CheckpointSummary{ CheckpointID: checkpointID, - SessionID: primarySessionID, - SessionCount: len(allSessionIDs), - SessionIDs: allSessionIDs, - CreatedAt: time.Now(), + 
Sessions: sessionFilePaths, } - metadataJSON, err := json.Marshal(metadata) + summaryJSON, err := json.Marshal(summary) if err != nil { - t.Fatalf("failed to marshal metadata: %v", err) + t.Fatalf("failed to marshal summary: %v", err) } - metadataBlobHash, err := checkpoint.CreateBlobFromContent(repo, metadataJSON) + summaryBlobHash, err := checkpoint.CreateBlobFromContent(repo, summaryJSON) if err != nil { - t.Fatalf("failed to create metadata blob: %v", err) + t.Fatalf("failed to create summary blob: %v", err) } entries[checkpointPath+"/"+paths.MetadataFileName] = object.TreeEntry{ Name: checkpointPath + "/" + paths.MetadataFileName, Mode: filemode.Regular, - Hash: metadataBlobHash, + Hash: summaryBlobHash, } // Build tree @@ -473,38 +500,70 @@ func createTestMetadataBranchWithPrompt(t *testing.T, repo *git.Repository, sess // Create empty tree for orphan commit entries := make(map[string]object.TreeEntry) - // Add metadata.json checkpointPath := checkpointID.Path() - metadata := CheckpointInfo{ + sessionDir := "1" // First session (1-based indexing) + + // Create session-level metadata in 1/ subdirectory + sessionMetadata := CheckpointInfo{ CheckpointID: checkpointID, SessionID: sessionID, CreatedAt: time.Now(), } - metadataJSON, err := json.Marshal(metadata) + sessionMetadataJSON, err := json.Marshal(sessionMetadata) if err != nil { - t.Fatalf("failed to marshal metadata: %v", err) + t.Fatalf("failed to marshal session metadata: %v", err) } - metadataBlobHash, err := checkpoint.CreateBlobFromContent(repo, metadataJSON) + sessionMetadataBlobHash, err := checkpoint.CreateBlobFromContent(repo, sessionMetadataJSON) if err != nil { - t.Fatalf("failed to create metadata blob: %v", err) + t.Fatalf("failed to create session metadata blob: %v", err) } - entries[checkpointPath+"/"+paths.MetadataFileName] = object.TreeEntry{ - Name: checkpointPath + "/" + paths.MetadataFileName, + sessionMetadataPath := checkpointPath + "/" + sessionDir + "/" + paths.MetadataFileName + 
entries[sessionMetadataPath] = object.TreeEntry{ + Name: sessionMetadataPath, Mode: filemode.Regular, - Hash: metadataBlobHash, + Hash: sessionMetadataBlobHash, } - // Add prompt.txt if provided + // Add prompt.txt in session subdirectory if provided + promptAbsPath := "" if prompt != "" { promptBlobHash, promptErr := checkpoint.CreateBlobFromContent(repo, []byte(prompt)) if promptErr != nil { t.Fatalf("failed to create prompt blob: %v", promptErr) } - entries[checkpointPath+"/"+paths.PromptFileName] = object.TreeEntry{ - Name: checkpointPath + "/" + paths.PromptFileName, + fullPromptPath := checkpointPath + "/" + sessionDir + "/" + paths.PromptFileName + entries[fullPromptPath] = object.TreeEntry{ + Name: fullPromptPath, Mode: filemode.Regular, Hash: promptBlobHash, } + // Use absolute path with leading "/" + promptAbsPath = "/" + fullPromptPath + } + + // Create root CheckpointSummary with absolute paths + rootSummary := checkpoint.CheckpointSummary{ + CheckpointID: checkpointID, + Sessions: []checkpoint.SessionFilePaths{ + { + Metadata: "/" + checkpointPath + "/" + sessionDir + "/" + paths.MetadataFileName, + Prompt: promptAbsPath, + }, + }, + } + summaryJSON, err := json.Marshal(rootSummary) + if err != nil { + t.Fatalf("failed to marshal root summary: %v", err) + } + summaryBlobHash, err := checkpoint.CreateBlobFromContent(repo, summaryJSON) + if err != nil { + t.Fatalf("failed to create summary blob: %v", err) + } + rootMetadataPath := checkpointPath + "/" + paths.MetadataFileName + entries[rootMetadataPath] = object.TreeEntry{ + Name: rootMetadataPath, + Mode: filemode.Regular, + Hash: summaryBlobHash, } // Build tree From 51e122266d7dd5cfd82c39e0857e758d1023f1ad Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Wed, 4 Feb 2026 16:56:56 +1100 Subject: [PATCH 02/18] /0 as first session folder --- CLAUDE.md | 32 ++- cmd/entire/cli/checkpoint/checkpoint_test.go | 8 +- cmd/entire/cli/checkpoint/committed.go | 28 +- 
cmd/entire/cli/integration_test/testenv.go | 4 +- cmd/entire/cli/strategy/common.go | 8 +- cmd/entire/cli/strategy/manual_commit_test.go | 8 +- cmd/entire/cli/strategy/session.go | 6 +- cmd/entire/cli/strategy/session_test.go | 6 +- scripts/migrate-sessions.sh | 266 ++++++++++++++++++ 9 files changed, 317 insertions(+), 49 deletions(-) create mode 100755 scripts/migrate-sessions.sh diff --git a/CLAUDE.md b/CLAUDE.md index 7a5dffa67..f4384ea09 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -315,19 +315,21 @@ All strategies implement: **Both Strategies** - Metadata branch (`entire/sessions`) - sharded checkpoint format: ``` // -├── metadata.json # Checkpoint info (see below) -├── full.jsonl # Current/latest session transcript -├── prompt.txt # User prompts -├── context.md # Generated context -├── content_hash.txt # SHA256 of transcript (shadow only) -├── tasks// # Task checkpoints (if applicable) -│ ├── checkpoint.json # UUID mapping -│ └── agent-.jsonl # Subagent transcript -└── 1/ # Archived session (if multiple sessions) - ├── metadata.json # Archived session metadata - ├── full.jsonl # Archived session transcript - ├── prompt.txt - └── ... +├── metadata.json # CheckpointSummary (aggregated stats) +├── 0/ # First session (0-based indexing) +│ ├── metadata.json # Session-specific metadata +│ ├── full.jsonl # Session transcript +│ ├── prompt.txt # User prompts +│ ├── context.md # Generated context +│ ├── content_hash.txt # SHA256 of transcript +│ └── tasks// # Task checkpoints (if applicable) +│ ├── checkpoint.json # UUID mapping +│ └── agent-.jsonl # Subagent transcript +├── 1/ # Second session (if multiple sessions) +│ ├── metadata.json +│ ├── full.jsonl +│ └── ... +└── ... ``` **Multi-session metadata.json format:** @@ -344,8 +346,8 @@ All strategies implement: ``` When multiple sessions are condensed to the same checkpoint (same base commit): -- Latest session files go at the root level -- Previous sessions are archived to numbered subfolders (`1/`, `2/`, etc.) 
+- Sessions are stored in numbered subfolders using 0-based indexing (`0/`, `1/`, `2/`, etc.) +- Latest session is always in the highest-numbered folder - `session_ids` array tracks all sessions, `session_count` increments **Session State** (filesystem, `.git/entire-sessions/`): diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index d6cc0639b..d17f44967 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -187,9 +187,9 @@ func TestWriteCommitted_AgentField(t *testing.T) { } // Agent should be in the session-level metadata, not in the summary - // Read first session's metadata to verify agent (1-based indexing) + // Read first session's metadata to verify agent (0-based indexing) if len(summary.Sessions) > 0 { - sessionTree, err := checkpointTree.Tree("1") + sessionTree, err := checkpointTree.Tree("0") if err != nil { t.Fatalf("failed to get session tree: %v", err) } @@ -258,8 +258,8 @@ func readLatestSessionMetadata(t *testing.T, repo *git.Repository, checkpointID t.Fatalf("failed to parse root metadata.json: %v", err) } - // Read session-level metadata from latest session subdirectory (1-based indexing) - latestIndex := len(summary.Sessions) + // Read session-level metadata from latest session subdirectory (0-based indexing) + latestIndex := len(summary.Sessions) - 1 sessionDir := strconv.Itoa(latestIndex) sessionTree, err := checkpointTree.Tree(sessionDir) if err != nil { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 0bad357af..63bb71ab3 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -238,10 +238,10 @@ func (s *GitStore) writeStandardCheckpointEntries(opts WriteCommittedOptions, ba } } - // Determine session index (1, 2, 3, ...) - 1-based numbering - sessionIndex := 1 + // Determine session index (0, 1, 2, ...) 
- 0-based numbering + sessionIndex := 0 if existingSummary != nil { - sessionIndex = len(existingSummary.Sessions) + 1 + sessionIndex = len(existingSummary.Sessions) } // Write session files to numbered subdirectory @@ -597,14 +597,14 @@ type taskCheckpointData struct { // ReadCommitted reads a committed checkpoint by ID from the entire/sessions branch. // Returns nil, nil if the checkpoint doesn't exist. // -// The storage format uses numbered subdirectories for each session (1-based): +// The storage format uses numbered subdirectories for each session (0-based): // // / // ├── metadata.json # CheckpointSummary with sessions map -// ├── 1/ # First session +// ├── 0/ # First session // │ ├── metadata.json # Session-specific metadata // │ └── full.jsonl # Transcript -// ├── 2/ # Second session +// ├── 1/ # Second session // └── ... func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*ReadCommittedResult, error) { _ = ctx // Reserved for future use @@ -644,8 +644,8 @@ func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.Checkpoint // Read data from the appropriate session subdirectories if len(summary.Sessions) > 0 { - // Find the latest session index (highest numbered directory, 1-based) - latestIndex := len(summary.Sessions) + // Find the latest session index (highest numbered directory, 0-based) + latestIndex := len(summary.Sessions) - 1 // Read latest session data latestDir := strconv.Itoa(latestIndex) @@ -697,10 +697,10 @@ func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.Checkpoint func (s *GitStore) readArchivedSessionsFromSummary(checkpointTree *object.Tree, summary CheckpointSummary) []ArchivedSession { var archived []ArchivedSession - // Iterate through all sessions except the latest (1-based indexing) - // Sessions are in folders 1, 2, ..., N where N is the latest + // Iterate through all sessions except the latest (0-based indexing) + // Sessions are in folders 0, 1, ..., N-1 where N-1 
is the latest sessionCount := len(summary.Sessions) - for i := 1; i < sessionCount; i++ { + for i := range sessionCount - 1 { folderName := strconv.Itoa(i) // Try to get the session subtree @@ -809,7 +809,7 @@ func (s *GitStore) ListCommitted(ctx context.Context) ([]CommittedInfo, error) { // Read session metadata from latest session to get Agent, SessionID, CreatedAt if len(summary.Sessions) > 0 { - latestIndex := len(summary.Sessions) + latestIndex := len(summary.Sessions) - 1 latestDir := strconv.Itoa(latestIndex) if sessionTree, treeErr := checkpointTree.Tree(latestDir); treeErr == nil { if sessionMetadataFile, smErr := sessionTree.File(paths.MetadataFileName); smErr == nil { @@ -916,8 +916,8 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint return fmt.Errorf("failed to read checkpoint summary: %w", err) } - // Find the latest session's metadata path (1-based indexing) - latestIndex := len(checkpointSummary.Sessions) + // Find the latest session's metadata path (0-based indexing) + latestIndex := len(checkpointSummary.Sessions) - 1 sessionMetadataPath := fmt.Sprintf("%s%d/%s", basePath, latestIndex, paths.MetadataFileName) sessionEntry, exists := entries[sessionMetadataPath] if !exists { diff --git a/cmd/entire/cli/integration_test/testenv.go b/cmd/entire/cli/integration_test/testenv.go index 6b3626172..18cf0c004 100644 --- a/cmd/entire/cli/integration_test/testenv.go +++ b/cmd/entire/cli/integration_test/testenv.go @@ -1199,10 +1199,10 @@ func ShardedCheckpointPath(checkpointID string) string { } // SessionFilePath returns the path to a session file within a checkpoint. -// Session files are stored in numbered subdirectories using 1-based indexing (e.g., 1/full.jsonl). +// Session files are stored in numbered subdirectories using 0-based indexing (e.g., 0/full.jsonl). // This function constructs the path for the first (default) session. 
func SessionFilePath(checkpointID string, fileName string) string { - return id.CheckpointID(checkpointID).Path() + "/1/" + fileName + return id.CheckpointID(checkpointID).Path() + "/0/" + fileName } // CheckpointSummaryPath returns the path to the root metadata.json (CheckpointSummary) for a checkpoint. diff --git a/cmd/entire/cli/strategy/common.go b/cmd/entire/cli/strategy/common.go index e7c4da010..8aaaf6844 100644 --- a/cmd/entire/cli/strategy/common.go +++ b/cmd/entire/cli/strategy/common.go @@ -410,13 +410,13 @@ func ReadAllSessionPromptsFromTree(tree *object.Tree, checkpointPath string, ses return nil } - // Multi-session: read prompts from archived folders (1/, 2/, etc.) and root + // Multi-session: read prompts from archived folders (0/, 1/, etc.) and root prompts := make([]string, len(sessionIDs)) - // Read archived session prompts (folders 1, 2, ... N-1) - for i := 1; i < sessionCount; i++ { + // Read archived session prompts (folders 0, 1, ... N-2) + for i := range sessionCount - 1 { archivedPath := fmt.Sprintf("%s/%d", checkpointPath, i) - prompts[i-1] = ReadSessionPromptFromTree(tree, archivedPath) + prompts[i] = ReadSessionPromptFromTree(tree, archivedPath) } // Read the most recent session prompt (at root level) diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 41bfaeb11..b11922466 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -1775,8 +1775,8 @@ func TestCondenseSession_IncludesInitialAttribution(t *testing.T) { t.Fatalf("failed to get tree: %v", err) } - // InitialAttribution is stored in session-level metadata (1/metadata.json), not root (1-based indexing) - sessionMetadataPath := checkpointID.Path() + "/1/" + paths.MetadataFileName + // InitialAttribution is stored in session-level metadata (0/metadata.json), not root (0-based indexing) + sessionMetadataPath := checkpointID.Path() + "/0/" + paths.MetadataFileName 
metadataFile, err := tree.File(sessionMetadataPath) if err != nil { t.Fatalf("failed to find session metadata.json at %s: %v", sessionMetadataPath, err) @@ -2106,8 +2106,8 @@ func TestMultiCheckpoint_UserEditsBetweenCheckpoints(t *testing.T) { t.Fatalf("failed to get tree: %v", err) } - // InitialAttribution is stored in session-level metadata (1/metadata.json), not root (1-based indexing) - sessionMetadataPath := checkpointID.Path() + "/1/" + paths.MetadataFileName + // InitialAttribution is stored in session-level metadata (0/metadata.json), not root (0-based indexing) + sessionMetadataPath := checkpointID.Path() + "/0/" + paths.MetadataFileName metadataFile, err := tree.File(sessionMetadataPath) if err != nil { t.Fatalf("failed to find session metadata.json at %s: %v", sessionMetadataPath, err) diff --git a/cmd/entire/cli/strategy/session.go b/cmd/entire/cli/strategy/session.go index 76e7a601b..db354d96e 100644 --- a/cmd/entire/cli/strategy/session.go +++ b/cmd/entire/cli/strategy/session.go @@ -247,10 +247,10 @@ func getDescriptionForCheckpoint(repo *git.Repository, checkpointID id.Checkpoin } // Find the first session's prompt/context path - // Try to use the latest session for description (1-based indexing) - sessionDir := "1" + // Try to use the latest session for description (0-based indexing) + sessionDir := "0" if len(summary.Sessions) > 0 { - sessionDir = strconv.Itoa(len(summary.Sessions)) // Use latest session + sessionDir = strconv.Itoa(len(summary.Sessions) - 1) // Use latest session } sessionTree, err := checkpointTree.Tree(sessionDir) diff --git a/cmd/entire/cli/strategy/session_test.go b/cmd/entire/cli/strategy/session_test.go index 261321e63..0979b81b6 100644 --- a/cmd/entire/cli/strategy/session_test.go +++ b/cmd/entire/cli/strategy/session_test.go @@ -402,10 +402,10 @@ func createTestMultiSessionCheckpoint(t *testing.T, repo *git.Repository, checkp entries := make(map[string]object.TreeEntry) checkpointPath := checkpointID.Path() - // Create 
session-level metadata for each session (1-based indexing) + // Create session-level metadata for each session (0-based indexing) var sessionFilePaths []checkpoint.SessionFilePaths for i, sessionID := range allSessionIDs { - sessionDir := strconv.Itoa(i + 1) // 1-based: 1, 2, 3, ... + sessionDir := strconv.Itoa(i) // 0-based: 0, 1, 2, ... sessionMetadata := checkpoint.CommittedMetadata{ CheckpointID: checkpointID, SessionID: sessionID, @@ -501,7 +501,7 @@ func createTestMetadataBranchWithPrompt(t *testing.T, repo *git.Repository, sess entries := make(map[string]object.TreeEntry) checkpointPath := checkpointID.Path() - sessionDir := "1" // First session (1-based indexing) + sessionDir := "0" // First session (0-based indexing) // Create session-level metadata in 1/ subdirectory sessionMetadata := CheckpointInfo{ diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh new file mode 100755 index 000000000..3f40ace6c --- /dev/null +++ b/scripts/migrate-sessions.sh @@ -0,0 +1,266 @@ +#!/bin/bash +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +SOURCE_BRANCH="entire/sessions" +TARGET_BRANCH="entire/sessions/v1" + +echo -e "${GREEN}=== Checkpoint Migration Script ===${NC}" +echo "Source: $SOURCE_BRANCH" +echo "Target: $TARGET_BRANCH" +echo "" + +# Save current branch +ORIGINAL_BRANCH=$(git branch --show-current) + +# Get list of commits from source branch (oldest first, excluding initial commit) +COMMITS=$(git log --reverse --format="%H" "$SOURCE_BRANCH" | tail -n +2) +INIT_COMMIT=$(git log --reverse --format="%H" "$SOURCE_BRANCH" | head -1) + +echo -e "${YELLOW}Found commits to process:${NC}" +git log --reverse --oneline "$SOURCE_BRANCH" | tail -n +2 +echo "" + +# Create orphan target branch from init commit +echo -e "${GREEN}Creating target branch $TARGET_BRANCH...${NC}" +git checkout "$SOURCE_BRANCH" +git checkout "$INIT_COMMIT" +git checkout --orphan "$TARGET_BRANCH" +git commit 
--allow-empty -m "Initialize metadata branch (v1)" +git checkout "$SOURCE_BRANCH" + +# Process each commit +for COMMIT in $COMMITS; do + COMMIT_MSG=$(git log -1 --format="%s" "$COMMIT") + echo -e "${GREEN}Processing commit: $COMMIT_MSG${NC}" + + # Checkout source commit in temp worktree + TEMP_DIR=$(mktemp -d) + git worktree add --detach "$TEMP_DIR" "$COMMIT" 2>/dev/null + + # Checkout target branch + git checkout "$TARGET_BRANCH" + + # Track which checkpoint directories we process + PROCESSED_DIRS="" + + # Find all checkpoint directories (pattern: XX/YYYYYYYY/) + cd "$TEMP_DIR" + CHECKPOINT_DIRS=$(find . -maxdepth 2 -mindepth 2 -type d | grep -E '^\./[0-9a-f]{2}/[0-9a-f]+$' || true) + + for CHECKPOINT_PATH in $CHECKPOINT_DIRS; do + CHECKPOINT_DIR="${CHECKPOINT_PATH#./}" + echo " Processing checkpoint: $CHECKPOINT_DIR" + + # Track this directory for git add later + PROCESSED_DIRS="$PROCESSED_DIRS $CHECKPOINT_DIR" + + # Create checkpoint dir in target if not exists + mkdir -p "$OLDPWD/$CHECKPOINT_DIR" + + # Check if root has session files (metadata.json with session_id) + if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then + ROOT_META="$CHECKPOINT_PATH/metadata.json" + + # Check if this is session metadata (has session_id) or already aggregated + if jq -e '.session_id' "$ROOT_META" > /dev/null 2>&1; then + # This is session metadata at root - needs migration + + # Find existing numbered subdirs + EXISTING_SUBDIRS=$(find "$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*' | sort -t'/' -k3 -n -r || true) + + # Calculate next session number (renumber existing + 1 for root) + NEXT_NUM=0 + + # Renumber existing subdirs (in reverse to avoid conflicts) + for SUBDIR in $EXISTING_SUBDIRS; do + OLD_NUM=$(basename "$SUBDIR") + NEW_NUM=$((OLD_NUM + 1)) + + # Copy to target with new number + mkdir -p "$OLDPWD/$CHECKPOINT_DIR/$NEW_NUM" + # Copy non-metadata files + for FILE in context.md prompt.txt content_hash.txt full.jsonl; do + if [[ -f "$SUBDIR/$FILE" ]]; then + 
cp "$SUBDIR/$FILE" "$OLDPWD/$CHECKPOINT_DIR/$NEW_NUM/" + fi + done + # Transform metadata.json: agents to string, remove session_ids and session_count + if [[ -f "$SUBDIR/metadata.json" ]]; then + jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . end' \ + "$SUBDIR/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/$NEW_NUM/metadata.json" + fi + + if [[ $NEW_NUM -gt $NEXT_NUM ]]; then + NEXT_NUM=$NEW_NUM + fi + done + + # Move root session files to /0 + mkdir -p "$OLDPWD/$CHECKPOINT_DIR/0" + # Copy non-metadata files + for FILE in context.md prompt.txt content_hash.txt full.jsonl; do + if [[ -f "$CHECKPOINT_PATH/$FILE" ]]; then + cp "$CHECKPOINT_PATH/$FILE" "$OLDPWD/$CHECKPOINT_DIR/0/" + fi + done + # Transform metadata.json: agents to string, remove session_ids and session_count + if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then + jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . end' \ + "$CHECKPOINT_PATH/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/0/metadata.json" + fi + + # Calculate total sessions (NEXT_NUM is highest 0-based index, so count = NEXT_NUM + 1) + TOTAL_SESSIONS=$((NEXT_NUM + 1)) + + # Build sessions array and aggregate data + SESSIONS_JSON="[]" + FILES_TOUCHED="[]" + CHECKPOINTS_COUNT=0 + INPUT_TOKENS=0 + CACHE_CREATION=0 + CACHE_READ=0 + OUTPUT_TOKENS=0 + API_CALLS=0 + EARLIEST_DATE="" + + for i in $(seq 0 $((TOTAL_SESSIONS - 1))); do + SESSION_DIR="$OLDPWD/$CHECKPOINT_DIR/$i" + if [[ -d "$SESSION_DIR" ]]; then + SESSION_META="$SESSION_DIR/metadata.json" + + # Build session entry (paths are absolute from branch root) + SESSION_ENTRY=$(jq -n \ + --arg meta "/$CHECKPOINT_DIR/$i/metadata.json" \ + --arg transcript "/$CHECKPOINT_DIR/$i/full.jsonl" \ + --arg context "/$CHECKPOINT_DIR/$i/context.md" \ + --arg hash "/$CHECKPOINT_DIR/$i/content_hash.txt" \ + --arg prompt "/$CHECKPOINT_DIR/$i/prompt.txt" \ + '{metadata: $meta, transcript: $transcript, context: 
$context, content_hash: $hash, prompt: $prompt}') + + SESSIONS_JSON=$(echo "$SESSIONS_JSON" | jq --argjson entry "$SESSION_ENTRY" '. + [$entry]') + + # Aggregate from session metadata + if [[ -f "$SESSION_META" ]]; then + # Files touched (union) + SESSION_FILES=$(jq -r '.files_touched // []' "$SESSION_META") + FILES_TOUCHED=$(echo "$FILES_TOUCHED" "$SESSION_FILES" | jq -s 'add | unique') + + # Checkpoints count (sum) + CHECKPOINTS_COUNT=$((CHECKPOINTS_COUNT + $(jq -r '.checkpoints_count // 0' "$SESSION_META"))) + + # Token usage (sum) + INPUT_TOKENS=$((INPUT_TOKENS + $(jq -r '.token_usage.input_tokens // 0' "$SESSION_META"))) + CACHE_CREATION=$((CACHE_CREATION + $(jq -r '.token_usage.cache_creation_tokens // 0' "$SESSION_META"))) + CACHE_READ=$((CACHE_READ + $(jq -r '.token_usage.cache_read_tokens // 0' "$SESSION_META"))) + OUTPUT_TOKENS=$((OUTPUT_TOKENS + $(jq -r '.token_usage.output_tokens // 0' "$SESSION_META"))) + API_CALLS=$((API_CALLS + $(jq -r '.token_usage.api_call_count // 0' "$SESSION_META"))) + + fi + fi + done + + # Get base info from original root metadata + CHECKPOINT_ID=$(jq -r '.checkpoint_id // ""' "$ROOT_META") + STRATEGY=$(jq -r '.strategy // "manual-commit"' "$ROOT_META") + BRANCH=$(jq -r '.branch // ""' "$ROOT_META") + + # Create aggregated metadata.json + jq -n \ + --arg checkpoint_id "$CHECKPOINT_ID" \ + --arg strategy "$STRATEGY" \ + --arg branch "$BRANCH" \ + --argjson checkpoints_count "$CHECKPOINTS_COUNT" \ + --argjson files_touched "$FILES_TOUCHED" \ + --argjson sessions "$SESSIONS_JSON" \ + --argjson input_tokens "$INPUT_TOKENS" \ + --argjson cache_creation "$CACHE_CREATION" \ + --argjson cache_read "$CACHE_READ" \ + --argjson output_tokens "$OUTPUT_TOKENS" \ + --argjson api_calls "$API_CALLS" \ + '{ + checkpoint_id: $checkpoint_id, + strategy: $strategy, + branch: $branch, + checkpoints_count: $checkpoints_count, + files_touched: $files_touched, + sessions: $sessions, + token_usage: { + input_tokens: $input_tokens, + 
cache_creation_tokens: $cache_creation, + cache_read_tokens: $cache_read, + output_tokens: $output_tokens, + api_call_count: $api_calls + } + }' > "$OLDPWD/$CHECKPOINT_DIR/metadata.json" + + echo " Migrated: $TOTAL_SESSIONS session(s)" + else + # Already aggregated format - copy but still transform session metadata + # Transform root metadata.json to have absolute paths in sessions array + jq --arg prefix "/$CHECKPOINT_DIR" \ + '.sessions = [.sessions[] | { + metadata: ($prefix + "/" + (.metadata | ltrimstr("/"))), + transcript: ($prefix + "/" + (.transcript | ltrimstr("/"))), + context: ($prefix + "/" + (.context | ltrimstr("/"))), + content_hash: ($prefix + "/" + (.content_hash | ltrimstr("/"))), + prompt: ($prefix + "/" + (.prompt | ltrimstr("/"))) + }]' "$CHECKPOINT_PATH/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/metadata.json" + + # Copy and transform each session subdir's metadata.json + for SUBDIR in $(find "$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*'); do + SUBDIR_NUM=$(basename "$SUBDIR") + mkdir -p "$OLDPWD/$CHECKPOINT_DIR/$SUBDIR_NUM" + + # Copy non-metadata files + for FILE in context.md prompt.txt content_hash.txt full.jsonl; do + if [[ -f "$SUBDIR/$FILE" ]]; then + cp "$SUBDIR/$FILE" "$OLDPWD/$CHECKPOINT_DIR/$SUBDIR_NUM/" + fi + done + + # Transform metadata.json + if [[ -f "$SUBDIR/metadata.json" ]]; then + jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . end' \ + "$SUBDIR/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/$SUBDIR_NUM/metadata.json" + fi + done + echo " Copied with session metadata transformed" + fi + fi + done + + cd "$OLDPWD" + + # Cleanup worktree + git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" + + # Only add the specific checkpoint directories we processed + for DIR in $PROCESSED_DIRS; do + git add "$DIR" + done + + # Commit changes + if ! 
git diff --cached --quiet; then + git commit -m "$COMMIT_MSG" + echo -e " ${GREEN}Committed${NC}" + else + echo -e " ${YELLOW}No changes${NC}" + fi +done + +# Return to original branch +git checkout "$ORIGINAL_BRANCH" 2>/dev/null || git checkout main + +echo "" +echo -e "${GREEN}=== Migration Complete ===${NC}" +echo "New branch: $TARGET_BRANCH" +echo "" +echo "To verify:" +echo " git log $TARGET_BRANCH" +echo " git show $TARGET_BRANCH:/metadata.json" From d4de10e7138383a0d5a666cb9aeb3efd9a762af7 Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Wed, 4 Feb 2026 17:27:47 +1100 Subject: [PATCH 03/18] use t.Errorf instead of t.Logf Entire-Checkpoint: 7674489fb0a9 --- .../integration_test/manual_commit_workflow_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/entire/cli/integration_test/manual_commit_workflow_test.go b/cmd/entire/cli/integration_test/manual_commit_workflow_test.go index a86b552e0..f0e549a2c 100644 --- a/cmd/entire/cli/integration_test/manual_commit_workflow_test.go +++ b/cmd/entire/cli/integration_test/manual_commit_workflow_test.go @@ -793,7 +793,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { promptPath1 := SessionFilePath(checkpoint1ID, "prompt.txt") prompt1Content, found := env.ReadFileFromBranch("entire/sessions", promptPath1) if !found { - t.Logf("prompt.txt should exist at %s", promptPath1) + t.Errorf("prompt.txt should exist at %s", promptPath1) } else { t.Logf("First prompt.txt content:\n%s", prompt1Content) // Should contain both "Create function A" and "create function B" @@ -808,7 +808,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { contextPath1 := SessionFilePath(checkpoint1ID, "context.md") context1Content, found := env.ReadFileFromBranch("entire/sessions", contextPath1) if !found { - t.Logf("context.md should exist at %s", contextPath1) + t.Errorf("context.md should exist at %s", contextPath1) } else { t.Logf("First context.md content:\n%s", context1Content) } 
@@ -864,7 +864,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { promptPath2 := SessionFilePath(checkpoint2ID, "prompt.txt") prompt2Content, found := env.ReadFileFromBranch("entire/sessions", promptPath2) if !found { - t.Logf("prompt.txt should exist at %s", promptPath2) + t.Errorf("prompt.txt should exist at %s", promptPath2) } else { t.Logf("Second prompt.txt content:\n%s", prompt2Content) @@ -883,7 +883,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { contextPath2 := SessionFilePath(checkpoint2ID, "context.md") context2Content, found := env.ReadFileFromBranch("entire/sessions", contextPath2) if !found { - t.Logf("context.md should exist at %s", contextPath2) + t.Errorf("context.md should exist at %s", contextPath2) } else { t.Logf("Second context.md content:\n%s", context2Content) @@ -1012,7 +1012,7 @@ func TestShadow_RewindAndCondensation(t *testing.T) { promptPath := SessionFilePath(checkpointID, "prompt.txt") promptContent, found := env.ReadFileFromBranch("entire/sessions", promptPath) if !found { - t.Logf("prompt.txt should exist at %s", promptPath) + t.Errorf("prompt.txt should exist at %s", promptPath) } else { t.Logf("prompt.txt content:\n%s", promptContent) @@ -1031,7 +1031,7 @@ func TestShadow_RewindAndCondensation(t *testing.T) { contextPath := SessionFilePath(checkpointID, "context.md") contextContent, found := env.ReadFileFromBranch("entire/sessions", contextPath) if !found { - t.Logf("context.md should exist at %s", contextPath) + t.Errorf("context.md should exist at %s", contextPath) } else { t.Logf("context.md content:\n%s", contextContent) From 74d4fd04a1356c87c1aed675d456ae853e5f2b8a Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 09:42:46 +1100 Subject: [PATCH 04/18] migrate ReadCommitted to use new checkpoint structure Entire-Checkpoint: f3df91827bd6 --- .../cli/checkpoint/backwards_compat_test.go | 10 +- cmd/entire/cli/checkpoint/checkpoint.go | 68 +++--- 
cmd/entire/cli/checkpoint/checkpoint_test.go | 2 - cmd/entire/cli/checkpoint/committed.go | 227 +++++++++--------- cmd/entire/cli/explain.go | 67 +++--- cmd/entire/cli/explain_test.go | 181 +++++++++----- cmd/entire/cli/strategy/auto_commit.go | 18 +- .../strategy/manual_commit_condensation.go | 8 +- .../cli/strategy/manual_commit_rewind.go | 156 +++++------- 9 files changed, 381 insertions(+), 356 deletions(-) diff --git a/cmd/entire/cli/checkpoint/backwards_compat_test.go b/cmd/entire/cli/checkpoint/backwards_compat_test.go index 719d7d5e2..0ded2753a 100644 --- a/cmd/entire/cli/checkpoint/backwards_compat_test.go +++ b/cmd/entire/cli/checkpoint/backwards_compat_test.go @@ -67,17 +67,17 @@ func TestReadCommitted_MissingTokenUsage(t *testing.T) { } // Reading should succeed with nil TokenUsage - result, err := store.ReadCommitted(context.Background(), checkpointID) + summary, err := store.ReadCommitted(context.Background(), checkpointID) if err != nil { t.Fatalf("ReadCommitted() error = %v", err) } - if result.Metadata.CheckpointID != checkpointID { - t.Errorf("CheckpointID = %v, want %v", result.Metadata.CheckpointID, checkpointID) + if summary.CheckpointID != checkpointID { + t.Errorf("CheckpointID = %v, want %v", summary.CheckpointID, checkpointID) } // TokenUsage should be nil for old checkpoints without token tracking - if result.Metadata.TokenUsage != nil { - t.Errorf("TokenUsage should be nil for metadata without token_usage field, got %+v", result.Metadata.TokenUsage) + if summary.TokenUsage != nil { + t.Errorf("TokenUsage should be nil for metadata without token_usage field, got %+v", summary.TokenUsage) } } diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index f2e4223da..a2ea4dde5 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -84,9 +84,20 @@ type Store interface { // Checkpoints are stored at sharded paths: // WriteCommitted(ctx context.Context, opts 
WriteCommittedOptions) error - // ReadCommitted reads a committed checkpoint by ID. + // ReadCommitted reads a committed checkpoint's summary by ID. + // Returns only the CheckpointSummary (paths + aggregated stats), not actual content. + // Use ReadSessionContent to read actual transcript/prompts/context. // Returns nil, nil if the checkpoint does not exist. - ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*ReadCommittedResult, error) + ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*CheckpointSummary, error) + + // ReadSessionContent reads the actual content for a specific session within a checkpoint. + // sessionIndex is 0-based (0 for first session, 1 for second, etc.). + // Returns the session's metadata, transcript, prompts, and context. + ReadSessionContent(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error) + + // ReadSessionContentByID reads a session's content by its session ID. + // Useful when you have the session ID but don't know its index within the checkpoint. + ReadSessionContentByID(ctx context.Context, checkpointID id.CheckpointID, sessionID string) (*SessionContent, error) // ListCommitted lists all committed checkpoints. ListCommitted(ctx context.Context) ([]CommittedInfo, error) @@ -264,42 +275,6 @@ type WriteCommittedOptions struct { Summary *Summary } -// ReadCommittedResult contains the result of reading a committed checkpoint. -type ReadCommittedResult struct { - // Metadata contains the checkpoint metadata - Metadata CommittedMetadata - - // Transcript is the session transcript content (most recent session) - Transcript []byte - - // Prompts contains user prompts (most recent session) - Prompts string - - // Context is the context.md content - Context string - - // ArchivedSessions contains transcripts from previous sessions when multiple - // sessions were condensed to the same checkpoint. Ordered from oldest to newest - // (1/, 2/, etc.). 
The root-level Transcript is the most recent session. - ArchivedSessions []ArchivedSession -} - -// ArchivedSession contains transcript data from a previous session -// that was archived when multiple sessions contributed to the same checkpoint. -type ArchivedSession struct { - // SessionID is the session identifier for this archived session - SessionID string - - // Transcript is the session transcript content - Transcript []byte - - // Prompts contains user prompts from this session - Prompts string - - // FolderIndex is the archive folder number (1, 2, etc.) - FolderIndex int -} - // CommittedInfo contains summary information about a committed checkpoint. type CommittedInfo struct { // CheckpointID is the stable 12-hex-char identifier @@ -331,6 +306,23 @@ type CommittedInfo struct { SessionIDs []string // All session IDs that contributed } +// SessionContent contains the actual content for a session. +// This is used when reading full session data (transcript, prompts, context) +// as opposed to just the metadata/summary. +type SessionContent struct { + // Metadata contains the session-specific metadata + Metadata CommittedMetadata + + // Transcript is the session transcript content + Transcript []byte + + // Prompts contains user prompts from this session + Prompts string + + // Context is the context.md content + Context string +} + // CommittedMetadata contains the metadata stored in metadata.json for each checkpoint. 
type CommittedMetadata struct { CheckpointID id.CheckpointID `json:"checkpoint_id"` diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index d17f44967..e89adeb38 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -22,8 +22,6 @@ import ( "github.com/go-git/go-git/v5/plumbing/object" ) -const testSession1 = "session-1" - func TestCheckpointType_Values(t *testing.T) { // Verify the enum values are distinct if Temporary == Committed { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 63bb71ab3..ac28242e9 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -594,7 +594,9 @@ type taskCheckpointData struct { AgentID string `json:"agent_id,omitempty"` } -// ReadCommitted reads a committed checkpoint by ID from the entire/sessions branch. +// ReadCommitted reads a committed checkpoint's summary by ID from the entire/sessions branch. +// Returns only the CheckpointSummary (paths + aggregated stats), not actual content. +// Use ReadSessionContent to read actual transcript/prompts/context. // Returns nil, nil if the checkpoint doesn't exist. // // The storage format uses numbered subdirectories for each session (0-based): @@ -606,7 +608,7 @@ type taskCheckpointData struct { // │ └── full.jsonl # Transcript // ├── 1/ # Second session // └── ... 
-func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*ReadCommittedResult, error) { +func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*CheckpointSummary, error) { _ = ctx // Reserved for future use tree, err := s.getSessionsBranchTree() @@ -620,130 +622,127 @@ func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.Checkpoint return nil, nil //nolint:nilnil,nilerr // Checkpoint directory not found } - result := &ReadCommittedResult{} - // Read root metadata.json as CheckpointSummary - var summary CheckpointSummary - if metadataFile, fileErr := checkpointTree.File(paths.MetadataFileName); fileErr == nil { - if content, contentErr := metadataFile.Contents(); contentErr == nil { - //nolint:errcheck,gosec // Best-effort parsing, defaults are fine - json.Unmarshal([]byte(content), &summary) - } + metadataFile, err := checkpointTree.File(paths.MetadataFileName) + if err != nil { + return nil, nil //nolint:nilnil,nilerr // metadata.json not found } - // Convert CheckpointSummary to CommittedMetadata for backwards compatibility - // Note: Agent and SessionID are derived from session-level metadata - result.Metadata = CommittedMetadata{ - CheckpointID: summary.CheckpointID, - Strategy: summary.Strategy, - Branch: summary.Branch, - CheckpointsCount: summary.CheckpointsCount, - FilesTouched: summary.FilesTouched, - TokenUsage: summary.TokenUsage, - } - - // Read data from the appropriate session subdirectories - if len(summary.Sessions) > 0 { - // Find the latest session index (highest numbered directory, 0-based) - latestIndex := len(summary.Sessions) - 1 - - // Read latest session data - latestDir := strconv.Itoa(latestIndex) - if latestTree, treeErr := checkpointTree.Tree(latestDir); treeErr == nil { - // Get agent type and session info from session-specific metadata - var agentType agent.AgentType - if sessionMetadataFile, fileErr := latestTree.File(paths.MetadataFileName); fileErr == nil { 
- if content, contentErr := sessionMetadataFile.Contents(); contentErr == nil { - var sessionMetadata CommittedMetadata - if jsonErr := json.Unmarshal([]byte(content), &sessionMetadata); jsonErr == nil { - agentType = sessionMetadata.Agent - // Set fields derived from session metadata - result.Metadata.Agent = sessionMetadata.Agent - result.Metadata.SessionID = sessionMetadata.SessionID - result.Metadata.CreatedAt = sessionMetadata.CreatedAt - } - } - } + content, err := metadataFile.Contents() + if err != nil { + return nil, fmt.Errorf("failed to read metadata.json: %w", err) + } - // Read transcript - if transcript, transcriptErr := readTranscriptFromTree(latestTree, agentType); transcriptErr == nil && transcript != nil { - result.Transcript = transcript - } + var summary CheckpointSummary + if err := json.Unmarshal([]byte(content), &summary); err != nil { + return nil, fmt.Errorf("failed to parse metadata.json: %w", err) + } - // Read prompts - if file, fileErr := latestTree.File(paths.PromptFileName); fileErr == nil { - if content, contentErr := file.Contents(); contentErr == nil { - result.Prompts = content - } - } + return &summary, nil +} - // Read context - if file, fileErr := latestTree.File(paths.ContextFileName); fileErr == nil { - if content, contentErr := file.Contents(); contentErr == nil { - result.Context = content - } - } - } +// ReadSessionContent reads the actual content for a specific session within a checkpoint. +// sessionIndex is 0-based (0 for first session, 1 for second, etc.). +// Returns the session's metadata, transcript, prompts, and context. +// Returns an error if the checkpoint or session doesn't exist. 
+func (s *GitStore) ReadSessionContent(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error) { + _ = ctx // Reserved for future use - // Read archived sessions (all except the latest) - result.ArchivedSessions = s.readArchivedSessionsFromSummary(checkpointTree, summary) + tree, err := s.getSessionsBranchTree() + if err != nil { + return nil, ErrCheckpointNotFound } - return result, nil -} + checkpointPath := checkpointID.Path() + checkpointTree, err := tree.Tree(checkpointPath) + if err != nil { + return nil, ErrCheckpointNotFound + } -// readArchivedSessionsFromSummary reads transcript data from archived session subdirectories using the sessions array. -// Returns sessions ordered by folder index (oldest first), excluding the latest session. -func (s *GitStore) readArchivedSessionsFromSummary(checkpointTree *object.Tree, summary CheckpointSummary) []ArchivedSession { - var archived []ArchivedSession + // Get the session subdirectory + sessionDir := strconv.Itoa(sessionIndex) + sessionTree, err := checkpointTree.Tree(sessionDir) + if err != nil { + return nil, fmt.Errorf("session %d not found: %w", sessionIndex, err) + } - // Iterate through all sessions except the latest (0-based indexing) - // Sessions are in folders 0, 1, ..., N-1 where N-1 is the latest - sessionCount := len(summary.Sessions) - for i := range sessionCount - 1 { - folderName := strconv.Itoa(i) + result := &SessionContent{} - // Try to get the session subtree - subTree, err := checkpointTree.Tree(folderName) - if err != nil { - continue // Folder doesn't exist, skip + // Read session-specific metadata + var agentType agent.AgentType + if metadataFile, fileErr := sessionTree.File(paths.MetadataFileName); fileErr == nil { + if content, contentErr := metadataFile.Contents(); contentErr == nil { + if jsonErr := json.Unmarshal([]byte(content), &result.Metadata); jsonErr == nil { + agentType = result.Metadata.Agent + } } + } - session := ArchivedSession{ - 
FolderIndex: i, - } + // Read transcript + if transcript, transcriptErr := readTranscriptFromTree(sessionTree, agentType); transcriptErr == nil && transcript != nil { + result.Transcript = transcript + } - // Get agent type from session metadata - var agentType agent.AgentType - if metadataFile, fileErr := subTree.File(paths.MetadataFileName); fileErr == nil { - if content, contentErr := metadataFile.Contents(); contentErr == nil { - var metadata CommittedMetadata - if jsonErr := json.Unmarshal([]byte(content), &metadata); jsonErr == nil { - session.SessionID = metadata.SessionID - agentType = metadata.Agent - } - } + // Read prompts + if file, fileErr := sessionTree.File(paths.PromptFileName); fileErr == nil { + if content, contentErr := file.Contents(); contentErr == nil { + result.Prompts = content } + } - // Read transcript (handles both chunked and non-chunked formats) - if transcript, err := readTranscriptFromTree(subTree, agentType); err == nil && transcript != nil { - session.Transcript = transcript + // Read context + if file, fileErr := sessionTree.File(paths.ContextFileName); fileErr == nil { + if content, contentErr := file.Contents(); contentErr == nil { + result.Context = content } + } - // Read prompts - if file, fileErr := subTree.File(paths.PromptFileName); fileErr == nil { - if content, contentErr := file.Contents(); contentErr == nil { - session.Prompts = content - } - } + return result, nil +} + +// ReadLatestSessionContent is a convenience method that reads the latest session's content. +// This is equivalent to ReadSessionContent(ctx, checkpointID, len(summary.Sessions)-1). 
+func (s *GitStore) ReadLatestSessionContent(ctx context.Context, checkpointID id.CheckpointID) (*SessionContent, error) { + summary, err := s.ReadCommitted(ctx, checkpointID) + if err != nil { + return nil, err + } + if summary == nil { + return nil, ErrCheckpointNotFound + } + if len(summary.Sessions) == 0 { + return nil, fmt.Errorf("checkpoint has no sessions: %s", checkpointID) + } + + latestIndex := len(summary.Sessions) - 1 + return s.ReadSessionContent(ctx, checkpointID, latestIndex) +} + +// ReadSessionContentByID reads a session's content by its session ID. +// This is useful when you have the session ID but don't know its index within the checkpoint. +// Returns ErrCheckpointNotFound if the checkpoint doesn't exist. +// Returns an error if no session with the given ID exists in the checkpoint. +func (s *GitStore) ReadSessionContentByID(ctx context.Context, checkpointID id.CheckpointID, sessionID string) (*SessionContent, error) { + summary, err := s.ReadCommitted(ctx, checkpointID) + if err != nil { + return nil, err + } + if summary == nil { + return nil, ErrCheckpointNotFound + } - // Only add if we got a transcript - if len(session.Transcript) > 0 { - archived = append(archived, session) + // Iterate through sessions to find the one with matching session ID + for i := range len(summary.Sessions) { + content, readErr := s.ReadSessionContent(ctx, checkpointID, i) + if readErr != nil { + continue + } + if content != nil && content.Metadata.SessionID == sessionID { + return content, nil } } - return archived + return nil, fmt.Errorf("session %q not found in checkpoint %s", sessionID, checkpointID) } // ListCommitted lists all committed checkpoints from the entire/sessions branch. @@ -841,18 +840,16 @@ func (s *GitStore) ListCommitted(ctx context.Context) ([]CommittedInfo, error) { } // GetTranscript retrieves the transcript for a specific checkpoint ID. +// Returns the latest session's transcript. 
func (s *GitStore) GetTranscript(ctx context.Context, checkpointID id.CheckpointID) ([]byte, error) { - result, err := s.ReadCommitted(ctx, checkpointID) + content, err := s.ReadLatestSessionContent(ctx, checkpointID) if err != nil { return nil, err } - if result == nil { - return nil, fmt.Errorf("checkpoint not found: %s", checkpointID) - } - if len(result.Transcript) == 0 { + if len(content.Transcript) == 0 { return nil, fmt.Errorf("no transcript found for checkpoint: %s", checkpointID) } - return result.Transcript, nil + return content.Transcript, nil } // GetSessionLog retrieves the session transcript and session ID for a checkpoint. @@ -860,17 +857,17 @@ func (s *GitStore) GetTranscript(ctx context.Context, checkpointID id.Checkpoint // Returns ErrCheckpointNotFound if the checkpoint doesn't exist. // Returns ErrNoTranscript if the checkpoint exists but has no transcript. func (s *GitStore) GetSessionLog(cpID id.CheckpointID) ([]byte, string, error) { - result, err := s.ReadCommitted(context.Background(), cpID) + content, err := s.ReadLatestSessionContent(context.Background(), cpID) if err != nil { + if errors.Is(err, ErrCheckpointNotFound) { + return nil, "", ErrCheckpointNotFound + } return nil, "", fmt.Errorf("failed to read checkpoint: %w", err) } - if result == nil { - return nil, "", ErrCheckpointNotFound - } - if len(result.Transcript) == 0 { + if len(content.Transcript) == 0 { return nil, "", ErrNoTranscript } - return result.Transcript, result.Metadata.SessionID, nil + return content.Transcript, content.Metadata.SessionID, nil } // LookupSessionLog is a convenience function that opens the repository and retrieves diff --git a/cmd/entire/cli/explain.go b/cmd/entire/cli/explain.go index aa5914fe6..d7a9579f2 100644 --- a/cmd/entire/cli/explain.go +++ b/cmd/entire/cli/explain.go @@ -242,37 +242,40 @@ func runExplainCheckpoint(w, errW io.Writer, checkpointIDPrefix string, noPager, return fmt.Errorf("ambiguous checkpoint prefix %q matches %d checkpoints: 
%s", checkpointIDPrefix, len(matches), strings.Join(examples, ", ")) } - // Load checkpoint data - result, err := store.ReadCommitted(context.Background(), fullCheckpointID) + // Load checkpoint summary + summary, err := store.ReadCommitted(context.Background(), fullCheckpointID) if err != nil { return fmt.Errorf("failed to read checkpoint: %w", err) } - if result == nil { + if summary == nil { return fmt.Errorf("checkpoint not found: %s", fullCheckpointID) } + // Load latest session content (needed for transcript and metadata) + content, err := store.ReadLatestSessionContent(context.Background(), fullCheckpointID) + if err != nil { + return fmt.Errorf("failed to read checkpoint content: %w", err) + } + // Handle summary generation if generate { - if err := generateCheckpointSummary(w, errW, store, fullCheckpointID, result, force); err != nil { + if err := generateCheckpointSummary(w, errW, store, fullCheckpointID, summary, content, force); err != nil { return err } - // Reload the result to get the updated summary - result, err = store.ReadCommitted(context.Background(), fullCheckpointID) + // Reload the content to get the updated summary + content, err = store.ReadLatestSessionContent(context.Background(), fullCheckpointID) if err != nil { return fmt.Errorf("failed to reload checkpoint: %w", err) } - if result == nil { - return fmt.Errorf("checkpoint not found after save: %s", fullCheckpointID) - } } // Handle raw transcript output if rawTranscript { - if len(result.Transcript) == 0 { + if len(content.Transcript) == 0 { return fmt.Errorf("checkpoint %s has no transcript", fullCheckpointID) } // Output raw transcript directly (no pager, no formatting) - if _, err = w.Write(result.Transcript); err != nil { + if _, err = w.Write(content.Transcript); err != nil { return fmt.Errorf("failed to write transcript: %w", err) } return nil @@ -285,7 +288,7 @@ func runExplainCheckpoint(w, errW io.Writer, checkpointIDPrefix string, noPager, associatedCommits, _ := 
getAssociatedCommits(repo, fullCheckpointID, searchAll) //nolint:errcheck // Best-effort // Format and output - output := formatCheckpointOutput(result, fullCheckpointID, associatedCommits, author, verbose, full) + output := formatCheckpointOutput(summary, content, fullCheckpointID, associatedCommits, author, verbose, full) outputExplainContent(w, output, noPager) return nil } @@ -293,19 +296,19 @@ func runExplainCheckpoint(w, errW io.Writer, checkpointIDPrefix string, noPager, // generateCheckpointSummary generates an AI summary for a checkpoint and persists it. // The summary is generated from the scoped transcript (only this checkpoint's portion), // not the entire session transcript. -func generateCheckpointSummary(w, _ io.Writer, store *checkpoint.GitStore, checkpointID id.CheckpointID, result *checkpoint.ReadCommittedResult, force bool) error { +func generateCheckpointSummary(w, _ io.Writer, store *checkpoint.GitStore, checkpointID id.CheckpointID, cpSummary *checkpoint.CheckpointSummary, content *checkpoint.SessionContent, force bool) error { // Check if summary already exists - if result.Metadata.Summary != nil && !force { + if content.Metadata.Summary != nil && !force { return fmt.Errorf("checkpoint %s already has a summary (use --force to regenerate)", checkpointID) } // Check if transcript exists - if len(result.Transcript) == 0 { + if len(content.Transcript) == 0 { return fmt.Errorf("checkpoint %s has no transcript to summarize", checkpointID) } // Scope the transcript to only this checkpoint's portion - scopedTranscript := scopeTranscriptForCheckpoint(result.Transcript, result.Metadata.TranscriptLinesAtStart) + scopedTranscript := scopeTranscriptForCheckpoint(content.Transcript, content.Metadata.TranscriptLinesAtStart) if len(scopedTranscript) == 0 { return fmt.Errorf("checkpoint %s has no transcript content for this checkpoint (scoped)", checkpointID) } @@ -314,7 +317,7 @@ func generateCheckpointSummary(w, _ io.Writer, store *checkpoint.GitStore, 
check ctx := context.Background() logging.Info(ctx, "generating checkpoint summary") - summary, err := summarize.GenerateFromTranscript(ctx, scopedTranscript, result.Metadata.FilesTouched, nil) + summary, err := summarize.GenerateFromTranscript(ctx, scopedTranscript, cpSummary.FilesTouched, nil) if err != nil { return fmt.Errorf("failed to generate summary: %w", err) } @@ -578,14 +581,14 @@ func extractPromptsFromTranscript(transcriptBytes []byte) []string { // // Author is displayed when available (only for committed checkpoints). // Associated commits are git commits that reference this checkpoint via Entire-Checkpoint trailer. -func formatCheckpointOutput(result *checkpoint.ReadCommittedResult, checkpointID id.CheckpointID, associatedCommits []associatedCommit, author checkpoint.Author, verbose, full bool) string { +func formatCheckpointOutput(summary *checkpoint.CheckpointSummary, content *checkpoint.SessionContent, checkpointID id.CheckpointID, associatedCommits []associatedCommit, author checkpoint.Author, verbose, full bool) string { var sb strings.Builder - meta := result.Metadata + meta := content.Metadata // Scope the transcript to this checkpoint's portion // If TranscriptLinesAtStart > 0, we slice the transcript to only include // lines from that point onwards (excluding earlier checkpoint content) - scopedTranscript := scopeTranscriptForCheckpoint(result.Transcript, meta.TranscriptLinesAtStart) + scopedTranscript := scopeTranscriptForCheckpoint(content.Transcript, meta.TranscriptLinesAtStart) // Extract prompts from the scoped transcript for intent extraction scopedPrompts := extractPromptsFromTranscript(scopedTranscript) @@ -601,10 +604,14 @@ func formatCheckpointOutput(result *checkpoint.ReadCommittedResult, checkpointID fmt.Fprintf(&sb, "Author: %s <%s>\n", author.Name, author.Email) } - // Token usage - if meta.TokenUsage != nil { - totalTokens := meta.TokenUsage.InputTokens + meta.TokenUsage.CacheCreationTokens + - meta.TokenUsage.CacheReadTokens 
+ meta.TokenUsage.OutputTokens + // Token usage - prefer content metadata, fall back to summary + tokenUsage := meta.TokenUsage + if tokenUsage == nil && summary != nil { + tokenUsage = summary.TokenUsage + } + if tokenUsage != nil { + totalTokens := tokenUsage.InputTokens + tokenUsage.CacheCreationTokens + + tokenUsage.CacheReadTokens + tokenUsage.OutputTokens fmt.Fprintf(&sb, "Tokens: %d\n", totalTokens) } @@ -632,9 +639,9 @@ func formatCheckpointOutput(result *checkpoint.ReadCommittedResult, checkpointID intent := "(not generated)" if len(scopedPrompts) > 0 && scopedPrompts[0] != "" { intent = strategy.TruncateDescription(scopedPrompts[0], maxIntentDisplayLength) - } else if result.Prompts != "" { + } else if content.Prompts != "" { // Backwards compatibility: use stored prompts if no transcript available - lines := strings.Split(result.Prompts, "\n") + lines := strings.Split(content.Prompts, "\n") if len(lines) > 0 && lines[0] != "" { intent = strategy.TruncateDescription(lines[0], maxIntentDisplayLength) } @@ -664,7 +671,7 @@ func formatCheckpointOutput(result *checkpoint.ReadCommittedResult, checkpointID } // Transcript section: full shows entire session, verbose shows checkpoint scope - appendTranscriptSection(&sb, verbose, full, result.Transcript, scopedTranscript, result.Prompts) + appendTranscriptSection(&sb, verbose, full, content.Transcript, scopedTranscript, content.Prompts) return sb.String() } @@ -920,12 +927,12 @@ func getBranchCheckpoints(repo *git.Repository, limit int) ([]strategy.RewindPoi Agent: cpInfo.Agent, } // Read session prompt from metadata branch (best-effort) - result, _ := store.ReadCommitted(context.Background(), cpID) //nolint:errcheck // Best-effort - if result != nil { + content, _ := store.ReadLatestSessionContent(context.Background(), cpID) //nolint:errcheck // Best-effort + if content != nil { // Scope the transcript to this checkpoint's portion // If TranscriptLinesAtStart > 0, we slice the transcript to only include // lines 
from that point onwards (excluding earlier checkpoint content) - scopedTranscript := scopeTranscriptForCheckpoint(result.Transcript, result.Metadata.TranscriptLinesAtStart) + scopedTranscript := scopeTranscriptForCheckpoint(content.Transcript, content.Metadata.TranscriptLinesAtStart) // Extract prompts from the scoped transcript (not the full session's prompts) scopedPrompts := extractPromptsFromTranscript(scopedTranscript) if len(scopedPrompts) > 0 && scopedPrompts[0] != "" { diff --git a/cmd/entire/cli/explain_test.go b/cmd/entire/cli/explain_test.go index 380ec60cc..71affe0c9 100644 --- a/cmd/entire/cli/explain_test.go +++ b/cmd/entire/cli/explain_test.go @@ -868,7 +868,16 @@ func TestRunExplainCheckpoint_NotFound(t *testing.T) { } func TestFormatCheckpointOutput_Short(t *testing.T) { - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + CheckpointsCount: 3, + FilesTouched: []string{"main.go", "util.go"}, + TokenUsage: &agent.TokenUsage{ + InputTokens: 10000, + OutputTokens: 5000, + }, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: "abc123def456", SessionID: "2026-01-21-test-session", @@ -883,8 +892,8 @@ func TestFormatCheckpointOutput_Short(t *testing.T) { Prompts: "Add a new feature", } - // Default mode: nil associated commits (not shown anyway in default mode) - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, false, false) + // Default mode: empty commit message (not shown anyway in default mode) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, false, false) // Should show checkpoint ID if !strings.Contains(output, "abc123def456") { @@ -921,24 +930,33 @@ func TestFormatCheckpointOutput_Verbose(t *testing.T) { {"type":"user","uuid":"u3","message":{"content":"Refactor the code"}} `) - result := 
&checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + CheckpointsCount: 3, + FilesTouched: []string{"main.go", "util.go", "config.yaml"}, + TokenUsage: &agent.TokenUsage{ + InputTokens: 10000, + OutputTokens: 5000, + }, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ - CheckpointID: "abc123def456", - SessionID: "2026-01-21-test-session", - CreatedAt: time.Date(2026, 1, 21, 10, 30, 0, 0, time.UTC), - FilesTouched: []string{"main.go", "util.go", "config.yaml"}, - CheckpointsCount: 3, + CheckpointID: "abc123def456", + SessionID: "2026-01-21-test-session", + CreatedAt: time.Date(2026, 1, 21, 10, 30, 0, 0, time.UTC), + FilesTouched: []string{"main.go", "util.go", "config.yaml"}, + CheckpointsCount: 3, + TranscriptLinesAtStart: 0, // All content is this checkpoint's TokenUsage: &agent.TokenUsage{ InputTokens: 10000, OutputTokens: 5000, }, - TranscriptLinesAtStart: 0, // All content is this checkpoint's }, Prompts: "Add a new feature\nFix the bug\nRefactor the code", Transcript: transcriptContent, } - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) // Should show checkpoint ID (like default) if !strings.Contains(output, "abc123def456") { @@ -971,8 +989,13 @@ func TestFormatCheckpointOutput_Verbose(t *testing.T) { } } -func TestFormatCheckpointOutput_Verbose_NilAssociatedCommits(t *testing.T) { - result := &checkpoint.ReadCommittedResult{ +func TestFormatCheckpointOutput_Verbose_NoCommitMessage(t *testing.T) { + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + CheckpointsCount: 1, + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: 
"abc123def456", SessionID: "2026-01-21-test-session", @@ -983,8 +1006,8 @@ func TestFormatCheckpointOutput_Verbose_NilAssociatedCommits(t *testing.T) { Prompts: "Add a feature", } - // When associated commits is nil (not searched), should not show Commits section at all - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) + // When commit message is empty, should not show Commit section + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) if strings.Contains(output, "Commits:") { t.Error("verbose output should not show Commits section when nil (not searched)") @@ -996,7 +1019,16 @@ func TestFormatCheckpointOutput_Full(t *testing.T) { transcriptData := `{"type":"user","message":{"content":"Add a new feature"}} {"type":"assistant","message":{"content":[{"type":"text","text":"I'll add that feature for you."}]}}` - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + CheckpointsCount: 3, + FilesTouched: []string{"main.go", "util.go"}, + TokenUsage: &agent.TokenUsage{ + InputTokens: 10000, + OutputTokens: 5000, + }, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: "abc123def456", SessionID: "2026-01-21-test-session", @@ -1012,7 +1044,7 @@ func TestFormatCheckpointOutput_Full(t *testing.T) { Transcript: []byte(transcriptData), } - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, false, true) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, false, true) // Should show checkpoint ID (like default) if !strings.Contains(output, "abc123def456") { @@ -1037,7 +1069,11 @@ func TestFormatCheckpointOutput_Full(t *testing.T) { func TestFormatCheckpointOutput_WithSummary(t *testing.T) { 
cpID := id.MustCheckpointID("abc123456789") - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: cpID, + FilesTouched: []string{"file1.go", "file2.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: cpID, SessionID: "2026-01-22-test-session", @@ -1059,7 +1095,7 @@ func TestFormatCheckpointOutput_WithSummary(t *testing.T) { } // Test default output (non-verbose) with summary - output := formatCheckpointOutput(result, cpID, nil, checkpoint.Author{}, false, false) + output := formatCheckpointOutput(summary, content, cpID, nil, checkpoint.Author{}, false, false) // Should show AI-generated intent and outcome if !strings.Contains(output, "Intent: Implement user authentication") { @@ -1074,7 +1110,7 @@ func TestFormatCheckpointOutput_WithSummary(t *testing.T) { } // Test verbose output with summary - verboseOutput := formatCheckpointOutput(result, cpID, nil, checkpoint.Author{}, true, false) + verboseOutput := formatCheckpointOutput(summary, content, cpID, nil, checkpoint.Author{}, true, false) // Verbose should show learnings sections if !strings.Contains(verboseOutput, "Learnings:") { @@ -2057,7 +2093,11 @@ func TestFormatCheckpointOutput_UsesScopedPrompts(t *testing.T) { {"type":"assistant","uuid":"a2","message":{"content":[{"type":"text","text":"Second response"}]}} `) - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: "abc123def456", SessionID: "2026-01-30-test-session", @@ -2070,7 +2110,7 @@ func TestFormatCheckpointOutput_UsesScopedPrompts(t *testing.T) { } // Verbose output should use scoped prompts - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) + output := 
formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) // Should show ONLY the second prompt (scoped) if !strings.Contains(output, "Second prompt - SHOULD appear") { @@ -2085,7 +2125,11 @@ func TestFormatCheckpointOutput_UsesScopedPrompts(t *testing.T) { func TestFormatCheckpointOutput_FallsBackToStoredPrompts(t *testing.T) { // Test backwards compatibility: when no transcript exists, use stored prompts - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: "abc123def456", SessionID: "2026-01-30-test-session", @@ -2098,7 +2142,7 @@ func TestFormatCheckpointOutput_FallsBackToStoredPrompts(t *testing.T) { } // Verbose output should fall back to stored prompts - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, true, false) // Intent should use stored prompt if !strings.Contains(output, "Stored prompt from older checkpoint") { @@ -2114,7 +2158,11 @@ func TestFormatCheckpointOutput_FullShowsEntireTranscript(t *testing.T) { {"type":"assistant","uuid":"a2","message":{"content":[{"type":"text","text":"Second response"}]}} `) - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ CheckpointID: "abc123def456", SessionID: "2026-01-30-test-session", @@ -2126,7 +2174,7 @@ func TestFormatCheckpointOutput_FullShowsEntireTranscript(t *testing.T) { } // Full mode should show the ENTIRE transcript (not scoped) - 
output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, false, true) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, checkpoint.Author{}, false, true) // Should show the full transcript including first prompt (even though scoped prompts exclude it) if !strings.Contains(output, "First prompt") { @@ -2364,14 +2412,20 @@ func TestRunExplain_SessionWithCommitStillMutuallyExclusive(t *testing.T) { } func TestFormatCheckpointOutput_WithAuthor(t *testing.T) { - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ - CheckpointID: "abc123def456", - SessionID: "2026-01-30-test-session", - CreatedAt: time.Date(2026, 1, 30, 10, 30, 0, 0, time.UTC), - FilesTouched: []string{"main.go"}, + CheckpointID: "abc123def456", + SessionID: "2026-01-30-test-session", + CreatedAt: time.Date(2026, 1, 30, 10, 30, 0, 0, time.UTC), + FilesTouched: []string{"main.go"}, + TranscriptLinesAtStart: 0, }, - Prompts: "Add a new feature", + Prompts: "Add a new feature", + Transcript: nil, // No transcript available } author := checkpoint.Author{ @@ -2380,7 +2434,7 @@ func TestFormatCheckpointOutput_WithAuthor(t *testing.T) { } // With author, should show author line - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, author, true, false) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, author, true, false) if !strings.Contains(output, "Author: Alice Developer ") { t.Errorf("expected author line in output, got:\n%s", output) @@ -2388,20 +2442,27 @@ func TestFormatCheckpointOutput_WithAuthor(t *testing.T) { } func TestFormatCheckpointOutput_EmptyAuthor(t *testing.T) { - result := &checkpoint.ReadCommittedResult{ + // Test 
backwards compatibility: when no transcript exists, use stored prompts + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ - CheckpointID: "abc123def456", - SessionID: "2026-01-30-test-session", - CreatedAt: time.Date(2026, 1, 30, 10, 30, 0, 0, time.UTC), - FilesTouched: []string{"main.go"}, + CheckpointID: "abc123def456", + SessionID: "2026-01-30-test-session", + CreatedAt: time.Date(2026, 1, 30, 10, 30, 0, 0, time.UTC), + FilesTouched: []string{"main.go"}, + TranscriptLinesAtStart: 0, }, - Prompts: "Add a new feature", + Prompts: "Add a new feature", + Transcript: nil, // No transcript available } // Empty author - should not show author line author := checkpoint.Author{} - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), nil, author, true, false) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), nil, author, true, false) if strings.Contains(output, "Author:") { t.Errorf("expected no author line for empty author, got:\n%s", output) @@ -2645,14 +2706,20 @@ func TestGetAssociatedCommits_MultipleMatches(t *testing.T) { } func TestFormatCheckpointOutput_WithAssociatedCommits(t *testing.T) { - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ - CheckpointID: "abc123def456", - SessionID: "2026-02-04-test-session", - CreatedAt: time.Date(2026, 2, 4, 10, 30, 0, 0, time.UTC), - FilesTouched: []string{"main.go"}, + CheckpointID: "abc123def456", + SessionID: "2026-02-04-test-session", + CreatedAt: time.Date(2026, 2, 4, 10, 30, 0, 0, time.UTC), + FilesTouched: []string{"main.go"}, + TranscriptLinesAtStart: 0, }, - Prompts: "Add a new 
feature", + Prompts: "Add a new feature", + Transcript: nil, // No transcript available } associatedCommits := []associatedCommit{ @@ -2672,7 +2739,7 @@ func TestFormatCheckpointOutput_WithAssociatedCommits(t *testing.T) { }, } - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), associatedCommits, checkpoint.Author{}, true, false) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), associatedCommits, checkpoint.Author{}, true, false) // Should show commits section with count if !strings.Contains(output, "Commits: (2)") { @@ -2698,20 +2765,26 @@ func TestFormatCheckpointOutput_WithAssociatedCommits(t *testing.T) { } func TestFormatCheckpointOutput_NoCommitsOnBranch(t *testing.T) { - result := &checkpoint.ReadCommittedResult{ + summary := &checkpoint.CheckpointSummary{ + CheckpointID: id.MustCheckpointID("abc123def456"), + FilesTouched: []string{"main.go"}, + } + content := &checkpoint.SessionContent{ Metadata: checkpoint.CommittedMetadata{ - CheckpointID: "abc123def456", - SessionID: "2026-02-04-test-session", - CreatedAt: time.Date(2026, 2, 4, 10, 30, 0, 0, time.UTC), - FilesTouched: []string{"main.go"}, + CheckpointID: "abc123def456", + SessionID: "2026-02-04-test-session", + CreatedAt: time.Date(2026, 2, 4, 10, 30, 0, 0, time.UTC), + FilesTouched: []string{"main.go"}, + TranscriptLinesAtStart: 0, }, - Prompts: "Add a new feature", + Prompts: "Add a new feature", + Transcript: nil, // No transcript available } // No associated commits - use empty slice (not nil) to indicate "searched but found none" associatedCommits := []associatedCommit{} - output := formatCheckpointOutput(result, id.MustCheckpointID("abc123def456"), associatedCommits, checkpoint.Author{}, true, false) + output := formatCheckpointOutput(summary, content, id.MustCheckpointID("abc123def456"), associatedCommits, checkpoint.Author{}, true, false) // Should show message indicating no commits found if !strings.Contains(output, "Commits: 
No commits found on this branch") { diff --git a/cmd/entire/cli/strategy/auto_commit.go b/cmd/entire/cli/strategy/auto_commit.go index 652826358..310356c33 100644 --- a/cmd/entire/cli/strategy/auto_commit.go +++ b/cmd/entire/cli/strategy/auto_commit.go @@ -866,12 +866,12 @@ func (s *AutoCommitStrategy) GetSessionContext(sessionID string) string { return "" } - result, err := store.ReadCommitted(context.Background(), cp.CheckpointID) - if err != nil || result == nil { + content, err := store.ReadSessionContentByID(context.Background(), cp.CheckpointID, sessionID) + if err != nil || content == nil { return "" } - return result.Context + return content.Context } // GetCheckpointLog returns the session transcript for a specific checkpoint. @@ -886,15 +886,15 @@ func (s *AutoCommitStrategy) GetCheckpointLog(cp Checkpoint) ([]byte, error) { return nil, fmt.Errorf("failed to get checkpoint store: %w", err) } - result, err := store.ReadCommitted(context.Background(), cp.CheckpointID) + content, err := store.ReadLatestSessionContent(context.Background(), cp.CheckpointID) if err != nil { return nil, fmt.Errorf("failed to read checkpoint: %w", err) } - if result == nil { + if content == nil { return nil, ErrNoMetadata } - return result.Transcript, nil + return content.Transcript, nil } // InitializeSession creates session state for a new session. 
@@ -974,12 +974,12 @@ func (s *AutoCommitStrategy) ListOrphanedItems() ([]CleanupItem, error) { // Filter to only auto-commit checkpoints (identified by strategy in metadata) autoCommitCheckpoints := make(map[string]bool) for _, cp := range checkpoints { - result, readErr := cpStore.ReadCommitted(context.Background(), cp.CheckpointID) - if readErr != nil || result == nil { + summary, readErr := cpStore.ReadCommitted(context.Background(), cp.CheckpointID) + if readErr != nil || summary == nil { continue } // Only consider checkpoints created by this strategy - if result.Metadata.Strategy == StrategyNameAutoCommit { + if summary.Strategy == StrategyNameAutoCommit { autoCommitCheckpoints[cp.CheckpointID.String()] = true } } diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go index cc76d56a8..86260e0d9 100644 --- a/cmd/entire/cli/strategy/manual_commit_condensation.go +++ b/cmd/entire/cli/strategy/manual_commit_condensation.go @@ -85,18 +85,18 @@ func (s *ManualCommitStrategy) getCheckpointLog(checkpointID id.CheckpointID) ([ return nil, fmt.Errorf("failed to get checkpoint store: %w", err) } - result, err := store.ReadCommitted(context.Background(), checkpointID) + content, err := store.ReadLatestSessionContent(context.Background(), checkpointID) if err != nil { return nil, fmt.Errorf("failed to read checkpoint: %w", err) } - if result == nil { + if content == nil { return nil, fmt.Errorf("checkpoint not found: %s", checkpointID) } - if len(result.Transcript) == 0 { + if len(content.Transcript) == 0 { return nil, fmt.Errorf("no transcript found for checkpoint: %s", checkpointID) } - return result.Transcript, nil + return content.Transcript, nil } // CondenseSession condenses a session's shadow branch to permanent storage. 
diff --git a/cmd/entire/cli/strategy/manual_commit_rewind.go b/cmd/entire/cli/strategy/manual_commit_rewind.go index 022ad3b27..8430ab542 100644 --- a/cmd/entire/cli/strategy/manual_commit_rewind.go +++ b/cmd/entire/cli/strategy/manual_commit_rewind.go @@ -636,17 +636,14 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(point RewindPoint, force bool) er return fmt.Errorf("failed to get checkpoint store: %w", err) } - // Read full checkpoint data including archived sessions - result, err := store.ReadCommitted(context.Background(), point.CheckpointID) + // Read checkpoint summary to get session count + summary, err := store.ReadCommitted(context.Background(), point.CheckpointID) if err != nil { return fmt.Errorf("failed to read checkpoint: %w", err) } - if result == nil { + if summary == nil { return fmt.Errorf("checkpoint not found: %s", point.CheckpointID) } - if len(result.Transcript) == 0 { - return fmt.Errorf("no transcript found for checkpoint: %s", point.CheckpointID) - } // Get repo root for Claude project path lookup repoRoot, err := paths.RepoRoot() @@ -666,7 +663,7 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(point RewindPoint, force bool) er // Check for newer local logs if not forcing if !force { - sessions := s.classifySessionsForRestore(claudeProjectDir, result) + sessions := s.classifySessionsForRestore(context.Background(), claudeProjectDir, store, point.CheckpointID, summary) hasConflicts := false for _, sess := range sessions { if sess.Status == StatusLocalNewer { @@ -687,21 +684,29 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(point RewindPoint, force bool) er } // Count sessions to restore - totalSessions := 1 + len(result.ArchivedSessions) + totalSessions := len(summary.Sessions) + if totalSessions == 0 { + totalSessions = 1 // backwards compatibility + } if totalSessions > 1 { fmt.Fprintf(os.Stderr, "Restoring %d sessions from checkpoint:\n", totalSessions) } - // Restore archived sessions first (oldest to newest) - for _, archived := 
range result.ArchivedSessions { - if len(archived.Transcript) == 0 { + // Restore all sessions (oldest to newest, using 0-based indexing) + for i := range totalSessions { + content, readErr := store.ReadSessionContent(context.Background(), point.CheckpointID, i) + if readErr != nil { + fmt.Fprintf(os.Stderr, " Warning: failed to read session %d: %v\n", i, readErr) + continue + } + if content == nil || len(content.Transcript) == 0 { continue } - sessionID := archived.SessionID + sessionID := content.Metadata.SessionID if sessionID == "" { // Fallback: can't identify session without ID - fmt.Fprintf(os.Stderr, " Warning: archived session %d has no session ID, skipping\n", archived.FolderIndex) + fmt.Fprintf(os.Stderr, " Warning: session %d has no session ID, skipping\n", i) continue } @@ -709,74 +714,34 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(point RewindPoint, force bool) er claudeSessionFile := filepath.Join(claudeProjectDir, modelSessionID+".jsonl") // Get first prompt for display - promptPreview := ExtractFirstPrompt(archived.Prompts) - if promptPreview != "" { - fmt.Fprintf(os.Stderr, " Session %d: %s\n", archived.FolderIndex, promptPreview) - } - - fmt.Fprintf(os.Stderr, " Writing to: %s\n", claudeSessionFile) - if err := os.WriteFile(claudeSessionFile, archived.Transcript, 0o600); err != nil { - fmt.Fprintf(os.Stderr, " Warning: failed to write transcript: %v\n", err) - continue - } - } - - // Restore the most recent session (at root level) - sessionID := result.Metadata.SessionID - if sessionID == "" { - // Fall back to extracting from commit's Entire-Session trailer - sessionID = s.extractSessionIDFromCommit(point.ID) - if sessionID == "" { - return errors.New("failed to determine session ID for latest session") + promptPreview := ExtractFirstPrompt(content.Prompts) + + if totalSessions > 1 { + isLatest := i == totalSessions-1 + if promptPreview != "" { + if isLatest { + fmt.Fprintf(os.Stderr, " Session %d (latest): %s\n", i+1, promptPreview) + } 
else { + fmt.Fprintf(os.Stderr, " Session %d: %s\n", i+1, promptPreview) + } + } + fmt.Fprintf(os.Stderr, " Writing to: %s\n", claudeSessionFile) + } else { + fmt.Fprintf(os.Stderr, "Writing transcript to: %s\n", claudeSessionFile) } - } - - modelSessionID := sessionid.ModelSessionID(sessionID) - claudeSessionFile := filepath.Join(claudeProjectDir, modelSessionID+".jsonl") - if totalSessions > 1 { - promptPreview := ExtractFirstPrompt(result.Prompts) - if promptPreview != "" { - fmt.Fprintf(os.Stderr, " Session %d (latest): %s\n", totalSessions, promptPreview) + if writeErr := os.WriteFile(claudeSessionFile, content.Transcript, 0o600); writeErr != nil { + if totalSessions > 1 { + fmt.Fprintf(os.Stderr, " Warning: failed to write transcript: %v\n", writeErr) + continue + } + return fmt.Errorf("failed to write transcript: %w", writeErr) } - fmt.Fprintf(os.Stderr, " Writing to: %s\n", claudeSessionFile) - } else { - fmt.Fprintf(os.Stderr, "Writing transcript to: %s\n", claudeSessionFile) - } - - if err := os.WriteFile(claudeSessionFile, result.Transcript, 0o600); err != nil { - return fmt.Errorf("failed to write transcript: %w", err) } return nil } -// extractSessionIDFromCommit extracts the session ID from a commit's Entire-Session trailer. -func (s *ManualCommitStrategy) extractSessionIDFromCommit(commitHash string) string { - repo, err := OpenRepository() - if err != nil { - return "" - } - - hash, err := repo.ResolveRevision(plumbing.Revision(commitHash)) - if err != nil { - return "" - } - - commit, err := repo.CommitObject(*hash) - if err != nil { - return "" - } - - // Parse Entire-Session trailer - sessionID, found := trailers.ParseSession(commit.Message) - if found { - return sessionID - } - - return "" -} - // readSessionPrompt reads the first prompt from the session's prompt.txt file stored in git. // Returns an empty string if the prompt cannot be read. 
func readSessionPrompt(repo *git.Repository, commitHash plumbing.Hash, metadataDir string) string { @@ -825,45 +790,38 @@ type SessionRestoreInfo struct { CheckpointTime time.Time } -// classifySessionsForRestore checks all sessions in a checkpoint result and returns info +// classifySessionsForRestore checks all sessions in a checkpoint and returns info // about each session, including whether local logs have newer timestamps. -func (s *ManualCommitStrategy) classifySessionsForRestore(claudeProjectDir string, result *cpkg.ReadCommittedResult) []SessionRestoreInfo { +func (s *ManualCommitStrategy) classifySessionsForRestore(ctx context.Context, claudeProjectDir string, store cpkg.Store, checkpointID id.CheckpointID, summary *cpkg.CheckpointSummary) []SessionRestoreInfo { var sessions []SessionRestoreInfo - // Check archived sessions - for _, archived := range result.ArchivedSessions { - if len(archived.Transcript) == 0 || archived.SessionID == "" { + totalSessions := len(summary.Sessions) + if totalSessions == 0 { + totalSessions = 1 // backwards compatibility + } + + // Check all sessions (0-based indexing) + for i := range totalSessions { + content, err := store.ReadSessionContent(ctx, checkpointID, i) + if err != nil || content == nil || len(content.Transcript) == 0 { continue } - modelSessionID := sessionid.ModelSessionID(archived.SessionID) - localPath := filepath.Join(claudeProjectDir, modelSessionID+".jsonl") - - localTime := paths.GetLastTimestampFromFile(localPath) - checkpointTime := paths.GetLastTimestampFromBytes(archived.Transcript) - status := ClassifyTimestamps(localTime, checkpointTime) - - sessions = append(sessions, SessionRestoreInfo{ - SessionID: archived.SessionID, - Prompt: ExtractFirstPrompt(archived.Prompts), - Status: status, - LocalTime: localTime, - CheckpointTime: checkpointTime, - }) - } + sessionID := content.Metadata.SessionID + if sessionID == "" { + continue + } - // Check primary session - if result.Metadata.SessionID != "" && 
len(result.Transcript) > 0 { - modelSessionID := sessionid.ModelSessionID(result.Metadata.SessionID) + modelSessionID := sessionid.ModelSessionID(sessionID) localPath := filepath.Join(claudeProjectDir, modelSessionID+".jsonl") localTime := paths.GetLastTimestampFromFile(localPath) - checkpointTime := paths.GetLastTimestampFromBytes(result.Transcript) + checkpointTime := paths.GetLastTimestampFromBytes(content.Transcript) status := ClassifyTimestamps(localTime, checkpointTime) sessions = append(sessions, SessionRestoreInfo{ - SessionID: result.Metadata.SessionID, - Prompt: ExtractFirstPrompt(result.Prompts), + SessionID: sessionID, + Prompt: ExtractFirstPrompt(content.Prompts), Status: status, LocalTime: localTime, CheckpointTime: checkpointTime, From 766731ce860bc6734d185fb3ee59b5bab4ad4880 Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 09:49:50 +1100 Subject: [PATCH 05/18] delete dead code --- cmd/entire/cli/session/session.go | 98 ---------- cmd/entire/cli/session/session_test.go | 246 ------------------------- cmd/entire/cli/session/state.go | 98 ---------- 3 files changed, 442 deletions(-) delete mode 100644 cmd/entire/cli/session/session.go delete mode 100644 cmd/entire/cli/session/session_test.go diff --git a/cmd/entire/cli/session/session.go b/cmd/entire/cli/session/session.go deleted file mode 100644 index ba8f28512..000000000 --- a/cmd/entire/cli/session/session.go +++ /dev/null @@ -1,98 +0,0 @@ -// Package session provides domain types and interfaces for managing AI coding sessions. -// -// A Session represents a unit of work with an AI agent (Claude Code, Cursor, etc.). -// Sessions can be nested - when a subagent runs, it creates a sub-session within -// the parent session. -// -// This package provides two levels of abstraction: -// -// 1. Sessions interface - High-level CRUD operations for full session objects, -// combining session state with checkpoint data. 
This is the primary interface -// for commands and the UI layer. -// -// 2. StateStore - Low-level primitive for managing session state files in -// .git/entire-sessions/. This tracks active session state (base commit, -// checkpoint count, etc.) but doesn't handle checkpoint content. Strategies -// use this directly for performance-critical state management. -// -// See docs/architecture/sessions-and-checkpoints.md for the full domain model. -package session - -import ( - "context" - "time" - - "entire.io/cli/cmd/entire/cli/agent" - "entire.io/cli/cmd/entire/cli/checkpoint" -) - -// Session represents a unit of work with an AI coding agent. -// Sessions can be nested when subagents are used. -type Session struct { - // ID is the unique session identifier - ID string - - // FirstPrompt is the raw first user prompt (immutable) - FirstPrompt string - - // Description is a human-readable summary (derived or editable) - Description string - - // StartTime is when the session was started - StartTime time.Time - - // AgentType identifies the AI agent (e.g., "Claude Code", "Cursor") - AgentType agent.AgentType - - // AgentSessionID is the agent's internal session identifier - AgentSessionID string - - // Checkpoints contains save points within this session - Checkpoints []checkpoint.Checkpoint - - // SubSessions contains nested sessions from subagent work - SubSessions []Session - - // ParentID is the parent session ID (empty for top-level sessions) - ParentID string - - // ToolUseID is the tool invocation that spawned this sub-session - // (empty for top-level sessions) - ToolUseID string -} - -// IsSubSession returns true if this session is a sub-session (has a parent). -func (s *Session) IsSubSession() bool { - return s.ParentID != "" -} - -// Sessions provides operations for managing sessions. -type Sessions interface { - // Create creates a new session with the given options. 
- Create(ctx context.Context, opts CreateSessionOptions) (*Session, error) - - // Get retrieves a session by ID. - Get(ctx context.Context, sessionID string) (*Session, error) - - // List returns all top-level sessions (excludes sub-sessions). - List(ctx context.Context) ([]Session, error) -} - -// CreateSessionOptions contains parameters for creating a new session. -type CreateSessionOptions struct { - // FirstPrompt is the initial user prompt - FirstPrompt string - - // AgentType identifies the AI agent (e.g., "Claude Code", "Cursor") - AgentType agent.AgentType - - // AgentSessionID is the agent's internal session identifier - AgentSessionID string - - // ParentID is the parent session ID for sub-sessions (empty for top-level) - ParentID string - - // ToolUseID is the tool invocation that spawned this sub-session - // (empty for top-level sessions) - ToolUseID string -} diff --git a/cmd/entire/cli/session/session_test.go b/cmd/entire/cli/session/session_test.go deleted file mode 100644 index a92b7924e..000000000 --- a/cmd/entire/cli/session/session_test.go +++ /dev/null @@ -1,246 +0,0 @@ -package session - -import ( - "context" - "os" - "os/exec" - "path/filepath" - "testing" - "time" -) - -func TestSession_IsSubSession(t *testing.T) { - tests := []struct { - name string - session Session - expected bool - }{ - { - name: "top-level session with empty ParentID", - session: Session{ - ID: "session-123", - ParentID: "", - }, - expected: false, - }, - { - name: "sub-session with ParentID set", - session: Session{ - ID: "session-456", - ParentID: "session-123", - ToolUseID: "toolu_abc", - }, - expected: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := tt.session.IsSubSession() - if result != tt.expected { - t.Errorf("IsSubSession() = %v, want %v", result, tt.expected) - } - }) - } -} - -func TestStateStore_RemoveAll(t *testing.T) { - // Create a temp directory for the state store - tmpDir := t.TempDir() - stateDir := 
filepath.Join(tmpDir, "entire-sessions") - - store := NewStateStoreWithDir(stateDir) - ctx := context.Background() - - // Create some session states - states := []*State{ - { - SessionID: "session-1", - BaseCommit: "abc123", - StartedAt: time.Now(), - }, - { - SessionID: "session-2", - BaseCommit: "def456", - StartedAt: time.Now(), - }, - { - SessionID: "session-3", - BaseCommit: "ghi789", - StartedAt: time.Now(), - }, - } - - for _, state := range states { - if err := store.Save(ctx, state); err != nil { - t.Fatalf("Save() error = %v", err) - } - } - - // Verify states were saved - savedStates, err := store.List(ctx) - if err != nil { - t.Fatalf("List() error = %v", err) - } - if len(savedStates) != len(states) { - t.Fatalf("List() returned %d states, want %d", len(savedStates), len(states)) - } - - // Verify directory exists - if _, err := os.Stat(stateDir); os.IsNotExist(err) { - t.Fatal("state directory should exist before RemoveAll()") - } - - // Remove all - if err := store.RemoveAll(); err != nil { - t.Fatalf("RemoveAll() error = %v", err) - } - - // Verify directory is removed - if _, err := os.Stat(stateDir); !os.IsNotExist(err) { - t.Error("state directory should not exist after RemoveAll()") - } - - // List should return empty (directory doesn't exist) - afterStates, err := store.List(ctx) - if err != nil { - t.Fatalf("List() after RemoveAll() error = %v", err) - } - if len(afterStates) != 0 { - t.Errorf("List() after RemoveAll() returned %d states, want 0", len(afterStates)) - } -} - -func TestStateStore_RemoveAll_EmptyDirectory(t *testing.T) { - // Create a temp directory for the state store - tmpDir := t.TempDir() - stateDir := filepath.Join(tmpDir, "entire-sessions") - - // Create the directory but don't add any files - if err := os.MkdirAll(stateDir, 0o750); err != nil { - t.Fatalf("failed to create state dir: %v", err) - } - - store := NewStateStoreWithDir(stateDir) - - // Remove all on empty directory should succeed - if err := store.RemoveAll(); 
err != nil { - t.Fatalf("RemoveAll() on empty directory error = %v", err) - } - - // Directory should be removed - if _, err := os.Stat(stateDir); !os.IsNotExist(err) { - t.Error("state directory should not exist after RemoveAll()") - } -} - -func TestStateStore_RemoveAll_NonExistentDirectory(t *testing.T) { - // Create a temp directory for the state store - tmpDir := t.TempDir() - stateDir := filepath.Join(tmpDir, "nonexistent-sessions") - - store := NewStateStoreWithDir(stateDir) - - // RemoveAll on non-existent directory should succeed (no-op) - if err := store.RemoveAll(); err != nil { - t.Fatalf("RemoveAll() on non-existent directory error = %v", err) - } -} - -func TestFindLegacyEntireSessionID(t *testing.T) { - // Create a temp git repo - tmpDir := t.TempDir() - t.Chdir(tmpDir) - - // Initialize git repo - cmd := exec.CommandContext(context.Background(), "git", "init") - if err := cmd.Run(); err != nil { - t.Fatalf("failed to init git repo: %v", err) - } - - // Create state directory with legacy-format session files - stateDir := filepath.Join(tmpDir, ".git", sessionStateDirName) - if err := os.MkdirAll(stateDir, 0o750); err != nil { - t.Fatalf("failed to create state dir: %v", err) - } - - t.Run("finds legacy session", func(t *testing.T) { - agentID := "abc123-def456" - legacySessionID := "2026-01-20-" + agentID - - // Create a legacy-format state file - stateFile := filepath.Join(stateDir, legacySessionID+".json") - if err := os.WriteFile(stateFile, []byte(`{"session_id":"`+legacySessionID+`"}`), 0o600); err != nil { - t.Fatalf("failed to write state file: %v", err) - } - defer os.Remove(stateFile) - - found := FindLegacyEntireSessionID(agentID) - if found != legacySessionID { - t.Errorf("FindLegacyEntireSessionID(%q) = %q, want %q", agentID, found, legacySessionID) - } - }) - - t.Run("returns empty for non-existent session", func(t *testing.T) { - found := FindLegacyEntireSessionID("nonexistent-session-id") - if found != "" { - 
t.Errorf("FindLegacyEntireSessionID(nonexistent) = %q, want empty string", found) - } - }) - - t.Run("returns empty for new-format session", func(t *testing.T) { - // Create a new-format state file (no date prefix) - newSessionID := "new-format-session-id" - stateFile := filepath.Join(stateDir, newSessionID+".json") - if err := os.WriteFile(stateFile, []byte(`{"session_id":"`+newSessionID+`"}`), 0o600); err != nil { - t.Fatalf("failed to write state file: %v", err) - } - defer os.Remove(stateFile) - - // Should not find it as "legacy" since it doesn't have date prefix - found := FindLegacyEntireSessionID(newSessionID) - if found != "" { - t.Errorf("FindLegacyEntireSessionID(new-format) = %q, want empty string", found) - } - }) - - t.Run("returns empty for empty agent ID", func(t *testing.T) { - found := FindLegacyEntireSessionID("") - if found != "" { - t.Errorf("FindLegacyEntireSessionID('') = %q, want empty string", found) - } - }) - - t.Run("returns empty for path traversal attempts", func(t *testing.T) { - // Should reject IDs with path traversal sequences - maliciousIDs := []string{ - "../../../etc/passwd", - "session/../../../etc", - "session/id", - "session.json/../..", - } - for _, id := range maliciousIDs { - found := FindLegacyEntireSessionID(id) - if found != "" { - t.Errorf("FindLegacyEntireSessionID(%q) = %q, want empty string (should be rejected)", id, found) - } - } - }) - - t.Run("ignores tmp files", func(t *testing.T) { - agentID := "tmp-test-id" - legacySessionID := "2026-01-21-" + agentID - - // Create a .tmp file (should be ignored) - tmpFile := filepath.Join(stateDir, legacySessionID+".json.tmp") - if err := os.WriteFile(tmpFile, []byte(`{"session_id":"`+legacySessionID+`"}`), 0o600); err != nil { - t.Fatalf("failed to write tmp file: %v", err) - } - defer os.Remove(tmpFile) - - found := FindLegacyEntireSessionID(agentID) - if found != "" { - t.Errorf("FindLegacyEntireSessionID should ignore .tmp files, got %q", found) - } - }) -} diff --git 
a/cmd/entire/cli/session/state.go b/cmd/entire/cli/session/state.go index 610081f80..f23359cd0 100644 --- a/cmd/entire/cli/session/state.go +++ b/cmd/entire/cli/session/state.go @@ -261,38 +261,6 @@ func (s *StateStore) List(ctx context.Context) ([]*State, error) { return states, nil } -// FindByBaseCommit finds all sessions based on the given commit hash. -func (s *StateStore) FindByBaseCommit(ctx context.Context, baseCommit string) ([]*State, error) { - allStates, err := s.List(ctx) - if err != nil { - return nil, err - } - - var matching []*State - for _, state := range allStates { - if state.BaseCommit == baseCommit { - matching = append(matching, state) - } - } - return matching, nil -} - -// FindByWorktree finds all sessions for the given worktree path. -func (s *StateStore) FindByWorktree(ctx context.Context, worktreePath string) ([]*State, error) { - allStates, err := s.List(ctx) - if err != nil { - return nil, err - } - - var matching []*State - for _, state := range allStates { - if state.WorktreePath == worktreePath { - matching = append(matching, state) - } - } - return matching, nil -} - // stateFilePath returns the path to a session state file. func (s *StateStore) stateFilePath(sessionID string) string { return filepath.Join(s.stateDir, sessionID+".json") @@ -320,69 +288,3 @@ func getGitCommonDir() (string, error) { return filepath.Clean(commonDir), nil } - -// GetWorktreePath returns the absolute path to the current worktree root. -func GetWorktreePath() (string, error) { - ctx := context.Background() - cmd := exec.CommandContext(ctx, "git", "rev-parse", "--show-toplevel") - output, err := cmd.Output() - if err != nil { - return "", fmt.Errorf("failed to get worktree path: %w", err) - } - return strings.TrimSpace(string(output)), nil -} - -// FindLegacyEntireSessionID checks for existing session state files with a legacy date-prefixed format. 
-// Takes an agent session ID and returns the corresponding entire session ID if found -// (e.g., "2026-01-20-abc123" for agent ID "abc123"), or empty string if no legacy session exists. -// -// This provides backward compatibility when resuming sessions that were created before -// the session ID format change (when EntireSessionID added a date prefix). -func FindLegacyEntireSessionID(agentSessionID string) string { - if agentSessionID == "" { - return "" - } - - // Validate ID format to prevent path traversal attacks - if err := validation.ValidateAgentSessionID(agentSessionID); err != nil { - return "" - } - - commonDir, err := getGitCommonDir() - if err != nil { - return "" - } - - stateDir := filepath.Join(commonDir, sessionStateDirName) - entries, err := os.ReadDir(stateDir) - if err != nil { - return "" - } - - // Look for state files with legacy date-prefixed format matching this agent ID - for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") { - continue - } - if strings.HasSuffix(entry.Name(), ".tmp") { - continue - } - - existingSessionID := strings.TrimSuffix(entry.Name(), ".json") - - // Check if this is a legacy format (has date prefix) that matches our agent ID - // Legacy format: YYYY-MM-DD- (11 char prefix) - if len(existingSessionID) > 11 && - existingSessionID[4] == '-' && - existingSessionID[7] == '-' && - existingSessionID[10] == '-' { - // Extract the agent ID portion and compare - extractedAgentID := existingSessionID[11:] - if extractedAgentID == agentSessionID { - return existingSessionID - } - } - } - - return "" -} From 6d382cdc9844b5d35ae7f6ee2b21a2e3ef1627ac Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 10:28:41 +1100 Subject: [PATCH 06/18] delete unneeded backwards compatibility on session length check --- cmd/entire/cli/strategy/manual_commit_rewind.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cmd/entire/cli/strategy/manual_commit_rewind.go
b/cmd/entire/cli/strategy/manual_commit_rewind.go index 8430ab542..88998f9e4 100644 --- a/cmd/entire/cli/strategy/manual_commit_rewind.go +++ b/cmd/entire/cli/strategy/manual_commit_rewind.go @@ -685,9 +685,6 @@ func (s *ManualCommitStrategy) RestoreLogsOnly(point RewindPoint, force bool) er // Count sessions to restore totalSessions := len(summary.Sessions) - if totalSessions == 0 { - totalSessions = 1 // backwards compatibility - } if totalSessions > 1 { fmt.Fprintf(os.Stderr, "Restoring %d sessions from checkpoint:\n", totalSessions) } @@ -796,10 +793,6 @@ func (s *ManualCommitStrategy) classifySessionsForRestore(ctx context.Context, c var sessions []SessionRestoreInfo totalSessions := len(summary.Sessions) - if totalSessions == 0 { - totalSessions = 1 // backwards compatibility - } - // Check all sessions (0-based indexing) for i := range totalSessions { content, err := store.ReadSessionContent(ctx, checkpointID, i) From fb4cef604ab86e79850fb8b14ade33649873b1fe Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 10:30:55 +1100 Subject: [PATCH 07/18] delete dead code --- cmd/entire/cli/checkpoint/checkpoint_test.go | 56 -------------------- cmd/entire/cli/checkpoint/committed.go | 42 --------------- 2 files changed, 98 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index e89adeb38..b983f7f40 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -487,62 +487,6 @@ func verifyBranchInMetadata(t *testing.T, repo *git.Repository, checkpointID id. } } -// TestArchiveExistingSession_ChunkedTranscript verifies that when archiving -// a session with chunked transcripts, all chunk files are moved to the archive folder. 
-func TestArchiveExistingSession_ChunkedTranscript(t *testing.T) { - repo, _ := setupBranchTestRepo(t) - store := NewGitStore(repo) - - basePath := "a1/b2c3d4e5f6/" - - // Simulate existing checkpoint with chunked transcript - // Chunk 0 is the base file (full.jsonl), chunks 1+ have suffixes (.001, .002) - entries := map[string]object.TreeEntry{ - basePath + paths.MetadataFileName: {Name: basePath + paths.MetadataFileName, Hash: plumbing.NewHash("aaa")}, - basePath + paths.TranscriptFileName: {Name: basePath + paths.TranscriptFileName, Hash: plumbing.NewHash("bbb")}, // chunk 0 - basePath + paths.TranscriptFileName + ".001": {Name: basePath + paths.TranscriptFileName + ".001", Hash: plumbing.NewHash("ccc")}, // chunk 1 - basePath + paths.TranscriptFileName + ".002": {Name: basePath + paths.TranscriptFileName + ".002", Hash: plumbing.NewHash("ddd")}, // chunk 2 - basePath + paths.PromptFileName: {Name: basePath + paths.PromptFileName, Hash: plumbing.NewHash("eee")}, - basePath + paths.ContextFileName: {Name: basePath + paths.ContextFileName, Hash: plumbing.NewHash("fff")}, - basePath + paths.ContentHashFileName: {Name: basePath + paths.ContentHashFileName, Hash: plumbing.NewHash("ggg")}, - } - - // Archive the existing session (sessionCount = 1) - store.archiveExistingSession(basePath, 1, entries) - - archivePath := basePath + "1/" - - // Verify standard files were archived - if _, ok := entries[archivePath+paths.MetadataFileName]; !ok { - t.Error("metadata.json should be archived to 1/") - } - if _, ok := entries[archivePath+paths.TranscriptFileName]; !ok { - t.Error("full.jsonl (chunk 0) should be archived to 1/") - } - if _, ok := entries[archivePath+paths.PromptFileName]; !ok { - t.Error("prompt.txt should be archived to 1/") - } - - // Verify chunk files were archived - if _, ok := entries[archivePath+paths.TranscriptFileName+".001"]; !ok { - t.Error("full.jsonl.001 (chunk 1) should be archived to 1/") - } - if _, ok := 
entries[archivePath+paths.TranscriptFileName+".002"]; !ok { - t.Error("full.jsonl.002 (chunk 2) should be archived to 1/") - } - - // Verify original locations are cleared - if _, ok := entries[basePath+paths.TranscriptFileName]; ok { - t.Error("original full.jsonl should be removed from base path") - } - if _, ok := entries[basePath+paths.TranscriptFileName+".001"]; ok { - t.Error("original full.jsonl.001 should be removed from base path") - } - if _, ok := entries[basePath+paths.TranscriptFileName+".002"]; ok { - t.Error("original full.jsonl.002 should be removed from base path") - } -} - // TestWriteCommitted_BranchField verifies that the Branch field is correctly // captured in metadata.json when on a branch, and is empty when in detached HEAD. func TestWriteCommitted_BranchField(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index ac28242e9..fee4071a4 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -506,48 +506,6 @@ func (s *GitStore) readMetadataFromBlob(hash plumbing.Hash) (*CommittedMetadata, return readJSONFromBlob[CommittedMetadata](s.repo, hash) } -// archiveExistingSession moves existing session files to a numbered subfolder. -// The subfolder number is based on the current session count (so first archived session goes to "1/"). -func (s *GitStore) archiveExistingSession(basePath string, sessionCount int, entries map[string]object.TreeEntry) { - archivePath := fmt.Sprintf("%s%d/", basePath, sessionCount) - - // Files to archive (standard checkpoint files at basePath, excluding tasks/ subfolder) - filesToArchive := []string{ - paths.MetadataFileName, - paths.TranscriptFileName, - paths.PromptFileName, - paths.ContextFileName, - paths.ContentHashFileName, - } - - // Also include transcript chunk files (full.jsonl.001, full.jsonl.002, etc.) - chunkPrefix := basePath + paths.TranscriptFileName + "." 
- for srcPath := range entries { - if strings.HasPrefix(srcPath, chunkPrefix) { - chunkSuffix := strings.TrimPrefix(srcPath, basePath+paths.TranscriptFileName) - if idx := agent.ParseChunkIndex(paths.TranscriptFileName+chunkSuffix, paths.TranscriptFileName); idx > 0 { - filesToArchive = append(filesToArchive, paths.TranscriptFileName+chunkSuffix) - } - } - } - - // Move each file to archive folder - for _, filename := range filesToArchive { - srcPath := basePath + filename - if entry, exists := entries[srcPath]; exists { - // Add to archive location - dstPath := archivePath + filename - entries[dstPath] = object.TreeEntry{ - Name: dstPath, - Mode: entry.Mode, - Hash: entry.Hash, - } - // Remove from original location (will be overwritten by new session) - delete(entries, srcPath) - } - } -} - // buildCommitMessage constructs the commit message with proper trailers. // The commit subject is always "Checkpoint: " for consistency. // If CommitSubject is provided (e.g., for task checkpoints), it's included in the body. 
From 9571a23e043af41c8cc6daf522ed74931a2d21ea Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 10:37:34 +1100 Subject: [PATCH 08/18] add checkpoint tests --- cmd/entire/cli/checkpoint/checkpoint_test.go | 698 +++++++++++++++++++ 1 file changed, 698 insertions(+) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index b983f7f40..b9e1c6fcd 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -843,3 +843,701 @@ func TestGetCheckpointAuthor_NoSessionsBranch(t *testing.T) { t.Errorf("expected empty author when sessions branch doesn't exist, got Name=%q, Email=%q", author.Name, author.Email) } } + +// ============================================================================= +// Multi-Session Tests - Tests for checkpoint structure with CheckpointSummary +// at root level and sessions stored in numbered subfolders (0-based: 0/, 1/, 2/) +// ============================================================================= + +// TestWriteCommitted_MultipleSessionsSameCheckpoint verifies that writing multiple +// sessions to the same checkpoint ID creates separate numbered subdirectories. 
+func TestWriteCommitted_MultipleSessionsSameCheckpoint(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("a1a2a3a4a5a6") + + // Write first session + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "session-one", + Strategy: "manual-commit", + Transcript: []byte(`{"message": "first session"}`), + Prompts: []string{"First prompt"}, + FilesTouched: []string{"file1.go"}, + CheckpointsCount: 3, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() first session error = %v", err) + } + + // Write second session to the same checkpoint ID + err = store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "session-two", + Strategy: "manual-commit", + Transcript: []byte(`{"message": "second session"}`), + Prompts: []string{"Second prompt"}, + FilesTouched: []string{"file2.go"}, + CheckpointsCount: 2, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() second session error = %v", err) + } + + // Read the checkpoint summary + summary, err := store.ReadCommitted(context.Background(), checkpointID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + if summary == nil { + t.Fatal("ReadCommitted() returned nil summary") + } + + // Verify Sessions array has 2 entries + if len(summary.Sessions) != 2 { + t.Errorf("len(summary.Sessions) = %d, want 2", len(summary.Sessions)) + } + + // Verify both sessions have correct file paths (0-based indexing) + if !strings.Contains(summary.Sessions[0].Transcript, "/0/") { + t.Errorf("session 0 transcript path should contain '/0/', got %s", summary.Sessions[0].Transcript) + } + if !strings.Contains(summary.Sessions[1].Transcript, "/1/") { + t.Errorf("session 1 transcript path should contain '/1/', got %s", 
summary.Sessions[1].Transcript) + } + + // Verify session content can be read from each subdirectory + content0, err := store.ReadSessionContent(context.Background(), checkpointID, 0) + if err != nil { + t.Fatalf("ReadSessionContent(0) error = %v", err) + } + if content0.Metadata.SessionID != "session-one" { + t.Errorf("session 0 SessionID = %q, want %q", content0.Metadata.SessionID, "session-one") + } + + content1, err := store.ReadSessionContent(context.Background(), checkpointID, 1) + if err != nil { + t.Fatalf("ReadSessionContent(1) error = %v", err) + } + if content1.Metadata.SessionID != "session-two" { + t.Errorf("session 1 SessionID = %q, want %q", content1.Metadata.SessionID, "session-two") + } +} + +// TestWriteCommitted_Aggregation verifies that CheckpointSummary correctly +// aggregates statistics (CheckpointsCount, FilesTouched, TokenUsage) from +// multiple sessions written to the same checkpoint. +func TestWriteCommitted_Aggregation(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("b1b2b3b4b5b6") + + // Write first session with specific stats + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "session-one", + Strategy: "manual-commit", + Transcript: []byte(`{"message": "first"}`), + FilesTouched: []string{"a.go", "b.go"}, + CheckpointsCount: 3, + TokenUsage: &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 50, + APICallCount: 5, + }, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() first session error = %v", err) + } + + // Write second session with overlapping and new files + err = store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "session-two", + Strategy: "manual-commit", + Transcript: []byte(`{"message": "second"}`), + FilesTouched: []string{"b.go", "c.go"}, // b.go overlaps + 
CheckpointsCount: 2, + TokenUsage: &agent.TokenUsage{ + InputTokens: 50, + OutputTokens: 25, + APICallCount: 3, + }, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() second session error = %v", err) + } + + // Read the checkpoint summary + summary, err := store.ReadCommitted(context.Background(), checkpointID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + if summary == nil { + t.Fatal("ReadCommitted() returned nil summary") + } + + // Verify aggregated CheckpointsCount = 3 + 2 = 5 + if summary.CheckpointsCount != 5 { + t.Errorf("summary.CheckpointsCount = %d, want 5", summary.CheckpointsCount) + } + + // Verify merged FilesTouched = ["a.go", "b.go", "c.go"] (sorted, deduplicated) + expectedFiles := []string{"a.go", "b.go", "c.go"} + if len(summary.FilesTouched) != len(expectedFiles) { + t.Errorf("len(summary.FilesTouched) = %d, want %d", len(summary.FilesTouched), len(expectedFiles)) + } + for i, want := range expectedFiles { + if i >= len(summary.FilesTouched) { + break + } + if summary.FilesTouched[i] != want { + t.Errorf("summary.FilesTouched[%d] = %q, want %q", i, summary.FilesTouched[i], want) + } + } + + // Verify aggregated TokenUsage + if summary.TokenUsage == nil { + t.Fatal("summary.TokenUsage should not be nil") + } + if summary.TokenUsage.InputTokens != 150 { + t.Errorf("summary.TokenUsage.InputTokens = %d, want 150", summary.TokenUsage.InputTokens) + } + if summary.TokenUsage.OutputTokens != 75 { + t.Errorf("summary.TokenUsage.OutputTokens = %d, want 75", summary.TokenUsage.OutputTokens) + } + if summary.TokenUsage.APICallCount != 8 { + t.Errorf("summary.TokenUsage.APICallCount = %d, want 8", summary.TokenUsage.APICallCount) + } +} + +// TestReadCommitted_ReturnsCheckpointSummary verifies that ReadCommitted returns +// a CheckpointSummary with the correct structure including Sessions array. 
+func TestReadCommitted_ReturnsCheckpointSummary(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("c1c2c3c4c5c6") + + // Write two sessions + for i, sessionID := range []string{"session-alpha", "session-beta"} { + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sessionID, + Strategy: "manual-commit", + Transcript: []byte(fmt.Sprintf(`{"session": %d}`, i)), + Prompts: []string{fmt.Sprintf("Prompt %d", i)}, + Context: []byte(fmt.Sprintf("Context %d", i)), + FilesTouched: []string{fmt.Sprintf("file%d.go", i)}, + CheckpointsCount: i + 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() session %d error = %v", i, err) + } + } + + // Read the checkpoint summary + summary, err := store.ReadCommitted(context.Background(), checkpointID) + if err != nil { + t.Fatalf("ReadCommitted() error = %v", err) + } + if summary == nil { + t.Fatal("ReadCommitted() returned nil summary") + } + + // Verify basic summary fields + if summary.CheckpointID != checkpointID { + t.Errorf("summary.CheckpointID = %v, want %v", summary.CheckpointID, checkpointID) + } + if summary.Strategy != "manual-commit" { + t.Errorf("summary.Strategy = %q, want %q", summary.Strategy, "manual-commit") + } + + // Verify Sessions array + if len(summary.Sessions) != 2 { + t.Fatalf("len(summary.Sessions) = %d, want 2", len(summary.Sessions)) + } + + // Verify file paths point to correct locations + for i, session := range summary.Sessions { + expectedSubdir := fmt.Sprintf("/%d/", i) + if !strings.Contains(session.Metadata, expectedSubdir) { + t.Errorf("session %d Metadata path should contain %q, got %q", i, expectedSubdir, session.Metadata) + } + if !strings.Contains(session.Transcript, expectedSubdir) { + t.Errorf("session %d Transcript path should contain %q, got %q", i, expectedSubdir, session.Transcript) + } + 
} +} + +// TestReadSessionContent_ByIndex verifies that ReadSessionContent can read +// specific sessions by their 0-based index within a checkpoint. +func TestReadSessionContent_ByIndex(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("d1d2d3d4d5d6") + + // Write two sessions with distinct content + sessions := []struct { + id string + transcript string + prompt string + }{ + {"session-first", `{"order": "first"}`, "First user prompt"}, + {"session-second", `{"order": "second"}`, "Second user prompt"}, + } + + for _, s := range sessions { + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: s.id, + Strategy: "manual-commit", + Transcript: []byte(s.transcript), + Prompts: []string{s.prompt}, + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() session %s error = %v", s.id, err) + } + } + + // Read session 0 + content0, err := store.ReadSessionContent(context.Background(), checkpointID, 0) + if err != nil { + t.Fatalf("ReadSessionContent(0) error = %v", err) + } + if content0.Metadata.SessionID != "session-first" { + t.Errorf("session 0 SessionID = %q, want %q", content0.Metadata.SessionID, "session-first") + } + if !strings.Contains(string(content0.Transcript), "first") { + t.Errorf("session 0 transcript should contain 'first', got %s", string(content0.Transcript)) + } + if !strings.Contains(content0.Prompts, "First") { + t.Errorf("session 0 prompts should contain 'First', got %s", content0.Prompts) + } + + // Read session 1 + content1, err := store.ReadSessionContent(context.Background(), checkpointID, 1) + if err != nil { + t.Fatalf("ReadSessionContent(1) error = %v", err) + } + if content1.Metadata.SessionID != "session-second" { + t.Errorf("session 1 SessionID = %q, want %q", content1.Metadata.SessionID, "session-second") + } + if 
!strings.Contains(string(content1.Transcript), "second") { + t.Errorf("session 1 transcript should contain 'second', got %s", string(content1.Transcript)) + } +} + +// writeSingleSession is a test helper that creates a store with a single session +// and returns the store and checkpoint ID for further testing. +func writeSingleSession(t *testing.T, cpIDStr, sessionID, transcript string) (*GitStore, id.CheckpointID) { + t.Helper() + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID(cpIDStr) + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sessionID, + Strategy: "manual-commit", + Transcript: []byte(transcript), + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + return store, checkpointID +} + +// TestReadSessionContent_InvalidIndex verifies that ReadSessionContent returns +// an error when requesting a session index that doesn't exist. +func TestReadSessionContent_InvalidIndex(t *testing.T) { + store, checkpointID := writeSingleSession(t, "e1e2e3e4e5e6", "only-session", `{"single": true}`) + + // Try to read session index 1 (doesn't exist) + _, err := store.ReadSessionContent(context.Background(), checkpointID, 1) + if err == nil { + t.Error("ReadSessionContent(1) should return error for non-existent session") + } + if !strings.Contains(err.Error(), "session 1 not found") { + t.Errorf("error should mention session not found, got: %v", err) + } +} + +// TestReadLatestSessionContent verifies that ReadLatestSessionContent returns +// the content of the most recently added session (highest index). 
+func TestReadLatestSessionContent(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("f1f2f3f4f5f6") + + // Write three sessions + for i := range 3 { + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: fmt.Sprintf("session-%d", i), + Strategy: "manual-commit", + Transcript: []byte(fmt.Sprintf(`{"index": %d}`, i)), + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() session %d error = %v", i, err) + } + } + + // Read latest session content + content, err := store.ReadLatestSessionContent(context.Background(), checkpointID) + if err != nil { + t.Fatalf("ReadLatestSessionContent() error = %v", err) + } + + // Should return session 2 (0-indexed, so latest is index 2) + if content.Metadata.SessionID != "session-2" { + t.Errorf("latest session SessionID = %q, want %q", content.Metadata.SessionID, "session-2") + } + if !strings.Contains(string(content.Transcript), `"index": 2`) { + t.Errorf("latest session transcript should contain index 2, got %s", string(content.Transcript)) + } +} + +// TestReadSessionContentByID verifies that ReadSessionContentByID can find +// a session by its session ID rather than by index. 
+func TestReadSessionContentByID(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("010203040506") + + // Write two sessions with distinct IDs + sessionIDs := []string{"unique-id-alpha", "unique-id-beta"} + for i, sid := range sessionIDs { + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sid, + Strategy: "manual-commit", + Transcript: []byte(fmt.Sprintf(`{"session_name": "%s"}`, sid)), + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() session %d error = %v", i, err) + } + } + + // Read by session ID + content, err := store.ReadSessionContentByID(context.Background(), checkpointID, "unique-id-beta") + if err != nil { + t.Fatalf("ReadSessionContentByID() error = %v", err) + } + + if content.Metadata.SessionID != "unique-id-beta" { + t.Errorf("SessionID = %q, want %q", content.Metadata.SessionID, "unique-id-beta") + } + if !strings.Contains(string(content.Transcript), "unique-id-beta") { + t.Errorf("transcript should contain session name, got %s", string(content.Transcript)) + } +} + +// TestReadSessionContentByID_NotFound verifies that ReadSessionContentByID +// returns an error when the session ID doesn't exist in the checkpoint. 
+func TestReadSessionContentByID_NotFound(t *testing.T) {
+	store, checkpointID := writeSingleSession(t, "111213141516", "existing-session", `{"exists": true}`)
+
+	// Try to read non-existent session ID
+	_, err := store.ReadSessionContentByID(context.Background(), checkpointID, "nonexistent-session")
+	if err == nil {
+		// Fatal rather than Error: the next check calls err.Error(), which
+		// would panic on a nil error instead of reporting a test failure.
+		t.Fatal("ReadSessionContentByID() should return error for non-existent session ID")
+	}
+	if !strings.Contains(err.Error(), "not found") {
+		t.Errorf("error should mention 'not found', got: %v", err)
+	}
+}
+
+// TestListCommitted_MultiSessionInfo verifies that ListCommitted returns correct
+// information for checkpoints with multiple sessions.
+func TestListCommitted_MultiSessionInfo(t *testing.T) {
+	repo, _ := setupBranchTestRepo(t)
+	store := NewGitStore(repo)
+	checkpointID := id.MustCheckpointID("212223242526")
+
+	// Write two sessions to the same checkpoint
+	for i, sid := range []string{"list-session-1", "list-session-2"} {
+		err := store.WriteCommitted(context.Background(), WriteCommittedOptions{
+			CheckpointID:     checkpointID,
+			SessionID:        sid,
+			Strategy:         "manual-commit",
+			Agent:            agent.AgentTypeClaudeCode,
+			Transcript:       []byte(fmt.Sprintf(`{"i": %d}`, i)),
+			FilesTouched:     []string{fmt.Sprintf("file%d.go", i)},
+			CheckpointsCount: i + 1,
+			AuthorName:       "Test Author",
+			AuthorEmail:      "test@example.com",
+		})
+		if err != nil {
+			t.Fatalf("WriteCommitted() session %d error = %v", i, err)
+		}
+	}
+
+	// List all checkpoints
+	checkpoints, err := store.ListCommitted(context.Background())
+	if err != nil {
+		t.Fatalf("ListCommitted() error = %v", err)
+	}
+
+	// Find our checkpoint
+	var found *CommittedInfo
+	for i := range checkpoints {
+		if checkpoints[i].CheckpointID == checkpointID {
+			found = &checkpoints[i]
+			break
+		}
+	}
+	if found == nil {
+		t.Fatal("checkpoint not found in ListCommitted() results")
+	}
+
+	// Verify SessionCount = 2
+	if found.SessionCount != 2 {
+		t.Errorf("SessionCount = %d, want 2", found.SessionCount)
+	}
+
+	// Verify SessionID is from the latest session
+	if found.SessionID != "list-session-2" {
+		t.Errorf("SessionID = %q, want %q (latest session)", found.SessionID, "list-session-2")
+	}
+
+	// Verify Agent comes from latest session metadata
+	if found.Agent != agent.AgentTypeClaudeCode {
+		t.Errorf("Agent = %q, want %q", found.Agent, agent.AgentTypeClaudeCode)
+	}
+}
+
+// TestWriteCommitted_SessionWithNoPrompts verifies that a session can be
+// written without prompts and still be read correctly.
+func TestWriteCommitted_SessionWithNoPrompts(t *testing.T) {
+	repo, _ := setupBranchTestRepo(t)
+	store := NewGitStore(repo)
+	checkpointID := id.MustCheckpointID("313233343536")
+
+	// Write session without prompts
+	err := store.WriteCommitted(context.Background(), WriteCommittedOptions{
+		CheckpointID:     checkpointID,
+		SessionID:        "no-prompts-session",
+		Strategy:         "manual-commit",
+		Transcript:       []byte(`{"no_prompts": true}`),
+		Prompts:          nil, // No prompts
+		Context:          []byte("Some context"),
+		CheckpointsCount: 1,
+		AuthorName:       "Test Author",
+		AuthorEmail:      "test@example.com",
+	})
+	if err != nil {
+		t.Fatalf("WriteCommitted() error = %v", err)
+	}
+
+	// Read the session content
+	content, err := store.ReadSessionContent(context.Background(), checkpointID, 0)
+	if err != nil {
+		t.Fatalf("ReadSessionContent() error = %v", err)
+	}
+
+	// Verify session metadata is correct
+	if content.Metadata.SessionID != "no-prompts-session" {
+		t.Errorf("SessionID = %q, want %q", content.Metadata.SessionID, "no-prompts-session")
+	}
+
+	// Verify transcript is present
+	if len(content.Transcript) == 0 {
+		t.Error("Transcript should not be empty")
+	}
+
+	// Verify prompts is empty
+	if content.Prompts != "" {
+		t.Errorf("Prompts should be empty, got %q", content.Prompts)
+	}
+
+	// Verify context is present
+	if content.Context != "Some context" {
+		t.Errorf("Context = %q, want %q", content.Context, "Some context")
+	}
+}
+
+// TestWriteCommitted_SessionWithNoContext verifies that a session can be
+// written without context and still be read correctly.
+func TestWriteCommitted_SessionWithNoContext(t *testing.T) {
+	repo, _ := setupBranchTestRepo(t)
+	store := NewGitStore(repo)
+	checkpointID := id.MustCheckpointID("414243444546")
+
+	// Write session without context
+	err := store.WriteCommitted(context.Background(), WriteCommittedOptions{
+		CheckpointID:     checkpointID,
+		SessionID:        "no-context-session",
+		Strategy:         "manual-commit",
+		Transcript:       []byte(`{"no_context": true}`),
+		Prompts:          []string{"A prompt"},
+		Context:          nil, // No context
+		CheckpointsCount: 1,
+		AuthorName:       "Test Author",
+		AuthorEmail:      "test@example.com",
+	})
+	if err != nil {
+		t.Fatalf("WriteCommitted() error = %v", err)
+	}
+
+	// Read the session content
+	content, err := store.ReadSessionContent(context.Background(), checkpointID, 0)
+	if err != nil {
+		t.Fatalf("ReadSessionContent() error = %v", err)
+	}
+
+	// Verify session metadata is correct
+	if content.Metadata.SessionID != "no-context-session" {
+		t.Errorf("SessionID = %q, want %q", content.Metadata.SessionID, "no-context-session")
+	}
+
+	// Verify transcript is present
+	if len(content.Transcript) == 0 {
+		t.Error("Transcript should not be empty")
+	}
+
+	// Verify prompts is present
+	if !strings.Contains(content.Prompts, "A prompt") {
+		t.Errorf("Prompts should contain 'A prompt', got %q", content.Prompts)
+	}
+
+	// Verify context is empty
+	if content.Context != "" {
+		t.Errorf("Context should be empty, got %q", content.Context)
+	}
+}
+
+// TestWriteCommitted_ThreeSessions verifies the structure with three sessions
+// to ensure the 0-based indexing works correctly throughout.
+func TestWriteCommitted_ThreeSessions(t *testing.T) {
+	repo, _ := setupBranchTestRepo(t)
+	store := NewGitStore(repo)
+	checkpointID := id.MustCheckpointID("515253545556")
+
+	// Write three sessions
+	for i := range 3 {
+		err := store.WriteCommitted(context.Background(), WriteCommittedOptions{
+			CheckpointID:     checkpointID,
+			SessionID:        fmt.Sprintf("three-session-%d", i),
+			Strategy:         "manual-commit",
+			Transcript:       []byte(fmt.Sprintf(`{"session_number": %d}`, i)),
+			FilesTouched:     []string{fmt.Sprintf("s%d.go", i)},
+			CheckpointsCount: i + 1,
+			TokenUsage: &agent.TokenUsage{
+				InputTokens: 100 * (i + 1),
+			},
+			AuthorName:  "Test Author",
+			AuthorEmail: "test@example.com",
+		})
+		if err != nil {
+			t.Fatalf("WriteCommitted() session %d error = %v", i, err)
+		}
+	}
+
+	// Read summary
+	summary, err := store.ReadCommitted(context.Background(), checkpointID)
+	if err != nil {
+		t.Fatalf("ReadCommitted() error = %v", err)
+	}
+	// ReadCommitted reports a missing checkpoint as (nil, nil), so guard the
+	// dereferences below; the explicit return keeps staticcheck SA5011 quiet.
+	if summary == nil {
+		t.Fatal("ReadCommitted() returned nil summary")
+		return
+	}
+
+	// Verify 3 sessions
+	if len(summary.Sessions) != 3 {
+		t.Errorf("len(summary.Sessions) = %d, want 3", len(summary.Sessions))
+	}
+
+	// Verify aggregated stats
+	// CheckpointsCount = 1 + 2 + 3 = 6
+	if summary.CheckpointsCount != 6 {
+		t.Errorf("summary.CheckpointsCount = %d, want 6", summary.CheckpointsCount)
+	}
+
+	// FilesTouched = [s0.go, s1.go, s2.go]
+	if len(summary.FilesTouched) != 3 {
+		t.Errorf("len(summary.FilesTouched) = %d, want 3", len(summary.FilesTouched))
+	}
+
+	// TokenUsage.InputTokens = 100 + 200 + 300 = 600
+	if summary.TokenUsage == nil {
+		t.Fatal("summary.TokenUsage should not be nil")
+	}
+	if summary.TokenUsage.InputTokens != 600 {
+		t.Errorf("summary.TokenUsage.InputTokens = %d, want 600", summary.TokenUsage.InputTokens)
+	}
+
+	// Verify each session can be read by index
+	for i := range 3 {
+		content, err := store.ReadSessionContent(context.Background(), checkpointID, i)
+		if err != nil {
+			t.Errorf("ReadSessionContent(%d) error = %v", i, err)
+			continue
+		}
+		expectedID := fmt.Sprintf("three-session-%d", i)
+		if content.Metadata.SessionID != expectedID {
+			t.Errorf("session %d SessionID = %q, want %q", i, content.Metadata.SessionID, expectedID)
+		}
+	}
+}
+
+// TestReadCommitted_NonexistentCheckpoint verifies that ReadCommitted returns
+// nil (not an error) when the checkpoint doesn't exist.
+func TestReadCommitted_NonexistentCheckpoint(t *testing.T) {
+	repo, _ := setupBranchTestRepo(t)
+	store := NewGitStore(repo)
+
+	// Ensure sessions branch exists
+	err := store.ensureSessionsBranch()
+	if err != nil {
+		t.Fatalf("ensureSessionsBranch() error = %v", err)
+	}
+
+	// Try to read non-existent checkpoint
+	checkpointID := id.MustCheckpointID("ffffffffffff")
+	summary, err := store.ReadCommitted(context.Background(), checkpointID)
+	if err != nil {
+		t.Errorf("ReadCommitted() error = %v, want nil", err)
+	}
+	if summary != nil {
+		t.Errorf("ReadCommitted() = %v, want nil for non-existent checkpoint", summary)
+	}
+}
+
+// TestReadSessionContent_NonexistentCheckpoint verifies that ReadSessionContent
+// returns ErrCheckpointNotFound when the checkpoint doesn't exist.
+func TestReadSessionContent_NonexistentCheckpoint(t *testing.T) {
+	repo, _ := setupBranchTestRepo(t)
+	store := NewGitStore(repo)
+
+	// Make sure the sessions branch exists before reading from it.
+	if branchErr := store.ensureSessionsBranch(); branchErr != nil {
+		t.Fatalf("ensureSessionsBranch() error = %v", branchErr)
+	}
+
+	// Nothing has been written under this checkpoint ID.
+	missingID := id.MustCheckpointID("eeeeeeeeeeee")
+	if _, readErr := store.ReadSessionContent(context.Background(), missingID, 0); !errors.Is(readErr, ErrCheckpointNotFound) {
+		t.Errorf("ReadSessionContent() error = %v, want ErrCheckpointNotFound", readErr)
+	}
+}
From b5cf65b7596ea76b8e90f4a94f4009070b056b8b Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 11:47:18 +1100 Subject: [PATCH 09/18] fix: add explicit returns after t.Fatal for staticcheck SA5011 staticcheck SA5011 flags "possible nil pointer dereference" after t.Fatal() checks because it doesn't recognize t.Fatal as terminating. Adding explicit return statements makes the control flow clear to the analyzer without needing nolint directives. 
Co-Authored-By: Claude Opus 4.5 --- cmd/entire/cli/checkpoint/checkpoint_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index b9e1c6fcd..ed6387cee 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -895,6 +895,7 @@ func TestWriteCommitted_MultipleSessionsSameCheckpoint(t *testing.T) { } if summary == nil { t.Fatal("ReadCommitted() returned nil summary") + return } // Verify Sessions array has 2 entries @@ -983,6 +984,7 @@ func TestWriteCommitted_Aggregation(t *testing.T) { } if summary == nil { t.Fatal("ReadCommitted() returned nil summary") + return } // Verify aggregated CheckpointsCount = 3 + 2 = 5 @@ -1052,6 +1054,7 @@ func TestReadCommitted_ReturnsCheckpointSummary(t *testing.T) { } if summary == nil { t.Fatal("ReadCommitted() returned nil summary") + return } // Verify basic summary fields @@ -1310,6 +1313,7 @@ func TestListCommitted_MultiSessionInfo(t *testing.T) { } if found == nil { t.Fatal("checkpoint not found in ListCommitted() results") + return } // Verify SessionCount = 2 From 08ba616bbeb6595450dabb067eac7716fce0ccf9 Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 13:49:15 +1100 Subject: [PATCH 10/18] improve migration script with dry-run mode and single-checkpoint support - Add comprehensive usage documentation at top of script - Add dry-run mode as default (use --apply to execute) - Add ability to migrate a single checkpoint by passing ID as argument - Add detailed format descriptions for old and new checkpoint structures - Add examples and rollback instructions Co-Authored-By: Claude Opus 4.5 Entire-Checkpoint: bd247e41d7dd --- scripts/migrate-sessions.sh | 629 ++++++++++++++++++++++++++---------- 1 file changed, 455 insertions(+), 174 deletions(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 3f40ace6c..0f80f094c 100755 --- 
a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -1,6 +1,86 @@ #!/bin/bash set -e +# +# migrate-sessions.sh - Migrate checkpoint data to v1 format +# +# USAGE: +# ./scripts/migrate-sessions.sh [OPTIONS] [CHECKPOINT_ID] +# +# OPTIONS: +# -h, --help Show this help message +# --apply Actually perform the migration (default is dry-run) +# +# ARGUMENTS: +# CHECKPOINT_ID Optional. Migrate only this checkpoint (e.g., "a1b2c3d4e5f6") +# If omitted, migrates all checkpoints from entire/sessions branch. +# +# DESCRIPTION: +# Migrates checkpoint data from the old format (latest session at root, archived +# sessions in numbered folders 1/, 2/, etc.) to the new v1 format (all sessions +# in 0-indexed folders 0/, 1/, 2/, with a CheckpointSummary at the root). +# +# The script reads from 'entire/sessions' and writes to 'entire/sessions/v1', +# leaving the original branch untouched as a backup. +# +# By default, runs in dry-run mode showing what would be migrated. +# Use --apply to actually perform the migration. +# +# OLD FORMAT: +# // +# ├── metadata.json # Session metadata (has session_id) +# ├── full.jsonl # Latest session transcript +# ├── prompt.txt +# ├── context.md +# └── 1/ # Archived session +# └── ... +# +# NEW FORMAT (v1): +# // +# ├── metadata.json # CheckpointSummary (aggregated stats + session paths) +# ├── 0/ # First session (was at root) +# │ ├── metadata.json # Session-specific metadata +# │ ├── full.jsonl +# │ └── ... +# └── 1/ # Second session (was 1/) +# └── ... 
+# +# PREREQUISITES: +# - jq (JSON processor) must be installed +# - Clean working tree (no uncommitted changes) +# - The entire/sessions branch must exist +# +# EXAMPLES: +# # Preview what would be migrated (dry-run) +# ./scripts/migrate-sessions.sh +# +# # Migrate all checkpoints +# ./scripts/migrate-sessions.sh --apply +# +# # Preview a single checkpoint +# ./scripts/migrate-sessions.sh a1b2c3d4e5f6 +# +# # Migrate a single checkpoint +# ./scripts/migrate-sessions.sh a1b2c3d4e5f6 --apply +# +# AFTER MIGRATION: +# 1. Verify the migration: +# git log entire/sessions/v1 +# git show entire/sessions/v1:/metadata.json +# +# 2. To switch to the new branch (DESTRUCTIVE - backup first!): +# git branch -m entire/sessions entire/sessions-backup +# git branch -m entire/sessions/v1 entire/sessions +# +# 3. Push the new branch: +# git push origin entire/sessions/v1 +# +# ROLLBACK: +# The original entire/sessions branch is not modified. If migration fails +# or produces incorrect results, simply delete the v1 branch: +# git branch -D entire/sessions/v1 +# + # Colors for output RED='\033[0;31m' GREEN='\033[0;32m' @@ -10,22 +90,395 @@ NC='\033[0m' # No Color SOURCE_BRANCH="entire/sessions" TARGET_BRANCH="entire/sessions/v1" +# Parse arguments +DRY_RUN=true +CHECKPOINT_FILTER="" + +show_help() { + sed -n '3,/^$/p' "$0" | sed 's/^# \?//' + exit 0 +} + +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + show_help + ;; + --apply) + DRY_RUN=false + shift + ;; + -*) + echo -e "${RED}Unknown option: $1${NC}" >&2 + echo "Use --help for usage information" >&2 + exit 1 + ;; + *) + if [[ -z "$CHECKPOINT_FILTER" ]]; then + CHECKPOINT_FILTER="$1" + else + echo -e "${RED}Too many arguments${NC}" >&2 + echo "Use --help for usage information" >&2 + exit 1 + fi + shift + ;; + esac +done + +# Check prerequisites +if ! 
command -v jq &> /dev/null; then + echo -e "${RED}Error: jq is required but not installed${NC}" >&2 + echo "Install with: brew install jq (macOS) or apt-get install jq (Linux)" >&2 + exit 1 +fi + +if ! git rev-parse --is-inside-work-tree &> /dev/null; then + echo -e "${RED}Error: Not inside a git repository${NC}" >&2 + exit 1 +fi + +if ! git show-ref --verify --quiet "refs/heads/$SOURCE_BRANCH"; then + echo -e "${RED}Error: Branch '$SOURCE_BRANCH' does not exist${NC}" >&2 + exit 1 +fi + +if [[ "$DRY_RUN" == "false" ]] && [[ -n $(git status --porcelain) ]]; then + echo -e "${RED}Error: Working tree is not clean${NC}" >&2 + echo "Please commit or stash your changes first" >&2 + exit 1 +fi + echo -e "${GREEN}=== Checkpoint Migration Script ===${NC}" echo "Source: $SOURCE_BRANCH" echo "Target: $TARGET_BRANCH" +if [[ -n "$CHECKPOINT_FILTER" ]]; then + echo "Filter: checkpoint $CHECKPOINT_FILTER only" +fi +if [[ "$DRY_RUN" == "true" ]]; then + echo -e "${YELLOW}DRY RUN - no changes will be made (use --apply to migrate)${NC}" +fi echo "" # Save current branch ORIGINAL_BRANCH=$(git branch --show-current) +# Convert checkpoint ID to path pattern (e.g., "a1b2c3d4e5f6" -> "a1/b2c3d4e5f6") +checkpoint_to_path() { + local id="$1" + echo "${id:0:2}/${id:2}" +} + +# Migrate a single checkpoint directory +# Args: $1 = checkpoint path (e.g., "a1/b2c3d4e5f6"), $2 = source dir, $3 = target dir +migrate_checkpoint() { + local CHECKPOINT_DIR="$1" + local SOURCE_DIR="$2" + local TARGET_DIR="$3" + local CHECKPOINT_PATH="$SOURCE_DIR/$CHECKPOINT_DIR" + + if [[ ! 
-f "$CHECKPOINT_PATH/metadata.json" ]]; then + echo " Skipping: no metadata.json" + return + fi + + local ROOT_META="$CHECKPOINT_PATH/metadata.json" + + # Check if this is session metadata (has session_id) or already aggregated + if jq -e '.session_id' "$ROOT_META" > /dev/null 2>&1; then + # This is session metadata at root - needs migration + migrate_old_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" + else + # Already aggregated format - copy but still transform session metadata + migrate_new_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" + fi +} + +# Migrate checkpoint from old format (session files at root) +migrate_old_format() { + local CHECKPOINT_DIR="$1" + local CHECKPOINT_PATH="$2" + local TARGET_DIR="$3" + local ROOT_META="$CHECKPOINT_PATH/metadata.json" + + # Find existing numbered subdirs + local EXISTING_SUBDIRS + EXISTING_SUBDIRS=$(find "$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*' | sort -t'/' -k3 -n -r || true) + + # Calculate next session number (renumber existing + 1 for root) + local NEXT_NUM=0 + + # Renumber existing subdirs (in reverse to avoid conflicts) + for SUBDIR in $EXISTING_SUBDIRS; do + local OLD_NUM + OLD_NUM=$(basename "$SUBDIR") + local NEW_NUM=$((OLD_NUM + 1)) + + # Copy to target with new number + mkdir -p "$TARGET_DIR/$CHECKPOINT_DIR/$NEW_NUM" + # Copy non-metadata files + for FILE in context.md prompt.txt content_hash.txt full.jsonl; do + if [[ -f "$SUBDIR/$FILE" ]]; then + cp "$SUBDIR/$FILE" "$TARGET_DIR/$CHECKPOINT_DIR/$NEW_NUM/" + fi + done + # Transform metadata.json: remove session_ids and session_count, convert agents array to single agent + if [[ -f "$SUBDIR/metadata.json" ]]; then + jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . 
end' \ + "$SUBDIR/metadata.json" > "$TARGET_DIR/$CHECKPOINT_DIR/$NEW_NUM/metadata.json" + fi + + if [[ $NEW_NUM -gt $NEXT_NUM ]]; then + NEXT_NUM=$NEW_NUM + fi + done + + # Move root session files to /0 + mkdir -p "$TARGET_DIR/$CHECKPOINT_DIR/0" + # Copy non-metadata files + for FILE in context.md prompt.txt content_hash.txt full.jsonl; do + if [[ -f "$CHECKPOINT_PATH/$FILE" ]]; then + cp "$CHECKPOINT_PATH/$FILE" "$TARGET_DIR/$CHECKPOINT_DIR/0/" + fi + done + # Transform metadata.json + if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then + jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . end' \ + "$CHECKPOINT_PATH/metadata.json" > "$TARGET_DIR/$CHECKPOINT_DIR/0/metadata.json" + fi + + # Calculate total sessions (NEXT_NUM is highest 0-based index, so count = NEXT_NUM + 1) + local TOTAL_SESSIONS=$((NEXT_NUM + 1)) + + # Build sessions array and aggregate data + local SESSIONS_JSON="[]" + local FILES_TOUCHED="[]" + local CHECKPOINTS_COUNT=0 + local INPUT_TOKENS=0 + local CACHE_CREATION=0 + local CACHE_READ=0 + local OUTPUT_TOKENS=0 + local API_CALLS=0 + + for i in $(seq 0 $((TOTAL_SESSIONS - 1))); do + local SESSION_DIR="$TARGET_DIR/$CHECKPOINT_DIR/$i" + if [[ -d "$SESSION_DIR" ]]; then + local SESSION_META="$SESSION_DIR/metadata.json" + + # Build session entry (paths are absolute from branch root) + local SESSION_ENTRY + SESSION_ENTRY=$(jq -n \ + --arg meta "/$CHECKPOINT_DIR/$i/metadata.json" \ + --arg transcript "/$CHECKPOINT_DIR/$i/full.jsonl" \ + --arg context "/$CHECKPOINT_DIR/$i/context.md" \ + --arg hash "/$CHECKPOINT_DIR/$i/content_hash.txt" \ + --arg prompt "/$CHECKPOINT_DIR/$i/prompt.txt" \ + '{metadata: $meta, transcript: $transcript, context: $context, content_hash: $hash, prompt: $prompt}') + + SESSIONS_JSON=$(echo "$SESSIONS_JSON" | jq --argjson entry "$SESSION_ENTRY" '. 
+ [$entry]') + + # Aggregate from session metadata + if [[ -f "$SESSION_META" ]]; then + # Files touched (union) + local SESSION_FILES + SESSION_FILES=$(jq -r '.files_touched // []' "$SESSION_META") + FILES_TOUCHED=$(echo "$FILES_TOUCHED" "$SESSION_FILES" | jq -s 'add | unique') + + # Checkpoints count (sum) + CHECKPOINTS_COUNT=$((CHECKPOINTS_COUNT + $(jq -r '.checkpoints_count // 0' "$SESSION_META"))) + + # Token usage (sum) + INPUT_TOKENS=$((INPUT_TOKENS + $(jq -r '.token_usage.input_tokens // 0' "$SESSION_META"))) + CACHE_CREATION=$((CACHE_CREATION + $(jq -r '.token_usage.cache_creation_tokens // 0' "$SESSION_META"))) + CACHE_READ=$((CACHE_READ + $(jq -r '.token_usage.cache_read_tokens // 0' "$SESSION_META"))) + OUTPUT_TOKENS=$((OUTPUT_TOKENS + $(jq -r '.token_usage.output_tokens // 0' "$SESSION_META"))) + API_CALLS=$((API_CALLS + $(jq -r '.token_usage.api_call_count // 0' "$SESSION_META"))) + fi + fi + done + + # Get base info from original root metadata + local CHECKPOINT_ID STRATEGY BRANCH + CHECKPOINT_ID=$(jq -r '.checkpoint_id // ""' "$ROOT_META") + STRATEGY=$(jq -r '.strategy // "manual-commit"' "$ROOT_META") + BRANCH=$(jq -r '.branch // ""' "$ROOT_META") + + # Create aggregated metadata.json + jq -n \ + --arg checkpoint_id "$CHECKPOINT_ID" \ + --arg strategy "$STRATEGY" \ + --arg branch "$BRANCH" \ + --argjson checkpoints_count "$CHECKPOINTS_COUNT" \ + --argjson files_touched "$FILES_TOUCHED" \ + --argjson sessions "$SESSIONS_JSON" \ + --argjson input_tokens "$INPUT_TOKENS" \ + --argjson cache_creation "$CACHE_CREATION" \ + --argjson cache_read "$CACHE_READ" \ + --argjson output_tokens "$OUTPUT_TOKENS" \ + --argjson api_calls "$API_CALLS" \ + '{ + checkpoint_id: $checkpoint_id, + strategy: $strategy, + branch: $branch, + checkpoints_count: $checkpoints_count, + files_touched: $files_touched, + sessions: $sessions, + token_usage: { + input_tokens: $input_tokens, + cache_creation_tokens: $cache_creation, + cache_read_tokens: $cache_read, + output_tokens: 
$output_tokens, + api_call_count: $api_calls + } + }' > "$TARGET_DIR/$CHECKPOINT_DIR/metadata.json" + + echo " Migrated: $TOTAL_SESSIONS session(s)" +} + +# Migrate checkpoint that's already in new format (just transform paths) +migrate_new_format() { + local CHECKPOINT_DIR="$1" + local CHECKPOINT_PATH="$2" + local TARGET_DIR="$3" + + mkdir -p "$TARGET_DIR/$CHECKPOINT_DIR" + + # Transform root metadata.json to have absolute paths in sessions array + jq --arg prefix "/$CHECKPOINT_DIR" \ + '.sessions = [.sessions[] | { + metadata: ($prefix + "/" + (.metadata | ltrimstr("/"))), + transcript: ($prefix + "/" + (.transcript | ltrimstr("/"))), + context: ($prefix + "/" + (.context | ltrimstr("/"))), + content_hash: ($prefix + "/" + (.content_hash | ltrimstr("/"))), + prompt: ($prefix + "/" + (.prompt | ltrimstr("/"))) + }]' "$CHECKPOINT_PATH/metadata.json" > "$TARGET_DIR/$CHECKPOINT_DIR/metadata.json" + + # Copy and transform each session subdir's metadata.json + for SUBDIR in $(find "$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*'); do + local SUBDIR_NUM + SUBDIR_NUM=$(basename "$SUBDIR") + mkdir -p "$TARGET_DIR/$CHECKPOINT_DIR/$SUBDIR_NUM" + + # Copy non-metadata files + for FILE in context.md prompt.txt content_hash.txt full.jsonl; do + if [[ -f "$SUBDIR/$FILE" ]]; then + cp "$SUBDIR/$FILE" "$TARGET_DIR/$CHECKPOINT_DIR/$SUBDIR_NUM/" + fi + done + + # Transform metadata.json + if [[ -f "$SUBDIR/metadata.json" ]]; then + jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . 
end' \ + "$SUBDIR/metadata.json" > "$TARGET_DIR/$CHECKPOINT_DIR/$SUBDIR_NUM/metadata.json" + fi + done + echo " Copied with session metadata transformed" +} + +# Single checkpoint migration mode +if [[ -n "$CHECKPOINT_FILTER" ]]; then + CHECKPOINT_PATH=$(checkpoint_to_path "$CHECKPOINT_FILTER") + echo -e "${GREEN}Migrating single checkpoint: $CHECKPOINT_FILTER${NC}" + echo " Path: $CHECKPOINT_PATH" + + # Create temp dir and checkout source + TEMP_DIR=$(mktemp -d) + git worktree add --detach "$TEMP_DIR" "$SOURCE_BRANCH" 2>/dev/null + + if [[ ! -d "$TEMP_DIR/$CHECKPOINT_PATH" ]]; then + git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" + echo -e "${RED}Error: Checkpoint $CHECKPOINT_FILTER not found on $SOURCE_BRANCH${NC}" >&2 + exit 1 + fi + + # Show checkpoint info + if jq -e '.session_id' "$TEMP_DIR/$CHECKPOINT_PATH/metadata.json" > /dev/null 2>&1; then + echo " Format: old (session files at root) -> needs migration" + SESSION_COUNT=$(find "$TEMP_DIR/$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*' | wc -l | tr -d ' ') + echo " Sessions: $((SESSION_COUNT + 1)) (1 at root + $SESSION_COUNT archived)" + else + echo " Format: new (already has CheckpointSummary)" + fi + + if [[ "$DRY_RUN" == "true" ]]; then + git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" + echo "" + echo -e "${YELLOW}Run with --apply to perform migration${NC}" + exit 0 + fi + + # Ensure target branch exists + if ! 
git show-ref --verify --quiet "refs/heads/$TARGET_BRANCH"; then + echo -e "${GREEN}Creating target branch $TARGET_BRANCH...${NC}" + git checkout "$SOURCE_BRANCH" + git checkout --orphan "$TARGET_BRANCH" + git commit --allow-empty -m "Initialize metadata branch (v1)" + git checkout "$SOURCE_BRANCH" + fi + + # Checkout target branch + git checkout "$TARGET_BRANCH" + + # Migrate the checkpoint + migrate_checkpoint "$CHECKPOINT_PATH" "$TEMP_DIR" "$(pwd)" + + # Cleanup + git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" + + # Commit + git add "$CHECKPOINT_PATH" + if ! git diff --cached --quiet; then + git commit -m "Migrate checkpoint: $CHECKPOINT_FILTER" + echo -e "${GREEN}Committed${NC}" + else + echo -e "${YELLOW}No changes${NC}" + fi + + git checkout "$ORIGINAL_BRANCH" 2>/dev/null || git checkout main + echo "" + echo -e "${GREEN}=== Migration Complete ===${NC}" + exit 0 +fi + +# Full migration mode - process all commits # Get list of commits from source branch (oldest first, excluding initial commit) COMMITS=$(git log --reverse --format="%H" "$SOURCE_BRANCH" | tail -n +2) INIT_COMMIT=$(git log --reverse --format="%H" "$SOURCE_BRANCH" | head -1) -echo -e "${YELLOW}Found commits to process:${NC}" +COMMIT_COUNT=$(echo "$COMMITS" | wc -l | tr -d ' ') +echo -e "${YELLOW}Found $COMMIT_COUNT commits to process:${NC}" git log --reverse --oneline "$SOURCE_BRANCH" | tail -n +2 echo ""
+if [[ "$DRY_RUN" == "true" ]]; then
+    # Dry-run: list the checkpoints on the source branch and their format, then exit without changing anything
+    TEMP_DIR=$(mktemp -d)
+    git worktree add --detach "$TEMP_DIR" "$SOURCE_BRANCH" 2>/dev/null
+
+    cd "$TEMP_DIR"
+    CHECKPOINT_DIRS=$(find . -maxdepth 2 -mindepth 2 -type d | grep -E '^\./[0-9a-f]{2}/[0-9a-f]+$' || true)
+    CHECKPOINT_COUNT=$(echo "$CHECKPOINT_DIRS" | grep -c . || true)  # grep -c prints 0 itself on no match (it only exits 1); "|| echo 0" would add a second "0" line
+
+    echo -e "${YELLOW}Found $CHECKPOINT_COUNT checkpoints on $SOURCE_BRANCH:${NC}"
+    for CHECKPOINT_PATH in $CHECKPOINT_DIRS; do
+        CHECKPOINT_DIR="${CHECKPOINT_PATH#./}"
+        if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then
+            # A root metadata.json carrying session_id marks the old layout
+            if jq -e '.session_id' "$CHECKPOINT_PATH/metadata.json" > /dev/null 2>&1; then
+                echo " $CHECKPOINT_DIR (old format)"
+            else
+                echo " $CHECKPOINT_DIR (new format)"
+            fi
+        fi
+    done
+
+    cd "$OLDPWD"
+    git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR"
+
+    echo ""
+    echo -e "${YELLOW}Run with --apply to perform migration${NC}"
+    exit 0
+fi
+
 # Create orphan target branch from init commit echo -e "${GREEN}Creating target branch $TARGET_BRANCH...${NC}" git checkout "$SOURCE_BRANCH" @@ -60,179 +513,7 @@ for COMMIT in $COMMITS; do # Track this directory for git add later PROCESSED_DIRS="$PROCESSED_DIRS $CHECKPOINT_DIR" - # Create checkpoint dir in target if not exists - mkdir -p "$OLDPWD/$CHECKPOINT_DIR" - - # Check if root has session files (metadata.json with session_id) - if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then - ROOT_META="$CHECKPOINT_PATH/metadata.json" - - # Check if this is session metadata (has session_id) or already aggregated - if jq -e '.session_id' "$ROOT_META" > /dev/null 2>&1; then - # This is session metadata at root - needs migration - - # Find existing numbered subdirs - EXISTING_SUBDIRS=$(find "$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*' | sort -t'/' -k3 -n -r || true) - - # Calculate next session number (renumber existing + 1 for root) - NEXT_NUM=0 - - # Renumber existing subdirs (in reverse to avoid conflicts) - for SUBDIR in $EXISTING_SUBDIRS; do - OLD_NUM=$(basename "$SUBDIR") - NEW_NUM=$((OLD_NUM + 1)) - - # Copy to target with new number - mkdir -p "$OLDPWD/$CHECKPOINT_DIR/$NEW_NUM" - # Copy non-metadata files - for FILE in context.md prompt.txt content_hash.txt full.jsonl; do - if [[ -f "$SUBDIR/$FILE" ]]; then - cp "$SUBDIR/$FILE" 
"$OLDPWD/$CHECKPOINT_DIR/$NEW_NUM/" - fi - done - # Transform metadata.json: agents to string, remove session_id and session_count - if [[ -f "$SUBDIR/metadata.json" ]]; then - jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . end' \ - "$SUBDIR/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/$NEW_NUM/metadata.json" - fi - - if [[ $NEW_NUM -gt $NEXT_NUM ]]; then - NEXT_NUM=$NEW_NUM - fi - done - - # Move root session files to /0 - mkdir -p "$OLDPWD/$CHECKPOINT_DIR/0" - # Copy non-metadata files - for FILE in context.md prompt.txt content_hash.txt full.jsonl; do - if [[ -f "$CHECKPOINT_PATH/$FILE" ]]; then - cp "$CHECKPOINT_PATH/$FILE" "$OLDPWD/$CHECKPOINT_DIR/0/" - fi - done - # Transform metadata.json: agents to string, remove session_id and session_count - if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then - jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . end' \ - "$CHECKPOINT_PATH/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/0/metadata.json" - fi - - # Calculate total sessions (NEXT_NUM is highest 0-based index, so count = NEXT_NUM + 1) - TOTAL_SESSIONS=$((NEXT_NUM + 1)) - - # Build sessions array and aggregate data - SESSIONS_JSON="[]" - FILES_TOUCHED="[]" - CHECKPOINTS_COUNT=0 - INPUT_TOKENS=0 - CACHE_CREATION=0 - CACHE_READ=0 - OUTPUT_TOKENS=0 - API_CALLS=0 - EARLIEST_DATE="" - - for i in $(seq 0 $((TOTAL_SESSIONS - 1))); do - SESSION_DIR="$OLDPWD/$CHECKPOINT_DIR/$i" - if [[ -d "$SESSION_DIR" ]]; then - SESSION_META="$SESSION_DIR/metadata.json" - - # Build session entry (paths are absolute from branch root) - SESSION_ENTRY=$(jq -n \ - --arg meta "/$CHECKPOINT_DIR/$i/metadata.json" \ - --arg transcript "/$CHECKPOINT_DIR/$i/full.jsonl" \ - --arg context "/$CHECKPOINT_DIR/$i/context.md" \ - --arg hash "/$CHECKPOINT_DIR/$i/content_hash.txt" \ - --arg prompt "/$CHECKPOINT_DIR/$i/prompt.txt" \ - '{metadata: $meta, transcript: $transcript, context: $context, content_hash: 
$hash, prompt: $prompt}') - - SESSIONS_JSON=$(echo "$SESSIONS_JSON" | jq --argjson entry "$SESSION_ENTRY" '. + [$entry]') - - # Aggregate from session metadata - if [[ -f "$SESSION_META" ]]; then - # Files touched (union) - SESSION_FILES=$(jq -r '.files_touched // []' "$SESSION_META") - FILES_TOUCHED=$(echo "$FILES_TOUCHED" "$SESSION_FILES" | jq -s 'add | unique') - - # Checkpoints count (sum) - CHECKPOINTS_COUNT=$((CHECKPOINTS_COUNT + $(jq -r '.checkpoints_count // 0' "$SESSION_META"))) - - # Token usage (sum) - INPUT_TOKENS=$((INPUT_TOKENS + $(jq -r '.token_usage.input_tokens // 0' "$SESSION_META"))) - CACHE_CREATION=$((CACHE_CREATION + $(jq -r '.token_usage.cache_creation_tokens // 0' "$SESSION_META"))) - CACHE_READ=$((CACHE_READ + $(jq -r '.token_usage.cache_read_tokens // 0' "$SESSION_META"))) - OUTPUT_TOKENS=$((OUTPUT_TOKENS + $(jq -r '.token_usage.output_tokens // 0' "$SESSION_META"))) - API_CALLS=$((API_CALLS + $(jq -r '.token_usage.api_call_count // 0' "$SESSION_META"))) - - fi - fi - done - - # Get base info from original root metadata - CHECKPOINT_ID=$(jq -r '.checkpoint_id // ""' "$ROOT_META") - STRATEGY=$(jq -r '.strategy // "manual-commit"' "$ROOT_META") - BRANCH=$(jq -r '.branch // ""' "$ROOT_META") - - # Create aggregated metadata.json - jq -n \ - --arg checkpoint_id "$CHECKPOINT_ID" \ - --arg strategy "$STRATEGY" \ - --arg branch "$BRANCH" \ - --argjson checkpoints_count "$CHECKPOINTS_COUNT" \ - --argjson files_touched "$FILES_TOUCHED" \ - --argjson sessions "$SESSIONS_JSON" \ - --argjson input_tokens "$INPUT_TOKENS" \ - --argjson cache_creation "$CACHE_CREATION" \ - --argjson cache_read "$CACHE_READ" \ - --argjson output_tokens "$OUTPUT_TOKENS" \ - --argjson api_calls "$API_CALLS" \ - '{ - checkpoint_id: $checkpoint_id, - strategy: $strategy, - branch: $branch, - checkpoints_count: $checkpoints_count, - files_touched: $files_touched, - sessions: $sessions, - token_usage: { - input_tokens: $input_tokens, - cache_creation_tokens: $cache_creation, - 
cache_read_tokens: $cache_read, - output_tokens: $output_tokens, - api_call_count: $api_calls - } - }' > "$OLDPWD/$CHECKPOINT_DIR/metadata.json" - - echo " Migrated: $TOTAL_SESSIONS session(s)" - else - # Already aggregated format - copy but still transform session metadata - # Transform root metadata.json to have absolute paths in sessions array - jq --arg prefix "/$CHECKPOINT_DIR" \ - '.sessions = [.sessions[] | { - metadata: ($prefix + "/" + (.metadata | ltrimstr("/"))), - transcript: ($prefix + "/" + (.transcript | ltrimstr("/"))), - context: ($prefix + "/" + (.context | ltrimstr("/"))), - content_hash: ($prefix + "/" + (.content_hash | ltrimstr("/"))), - prompt: ($prefix + "/" + (.prompt | ltrimstr("/"))) - }]' "$CHECKPOINT_PATH/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/metadata.json" - - # Copy and transform each session subdir's metadata.json - for SUBDIR in $(find "$CHECKPOINT_PATH" -maxdepth 1 -mindepth 1 -type d -name '[0-9]*'); do - SUBDIR_NUM=$(basename "$SUBDIR") - mkdir -p "$OLDPWD/$CHECKPOINT_DIR/$SUBDIR_NUM" - - # Copy non-metadata files - for FILE in context.md prompt.txt content_hash.txt full.jsonl; do - if [[ -f "$SUBDIR/$FILE" ]]; then - cp "$SUBDIR/$FILE" "$OLDPWD/$CHECKPOINT_DIR/$SUBDIR_NUM/" - fi - done - - # Transform metadata.json - if [[ -f "$SUBDIR/metadata.json" ]]; then - jq 'del(.session_ids, .session_count) | if .agents | type == "array" then .agents = .agents[0] else . 
end' \ - "$SUBDIR/metadata.json" > "$OLDPWD/$CHECKPOINT_DIR/$SUBDIR_NUM/metadata.json" - fi - done - echo " Copied with session metadata transformed" - fi - fi + migrate_checkpoint "$CHECKPOINT_DIR" "$TEMP_DIR" "$OLDPWD" done cd "$OLDPWD" From ddc186e73f53841407e0d95718983c8dad058028 Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 13:26:20 +1100 Subject: [PATCH 11/18] point at entire/sessions/v1 Entire-Checkpoint: be28ae5f33d2 --- .claude/skills/test-repo/test-harness.sh | 4 +- cmd/entire/cli/checkpoint/temporary_test.go | 12 ++++-- cmd/entire/cli/clean_test.go | 14 +++---- .../cli/integration_test/attribution_test.go | 7 ++-- .../auto_commit_checkpoint_fix_test.go | 5 ++- .../manual_commit_workflow_test.go | 42 +++++++++---------- cmd/entire/cli/paths/paths.go | 2 +- cmd/entire/cli/strategy/clean_test.go | 9 ++-- cmd/entire/cli/strategy/cleanup.go | 6 +-- scripts/test-attribution-e2e.sh | 24 +++++------ 10 files changed, 66 insertions(+), 59 deletions(-) diff --git a/.claude/skills/test-repo/test-harness.sh b/.claude/skills/test-repo/test-harness.sh index 3d997bc80..5fd36ba85 100755 --- a/.claude/skills/test-repo/test-harness.sh +++ b/.claude/skills/test-repo/test-harness.sh @@ -138,9 +138,9 @@ verify-metadata-branch) echo "==> Verifying metadata branch..." 
cd "$REPO_DIR" - if git branch -a | grep "entire/sessions"; then + if git branch -a | grep "entire/sessions/v1"; then echo "✓ Metadata branch exists" - git show entire/sessions --stat | head -20 + git show entire/sessions/v1 --stat | head -20 else echo "✗ Metadata branch not found" exit 1 diff --git a/cmd/entire/cli/checkpoint/temporary_test.go b/cmd/entire/cli/checkpoint/temporary_test.go index fbfaef854..0833b9c4a 100644 --- a/cmd/entire/cli/checkpoint/temporary_test.go +++ b/cmd/entire/cli/checkpoint/temporary_test.go @@ -1,6 +1,10 @@ package checkpoint -import "testing" +import ( + "testing" + + "entire.io/cli/cmd/entire/cli/paths" +) func TestHashWorktreeID(t *testing.T) { tests := []struct { @@ -129,9 +133,9 @@ func TestParseShadowBranchName(t *testing.T) { wantOK: false, }, { - name: "entire/sessions is not a shadow branch", - branchName: "entire/sessions", - wantCommit: "sessions", + name: "entire/sessions/v1 is not a shadow branch", + branchName: paths.MetadataBranchName, + wantCommit: "sessions/v1", wantWorktree: "", wantOK: true, // Parser doesn't validate content, just extracts }, diff --git a/cmd/entire/cli/clean_test.go b/cmd/entire/cli/clean_test.go index ff13d8610..3f3b46337 100644 --- a/cmd/entire/cli/clean_test.go +++ b/cmd/entire/cli/clean_test.go @@ -93,9 +93,9 @@ func TestRunClean_PreviewMode(t *testing.T) { } // Also create entire/sessions (should NOT be listed) - sessionsRef := plumbing.NewHashReference(plumbing.NewBranchReferenceName("entire/sessions"), commitHash) + sessionsRef := plumbing.NewHashReference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), commitHash) if err := repo.Storer.SetReference(sessionsRef); err != nil { - t.Fatalf("failed to create entire/sessions: %v", err) + t.Fatalf("failed to create %s: %v", paths.MetadataBranchName, err) } var stdout bytes.Buffer @@ -119,9 +119,9 @@ func TestRunClean_PreviewMode(t *testing.T) { t.Errorf("Expected 'entire/def5678' in output, got: %s", output) } - // Should NOT list 
entire/sessions - if strings.Contains(output, "entire/sessions") { - t.Errorf("Should not list 'entire/sessions', got: %s", output) + // Should NOT list entire/sessions/v1 + if strings.Contains(output, paths.MetadataBranchName) { + t.Errorf("Should not list '%s', got: %s", paths.MetadataBranchName, output) } // Should prompt to use --force @@ -181,7 +181,7 @@ func TestRunClean_SessionsBranchPreserved(t *testing.T) { t.Fatalf("failed to create shadow branch: %v", err) } - sessionsRef := plumbing.NewHashReference(plumbing.NewBranchReferenceName("entire/sessions"), commitHash) + sessionsRef := plumbing.NewHashReference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), commitHash) if err := repo.Storer.SetReference(sessionsRef); err != nil { t.Fatalf("failed to create entire/sessions: %v", err) } @@ -199,7 +199,7 @@ func TestRunClean_SessionsBranchPreserved(t *testing.T) { } // Sessions branch should still exist - sessionsRefName := plumbing.NewBranchReferenceName("entire/sessions") + sessionsRefName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) if _, err := repo.Reference(sessionsRefName, true); err != nil { t.Error("entire/sessions branch should be preserved") } diff --git a/cmd/entire/cli/integration_test/attribution_test.go b/cmd/entire/cli/integration_test/attribution_test.go index cb5dd0f56..23521d97e 100644 --- a/cmd/entire/cli/integration_test/attribution_test.go +++ b/cmd/entire/cli/integration_test/attribution_test.go @@ -8,6 +8,7 @@ import ( "entire.io/cli/cmd/entire/cli/checkpoint" "entire.io/cli/cmd/entire/cli/checkpoint/id" + "entire.io/cli/cmd/entire/cli/paths" "entire.io/cli/cmd/entire/cli/strategy" "entire.io/cli/cmd/entire/cli/trailers" "github.com/go-git/go-git/v5" @@ -135,7 +136,7 @@ func TestManualCommit_Attribution(t *testing.T) { t.Log("Verifying attribution in metadata") // Read metadata from entire/sessions branch - sessionsRef, err := repo.Reference(plumbing.NewBranchReferenceName("entire/sessions"), true) + 
sessionsRef, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) if err != nil { t.Fatalf("Failed to get entire/sessions branch: %v", err) } @@ -277,7 +278,7 @@ func TestManualCommit_AttributionDeletionOnly(t *testing.T) { // ======================================== t.Log("Verifying attribution for deletion-only commit") - sessionsRef, err := repo.Reference(plumbing.NewBranchReferenceName("entire/sessions"), true) + sessionsRef, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) if err != nil { t.Fatalf("Failed to get entire/sessions branch: %v", err) } @@ -518,7 +519,7 @@ func TestManualCommit_AttributionNoDoubleCount(t *testing.T) { func getAttributionFromMetadata(t *testing.T, repo *git.Repository, checkpointID id.CheckpointID) *checkpoint.InitialAttribution { t.Helper() - sessionsRef, err := repo.Reference(plumbing.NewBranchReferenceName("entire/sessions"), true) + sessionsRef, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) if err != nil { t.Fatalf("Failed to get entire/sessions branch: %v", err) } diff --git a/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go b/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go index 5eec4c7f6..a6afea686 100644 --- a/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go +++ b/cmd/entire/cli/integration_test/auto_commit_checkpoint_fix_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "entire.io/cli/cmd/entire/cli/paths" "entire.io/cli/cmd/entire/cli/sessionid" "entire.io/cli/cmd/entire/cli/strategy" ) @@ -160,7 +161,7 @@ func TestDualStrategy_IncrementalPromptContent(t *testing.T) { t.Logf("First checkpoint: %s (commit %s)", checkpoint1ID, commit1Hash[:7]) // Verify first checkpoint has prompt A (session files in numbered subdirectory) - prompt1Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint1ID, "prompt.txt")) + prompt1Content, 
found := env.ReadFileFromBranch(paths.MetadataBranchName, SessionFilePath(checkpoint1ID, "prompt.txt")) if !found { t.Fatal("First checkpoint should have prompt.txt on entire/sessions branch") } @@ -208,7 +209,7 @@ func TestDualStrategy_IncrementalPromptContent(t *testing.T) { t.Log("Phase 3: Verify second checkpoint only has prompt B (incremental)") // Session files are now in numbered subdirectory (e.g., 0/prompt.txt) - prompt2Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint2ID, "prompt.txt")) + prompt2Content, found := env.ReadFileFromBranch(paths.MetadataBranchName, SessionFilePath(checkpoint2ID, "prompt.txt")) if !found { t.Fatal("Second checkpoint should have prompt.txt on entire/sessions branch") } diff --git a/cmd/entire/cli/integration_test/manual_commit_workflow_test.go b/cmd/entire/cli/integration_test/manual_commit_workflow_test.go index f0e549a2c..debc33136 100644 --- a/cmd/entire/cli/integration_test/manual_commit_workflow_test.go +++ b/cmd/entire/cli/integration_test/manual_commit_workflow_test.go @@ -244,14 +244,14 @@ func TestShadow_FullWorkflow(t *testing.T) { t.Logf("Checkpoint 1 ID: %s", checkpoint1ID) // Verify entire/sessions branch exists with checkpoint folder - if !env.BranchExists("entire/sessions") { + if !env.BranchExists(paths.MetadataBranchName) { t.Error("entire/sessions branch should exist after condensation") } // Verify checkpoint folder contents (check via git show) // Uses sharded path: //metadata.json checkpointPath := ShardedCheckpointPath(checkpoint1ID) + "/metadata.json" - if !env.FileExistsInBranch("entire/sessions", checkpointPath) { + if !env.FileExistsInBranch(paths.MetadataBranchName, checkpointPath) { t.Errorf("Checkpoint folder should contain metadata.json at %s", checkpointPath) } @@ -328,7 +328,7 @@ func TestShadow_FullWorkflow(t *testing.T) { // Verify second checkpoint folder exists (uses sharded path) checkpoint2Path := ShardedCheckpointPath(checkpoint2ID) + "/metadata.json" - 
if !env.FileExistsInBranch("entire/sessions", checkpoint2Path) { + if !env.FileExistsInBranch(paths.MetadataBranchName, checkpoint2Path) { t.Errorf("Second checkpoint folder should exist at %s", checkpoint2Path) } @@ -663,19 +663,19 @@ func TestShadow_TranscriptCondensation(t *testing.T) { t.Logf("Checkpoint ID: %s", checkpointID) // Verify entire/sessions branch exists - if !env.BranchExists("entire/sessions") { + if !env.BranchExists(paths.MetadataBranchName) { t.Fatal("entire/sessions branch should exist after condensation") } // Verify root metadata.json (CheckpointSummary) exists summaryPath := CheckpointSummaryPath(checkpointID) - if !env.FileExistsInBranch("entire/sessions", summaryPath) { + if !env.FileExistsInBranch(paths.MetadataBranchName, summaryPath) { t.Errorf("root metadata.json should exist at %s", summaryPath) } // Verify transcript file exists in session subdirectory (new format: 0/full.jsonl) transcriptPath := SessionFilePath(checkpointID, paths.TranscriptFileName) - if !env.FileExistsInBranch("entire/sessions", transcriptPath) { + if !env.FileExistsInBranch(paths.MetadataBranchName, transcriptPath) { t.Errorf("Transcript (%s) should exist at %s", paths.TranscriptFileName, transcriptPath) } else { t.Log("✓ Transcript file exists in checkpoint") @@ -683,12 +683,12 @@ func TestShadow_TranscriptCondensation(t *testing.T) { // Verify content_hash.txt exists in session subdirectory hashPath := SessionFilePath(checkpointID, "content_hash.txt") - if !env.FileExistsInBranch("entire/sessions", hashPath) { + if !env.FileExistsInBranch(paths.MetadataBranchName, hashPath) { t.Errorf("content_hash.txt should exist at %s", hashPath) } // Verify root metadata.json can be read and parsed as CheckpointSummary - summaryContent, found := env.ReadFileFromBranch("entire/sessions", summaryPath) + summaryContent, found := env.ReadFileFromBranch(paths.MetadataBranchName, summaryPath) if !found { t.Fatal("root metadata.json should be readable") } @@ -706,7 +706,7 @@ func 
TestShadow_TranscriptCondensation(t *testing.T) { // Verify agent field is in session-level metadata (not root summary) sessionMetadataPath := SessionFilePath(checkpointID, paths.MetadataFileName) - sessionMetadataContent, found := env.ReadFileFromBranch("entire/sessions", sessionMetadataPath) + sessionMetadataContent, found := env.ReadFileFromBranch(paths.MetadataBranchName, sessionMetadataPath) if !found { t.Fatal("session metadata.json should be readable") } @@ -791,7 +791,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { // Verify first checkpoint has both prompts (uses session file path in numbered subdirectory) promptPath1 := SessionFilePath(checkpoint1ID, "prompt.txt") - prompt1Content, found := env.ReadFileFromBranch("entire/sessions", promptPath1) + prompt1Content, found := env.ReadFileFromBranch(paths.MetadataBranchName, promptPath1) if !found { t.Errorf("prompt.txt should exist at %s", promptPath1) } else { @@ -806,7 +806,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { } contextPath1 := SessionFilePath(checkpoint1ID, "context.md") - context1Content, found := env.ReadFileFromBranch("entire/sessions", contextPath1) + context1Content, found := env.ReadFileFromBranch(paths.MetadataBranchName, contextPath1) if !found { t.Errorf("context.md should exist at %s", contextPath1) } else { @@ -862,7 +862,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { // Verify second checkpoint has the FULL transcript (all three prompts) // Session files are now in numbered subdirectories (e.g., 0/prompt.txt) promptPath2 := SessionFilePath(checkpoint2ID, "prompt.txt") - prompt2Content, found := env.ReadFileFromBranch("entire/sessions", promptPath2) + prompt2Content, found := env.ReadFileFromBranch(paths.MetadataBranchName, promptPath2) if !found { t.Errorf("prompt.txt should exist at %s", promptPath2) } else { @@ -881,7 +881,7 @@ func TestShadow_FullTranscriptContext(t *testing.T) { } contextPath2 := SessionFilePath(checkpoint2ID, "context.md") - 
context2Content, found := env.ReadFileFromBranch("entire/sessions", contextPath2) + context2Content, found := env.ReadFileFromBranch(paths.MetadataBranchName, contextPath2) if !found { t.Errorf("context.md should exist at %s", contextPath2) } else { @@ -1010,7 +1010,7 @@ func TestShadow_RewindAndCondensation(t *testing.T) { // Check prompt.txt (uses session file path in numbered subdirectory) promptPath := SessionFilePath(checkpointID, "prompt.txt") - promptContent, found := env.ReadFileFromBranch("entire/sessions", promptPath) + promptContent, found := env.ReadFileFromBranch(paths.MetadataBranchName, promptPath) if !found { t.Errorf("prompt.txt should exist at %s", promptPath) } else { @@ -1029,7 +1029,7 @@ func TestShadow_RewindAndCondensation(t *testing.T) { // Check context.md contextPath := SessionFilePath(checkpointID, "context.md") - contextContent, found := env.ReadFileFromBranch("entire/sessions", contextPath) + contextContent, found := env.ReadFileFromBranch(paths.MetadataBranchName, contextPath) if !found { t.Errorf("context.md should exist at %s", contextPath) } else { @@ -1288,7 +1288,7 @@ func TestShadow_IntermediateCommitsWithoutPrompts(t *testing.T) { for _, cpID := range []string{checkpoint1ID, checkpoint3ID} { shardedPath := ShardedCheckpointPath(cpID) metadataPath := shardedPath + "/metadata.json" - if !env.FileExistsInBranch("entire/sessions", metadataPath) { + if !env.FileExistsInBranch(paths.MetadataBranchName, metadataPath) { t.Errorf("Checkpoint %s should have metadata.json at %s", cpID, metadataPath) } } @@ -1356,7 +1356,7 @@ func TestShadow_FullTranscriptCondensationWithIntermediateCommits(t *testing.T) t.Logf("First commit: %s, checkpoint: %s", commit1Hash[:7], checkpoint1ID) // Verify first checkpoint has prompts A and B (session files in numbered subdirectory) - prompt1Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint1ID, "prompt.txt")) + prompt1Content, found := 
env.ReadFileFromBranch(paths.MetadataBranchName, SessionFilePath(checkpoint1ID, "prompt.txt")) if !found { t.Fatal("First checkpoint should have prompt.txt") } @@ -1400,7 +1400,7 @@ func TestShadow_FullTranscriptCondensationWithIntermediateCommits(t *testing.T) t.Log("Phase 5: Verify second checkpoint has full transcript (A, B, and C)") // Session files are now in numbered subdirectory (e.g., 0/prompt.txt) - prompt2Content, found := env.ReadFileFromBranch("entire/sessions", SessionFilePath(checkpoint2ID, "prompt.txt")) + prompt2Content, found := env.ReadFileFromBranch(paths.MetadataBranchName, SessionFilePath(checkpoint2ID, "prompt.txt")) if !found { t.Fatal("Second checkpoint should have prompt.txt") } @@ -1663,14 +1663,14 @@ func TestShadow_TrailerRemovalSkipsCondensation(t *testing.T) { } // Verify condensation happened for second commit - if !env.BranchExists("entire/sessions") { + if !env.BranchExists(paths.MetadataBranchName) { t.Fatal("entire/sessions branch should exist after second commit with trailer") } // Verify checkpoint exists shardedPath := ShardedCheckpointPath(checkpointID) metadataPath := shardedPath + "/metadata.json" - if !env.FileExistsInBranch("entire/sessions", metadataPath) { + if !env.FileExistsInBranch(paths.MetadataBranchName, metadataPath) { t.Errorf("Checkpoint should exist at %s", metadataPath) } else { t.Log("✓ Condensation happened for commit with trailer") @@ -1711,7 +1711,7 @@ func TestShadow_SessionsBranchCommitTrailers(t *testing.T) { env.GitCommitWithShadowHooks("Add main.go", "main.go") // Get the commit message on entire/sessions branch - sessionsCommitMsg := env.GetLatestCommitMessageOnBranch("entire/sessions") + sessionsCommitMsg := env.GetLatestCommitMessageOnBranch(paths.MetadataBranchName) t.Logf("entire/sessions commit message:\n%s", sessionsCommitMsg) // Verify required trailers are present diff --git a/cmd/entire/cli/paths/paths.go b/cmd/entire/cli/paths/paths.go index f3fe528c8..bed587819 100644 --- 
a/cmd/entire/cli/paths/paths.go +++ b/cmd/entire/cli/paths/paths.go @@ -35,7 +35,7 @@ const ( ) // MetadataBranchName is the orphan branch used by auto-commit and manual-commit strategies to store metadata -const MetadataBranchName = "entire/sessions" +const MetadataBranchName = "entire/sessions/v1" // CheckpointPath returns the sharded storage path for a checkpoint ID. // Uses first 2 characters as shard (256 buckets), remaining as folder name. diff --git a/cmd/entire/cli/strategy/clean_test.go b/cmd/entire/cli/strategy/clean_test.go index 62b1bb994..8ab71da2c 100644 --- a/cmd/entire/cli/strategy/clean_test.go +++ b/cmd/entire/cli/strategy/clean_test.go @@ -5,6 +5,7 @@ import ( "time" "entire.io/cli/cmd/entire/cli/checkpoint" + "entire.io/cli/cmd/entire/cli/paths" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -33,7 +34,7 @@ func TestIsShadowBranch(t *testing.T) { {"too short commit (6 chars)", "entire/abc123", false}, {"too short commit (1 char)", "entire/a", false}, {"non-hex chars in commit", "entire/ghijklm", false}, - {"sessions branch", "entire/sessions", false}, + {"sessions branch", paths.MetadataBranchName, false}, {"no prefix", "abc1234", false}, {"wrong prefix", "feature/abc1234", false}, {"main branch", "main", false}, @@ -91,7 +92,7 @@ func TestListShadowBranches(t *testing.T) { }{ {"entire/abc1234", true}, {"entire/def5678", true}, - {"entire/sessions", false}, // Should NOT be listed + {paths.MetadataBranchName, false}, // Should NOT be listed {"feature/foo", false}, {"main", false}, } @@ -126,8 +127,8 @@ func TestListShadowBranches(t *testing.T) { if !shadowSet["entire/def5678"] { t.Error("ListShadowBranches() missing 'entire/def5678'") } - if shadowSet["entire/sessions"] { - t.Error("ListShadowBranches() should not include 'entire/sessions'") + if shadowSet[paths.MetadataBranchName] { + t.Errorf("ListShadowBranches() should not include '%s'", paths.MetadataBranchName) } } diff --git a/cmd/entire/cli/strategy/cleanup.go 
b/cmd/entire/cli/strategy/cleanup.go index ce81ab0b6..bf4dcc9a9 100644 --- a/cmd/entire/cli/strategy/cleanup.go +++ b/cmd/entire/cli/strategy/cleanup.go @@ -62,10 +62,10 @@ var shadowBranchPattern = regexp.MustCompile(`^entire/[0-9a-fA-F]{7,}(-[0-9a-fA- // IsShadowBranch returns true if the branch name matches the shadow branch pattern. // Shadow branches have the format "entire/-" where the // commit hash is at least 7 hex characters and worktree hash is 6 hex characters. -// The "entire/sessions" branch is NOT a shadow branch. +// The "entire/sessions/v1" branch is NOT a shadow branch. func IsShadowBranch(branchName string) bool { // Explicitly exclude entire/sessions - if branchName == "entire/sessions" { + if branchName == paths.MetadataBranchName { return false } return shadowBranchPattern.MatchString(branchName) @@ -73,7 +73,7 @@ func IsShadowBranch(branchName string) bool { // ListShadowBranches returns all shadow branches in the repository. // Shadow branches match the pattern "entire/" (7+ hex chars). -// The "entire/sessions" branch is excluded as it stores permanent metadata. +// The "entire/sessions/v1" branch is excluded as it stores permanent metadata. // Returns an empty slice (not nil) if no shadow branches exist. 
func ListShadowBranches() ([]string, error) { repo, err := OpenRepository() diff --git a/scripts/test-attribution-e2e.sh b/scripts/test-attribution-e2e.sh index ff0ac9a3c..c17ed69f9 100755 --- a/scripts/test-attribution-e2e.sh +++ b/scripts/test-attribution-e2e.sh @@ -190,18 +190,18 @@ if [[ -n "$CHECKPOINT_ID" ]]; then METADATA_PATH="${SHARD_PREFIX}/${SHARD_SUFFIX}/metadata.json" echo "" - echo -e "${BLUE}=== Step 12: Inspect metadata on entire/sessions branch ===${NC}" + echo -e "${BLUE}=== Step 12: Inspect metadata on entire/sessions/v1 branch ===${NC}" echo "Looking for metadata at: $METADATA_PATH" - # Read metadata.json from entire/sessions branch - if git show "entire/sessions:${METADATA_PATH}" > /dev/null 2>&1; then + # Read metadata.json from entire/sessions/v1 branch + if git show "entire/sessions/v1:${METADATA_PATH}" > /dev/null 2>&1; then echo -e "${GREEN}Found metadata.json:${NC}" - git show "entire/sessions:${METADATA_PATH}" | jq . + git show "entire/sessions/v1:${METADATA_PATH}" | jq . # Extract and display attribution specifically echo "" echo -e "${BLUE}=== Step 13: Attribution Analysis ===${NC}" - ATTRIBUTION=$(git show "entire/sessions:${METADATA_PATH}" | jq -r '.initial_attribution // empty') + ATTRIBUTION=$(git show "entire/sessions/v1:${METADATA_PATH}" | jq -r '.initial_attribution // empty') if [[ -n "$ATTRIBUTION" && "$ATTRIBUTION" != "null" ]]; then echo -e "${GREEN}Attribution data:${NC}" echo "$ATTRIBUTION" | jq . @@ -236,19 +236,19 @@ if [[ -n "$CHECKPOINT_ID" ]]; then # Also show files_touched echo "" echo -e "${BLUE}Files touched (agent-modified):${NC}" - git show "entire/sessions:${METADATA_PATH}" | jq -r '.files_touched[]?' 2>/dev/null || echo "(none)" + git show "entire/sessions/v1:${METADATA_PATH}" | jq -r '.files_touched[]?' 
2>/dev/null || echo "(none)" # Show prompt attributions from session state if available echo "" echo -e "${BLUE}=== Step 14: Check prompt attributions ===${NC}" # List all files in the checkpoint directory echo "Files in checkpoint directory:" - git ls-tree -r --name-only "entire/sessions" | grep "^${SHARD_PREFIX}/${SHARD_SUFFIX}/" | head -20 + git ls-tree -r --name-only "entire/sessions/v1" | grep "^${SHARD_PREFIX}/${SHARD_SUFFIX}/" | head -20 else echo -e "${RED}Could not find metadata at $METADATA_PATH${NC}" - echo "Checking what's on entire/sessions branch:" - git ls-tree -r --name-only "entire/sessions" 2>/dev/null | head -20 || echo "(branch may not exist)" + echo "Checking what's on entire/sessions/v1 branch:" + git ls-tree -r --name-only "entire/sessions/v1" 2>/dev/null | head -20 || echo "(branch may not exist)" fi else echo -e "${YELLOW}No Entire-Checkpoint trailer found (user may have removed it)${NC}" @@ -270,7 +270,7 @@ echo " 2. User created utils.py (non-agent file)" echo " 3. Agent modified utils.py (now agent-touched)" echo " 4. User edited main.py (agent-touched file)" echo " 5. Commit with attribution tracking" -echo " 6. Metadata inspection on entire/sessions branch" +echo " 6. Metadata inspection on entire/sessions/v1 branch" echo "" echo "Expected attribution behavior:" echo " - main.py: agent added lines, user added 2 lines after" @@ -282,6 +282,6 @@ if [[ "$KEEP_REPO" == "true" ]]; then echo "" echo "Useful inspection commands:" echo " cd $TEST_DIR" - echo " git log entire/sessions --oneline" - echo " git show entire/sessions:/metadata.json | jq ." + echo " git log entire/sessions/v1 --oneline" + echo " git show entire/sessions/v1:/metadata.json | jq ." 
fi From bde942df69f0125a2153b67606e53e53d889f8d4 Mon Sep 17 00:00:00 2001 From: Victor Gutierrez Calderon Date: Thu, 5 Feb 2026 13:54:21 +1100 Subject: [PATCH 12/18] miss this one Entire-Checkpoint: eb03327f76c6 --- cmd/entire/cli/integration_test/mid_session_commit_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/entire/cli/integration_test/mid_session_commit_test.go b/cmd/entire/cli/integration_test/mid_session_commit_test.go index e9e58d560..785dd54c4 100644 --- a/cmd/entire/cli/integration_test/mid_session_commit_test.go +++ b/cmd/entire/cli/integration_test/mid_session_commit_test.go @@ -71,7 +71,7 @@ func TestShadowStrategy_MidSessionCommit_FromTranscript(t *testing.T) { shadowBranches := env.ListBranchesWithPrefix("entire/") hasShadowBranch := false for _, b := range shadowBranches { - if b != paths.MetadataBranchName && b != "entire/sessions" { + if b != paths.MetadataBranchName { hasShadowBranch = true break } From 4e02ba8a1de7be095bafa7115148ad217ddecd9e Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 13:57:06 +1100 Subject: [PATCH 13/18] add idempotency support to migration script - Add checkpoint_exists_on_target() to detect already-migrated checkpoints - Skip checkpoints that already exist on target branch with valid v1 format - Handle existing target branch gracefully (use it instead of failing) - Update dry-run output to show migration status per checkpoint - Document idempotency behavior in script header Co-Authored-By: Claude Opus 4.5 Entire-Checkpoint: 8cb061b3e545 --- scripts/migrate-sessions.sh | 68 +++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 0f80f094c..1a66d04a5 100755 --- a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -26,6 +26,9 @@ set -e # By default, runs in dry-run mode showing what would be migrated. # Use --apply to actually perform the migration. 
# +# The script is idempotent - checkpoints already migrated to v1 are skipped. +# This allows running migration incrementally as new checkpoints are added. +# # OLD FORMAT: # // # ├── metadata.json # Session metadata (has session_id) @@ -169,8 +172,31 @@ checkpoint_to_path() { echo "${id:0:2}/${id:2}" } +# Check if a checkpoint already exists on target branch and is in v1 format +# Returns 0 if exists and valid, 1 otherwise +checkpoint_exists_on_target() { + local checkpoint_path="$1" + + if ! git show-ref --verify --quiet "refs/heads/$TARGET_BRANCH"; then + return 1 + fi + + # Check if metadata.json exists on target + if ! git show "$TARGET_BRANCH:$checkpoint_path/metadata.json" &>/dev/null; then + return 1 + fi + + # Check if it has sessions array (v1 format indicator) + if git show "$TARGET_BRANCH:$checkpoint_path/metadata.json" | jq -e '.sessions' &>/dev/null; then + return 0 + fi + + return 1 +} + # Migrate a single checkpoint directory # Args: $1 = checkpoint path (e.g., "a1/b2c3d4e5f6"), $2 = source dir, $3 = target dir +# Returns: 0 if migrated, 1 if skipped migrate_checkpoint() { local CHECKPOINT_DIR="$1" local SOURCE_DIR="$2" @@ -179,7 +205,13 @@ migrate_checkpoint() { if [[ ! 
-f "$CHECKPOINT_PATH/metadata.json" ]]; then echo " Skipping: no metadata.json" - return + return 1 + fi + + # Check if already migrated to target branch + if checkpoint_exists_on_target "$CHECKPOINT_DIR"; then + echo " Skipping: already exists on $TARGET_BRANCH" + return 1 fi local ROOT_META="$CHECKPOINT_PATH/metadata.json" @@ -192,6 +224,7 @@ migrate_checkpoint() { # Already aggregated format - copy but still transform session metadata migrate_new_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" fi + return 0 } # Migrate checkpoint from old format (session files at root) @@ -391,6 +424,13 @@ if [[ -n "$CHECKPOINT_FILTER" ]]; then exit 1 fi + # Check if already migrated + if checkpoint_exists_on_target "$CHECKPOINT_PATH"; then + git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" + echo -e " ${YELLOW}Already migrated to $TARGET_BRANCH - skipping${NC}" + exit 0 + fi + # Show checkpoint info if jq -e '.session_id' "$TEMP_DIR/$CHECKPOINT_PATH/metadata.json" > /dev/null 2>&1; then echo " Format: old (session files at root) -> needs migration" @@ -463,10 +503,12 @@ if [[ "$DRY_RUN" == "true" ]]; then for CHECKPOINT_PATH in $CHECKPOINT_DIRS; do CHECKPOINT_DIR="${CHECKPOINT_PATH#./}" if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then - if jq -e '.session_id' "$CHECKPOINT_PATH/metadata.json" > /dev/null 2>&1; then - echo " $CHECKPOINT_DIR (old format)" + if checkpoint_exists_on_target "$CHECKPOINT_DIR"; then + echo -e " $CHECKPOINT_DIR ${GREEN}(already migrated)${NC}" + elif jq -e '.session_id' "$CHECKPOINT_PATH/metadata.json" > /dev/null 2>&1; then + echo " $CHECKPOINT_DIR (old format -> will migrate)" else - echo " $CHECKPOINT_DIR (new format)" + echo " $CHECKPOINT_DIR (new format -> will migrate)" fi fi done @@ -479,13 +521,17 @@ if [[ "$DRY_RUN" == "true" ]]; then exit 0 fi -# Create orphan target branch from init commit -echo -e "${GREEN}Creating target branch $TARGET_BRANCH...${NC}" -git checkout "$SOURCE_BRANCH" -git checkout 
"$INIT_COMMIT" -git checkout --orphan "$TARGET_BRANCH" -git commit --allow-empty -m "Initialize metadata branch (v1)" -git checkout "$SOURCE_BRANCH" +# Create orphan target branch if it doesn't exist +if git show-ref --verify --quiet "refs/heads/$TARGET_BRANCH"; then + echo -e "${YELLOW}Target branch $TARGET_BRANCH already exists - will skip existing checkpoints${NC}" +else + echo -e "${GREEN}Creating target branch $TARGET_BRANCH...${NC}" + git checkout "$SOURCE_BRANCH" + git checkout "$INIT_COMMIT" + git checkout --orphan "$TARGET_BRANCH" + git commit --allow-empty -m "Initialize metadata branch (v1)" + git checkout "$SOURCE_BRANCH" +fi # Process each commit for COMMIT in $COMMITS; do From dd88384c11b834d2716e7caec20e90df9699563b Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 14:06:09 +1100 Subject: [PATCH 14/18] fix: branch path with conflict if we keep sessions/ --- scripts/migrate-sessions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 1a66d04a5..4b2d8abf0 100755 --- a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -90,7 +90,7 @@ GREEN='\033[0;32m' YELLOW='\033[1;33m' NC='\033[0m' # No Color -SOURCE_BRANCH="entire/sessions" +SOURCE_BRANCH="entire/sessions-legacy" TARGET_BRANCH="entire/sessions/v1" # Parse arguments From 449250d17a44affaf44a30ac426f8e4683e4b3bf Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 14:17:33 +1100 Subject: [PATCH 15/18] fix: check for already migrated checkpoints was broken --- scripts/migrate-sessions.sh | 48 +++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 4b2d8abf0..2cb3d30e3 100755 --- a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -52,6 +52,7 @@ set -e # - jq (JSON processor) must be installed # - Clean working tree (no uncommitted changes) # - The entire/sessions 
branch must exist +# - DISABLE ALL YOUR ENTIRE GIT HOOKS FIRST (e.g., pre-commit, pre-push) to avoid issues during migration # # EXAMPLES: # # Preview what would be migrated (dry-run) @@ -208,12 +209,6 @@ migrate_checkpoint() { return 1 fi - # Check if already migrated to target branch - if checkpoint_exists_on_target "$CHECKPOINT_DIR"; then - echo " Skipping: already exists on $TARGET_BRANCH" - return 1 - fi - local ROOT_META="$CHECKPOINT_PATH/metadata.json" # Check if this is session metadata (has session_id) or already aggregated @@ -536,6 +531,29 @@ fi # Process each commit for COMMIT in $COMMITS; do COMMIT_MSG=$(git log -1 --format="%s" "$COMMIT") + + # Find checkpoint directories in this commit (without creating worktree) + CHECKPOINT_DIRS=$(git ls-tree -d --name-only -r "$COMMIT" | grep -E '^[0-9a-f]{2}/[0-9a-f]+$' || true) + + if [[ -z "$CHECKPOINT_DIRS" ]]; then + echo -e "${YELLOW}Skipping commit (no checkpoints): $COMMIT_MSG${NC}" + continue + fi + + # Check if any checkpoints need migration + NEEDS_MIGRATION=false + for CHECKPOINT_DIR in $CHECKPOINT_DIRS; do + if ! checkpoint_exists_on_target "$CHECKPOINT_DIR"; then + NEEDS_MIGRATION=true + break + fi + done + + if [[ "$NEEDS_MIGRATION" == "false" ]]; then + echo -e "${YELLOW}Skipping commit (all migrated): $COMMIT_MSG${NC}" + continue + fi + echo -e "${GREEN}Processing commit: $COMMIT_MSG${NC}" # Checkout source commit in temp worktree @@ -548,22 +566,16 @@ for COMMIT in $COMMITS; do # Track which checkpoint directories we process PROCESSED_DIRS="" - # Find all checkpoint directories (pattern: XX/YYYYYYYY/) - cd "$TEMP_DIR" - CHECKPOINT_DIRS=$(find . 
-maxdepth 2 -mindepth 2 -type d | grep -E '^\./[0-9a-f]{2}/[0-9a-f]+$' || true) - - for CHECKPOINT_PATH in $CHECKPOINT_DIRS; do - CHECKPOINT_DIR="${CHECKPOINT_PATH#./}" + # Process checkpoints + for CHECKPOINT_DIR in $CHECKPOINT_DIRS; do echo " Processing checkpoint: $CHECKPOINT_DIR" - # Track this directory for git add later - PROCESSED_DIRS="$PROCESSED_DIRS $CHECKPOINT_DIR" - - migrate_checkpoint "$CHECKPOINT_DIR" "$TEMP_DIR" "$OLDPWD" + if migrate_checkpoint "$CHECKPOINT_DIR" "$TEMP_DIR" "$(pwd)"; then + # Track this directory for git add later + PROCESSED_DIRS="$PROCESSED_DIRS $CHECKPOINT_DIR" + fi done - cd "$OLDPWD" - # Cleanup worktree git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" From 6cd2059a31ae4cfbd6be12a975e033814b1ba668 Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 14:33:02 +1100 Subject: [PATCH 16/18] track source commits and preserve authors in migration - Add migration_source_commit field to migrated checkpoint metadata - Change skip check from existence to source commit comparison - Re-migrate checkpoints when source commit differs (handles updates) - Preserve original commit author using git commit --author - Update dry-run output to show up-to-date status Co-Authored-By: Claude Opus 4.5 --- scripts/migrate-sessions.sh | 84 +++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 2cb3d30e3..4432d02df 100755 --- a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -173,10 +173,12 @@ checkpoint_to_path() { echo "${id:0:2}/${id:2}" } -# Check if a checkpoint already exists on target branch and is in v1 format -# Returns 0 if exists and valid, 1 otherwise -checkpoint_exists_on_target() { +# Check if a checkpoint is up-to-date on target branch (same source commit) +# Args: $1 = checkpoint path, $2 = source commit hash +# Returns 0 if exists and up-to-date, 1 otherwise 
+checkpoint_up_to_date_on_target() { local checkpoint_path="$1" + local source_commit="$2" if ! git show-ref --verify --quiet "refs/heads/$TARGET_BRANCH"; then return 1 @@ -187,8 +189,11 @@ checkpoint_exists_on_target() { return 1 fi - # Check if it has sessions array (v1 format indicator) - if git show "$TARGET_BRANCH:$checkpoint_path/metadata.json" | jq -e '.sessions' &>/dev/null; then + # Check if same source commit (up-to-date) + local target_source_commit + target_source_commit=$(git show "$TARGET_BRANCH:$checkpoint_path/metadata.json" 2>/dev/null | jq -r '.migration_source_commit // ""') + + if [[ -n "$target_source_commit" && "$target_source_commit" == "$source_commit" ]]; then return 0 fi @@ -196,12 +201,13 @@ checkpoint_exists_on_target() { } # Migrate a single checkpoint directory -# Args: $1 = checkpoint path (e.g., "a1/b2c3d4e5f6"), $2 = source dir, $3 = target dir +# Args: $1 = checkpoint path (e.g., "a1/b2c3d4e5f6"), $2 = source dir, $3 = target dir, $4 = source commit # Returns: 0 if migrated, 1 if skipped migrate_checkpoint() { local CHECKPOINT_DIR="$1" local SOURCE_DIR="$2" local TARGET_DIR="$3" + local SOURCE_COMMIT="$4" local CHECKPOINT_PATH="$SOURCE_DIR/$CHECKPOINT_DIR" if [[ ! 
-f "$CHECKPOINT_PATH/metadata.json" ]]; then @@ -214,10 +220,10 @@ migrate_checkpoint() { # Check if this is session metadata (has session_id) or already aggregated if jq -e '.session_id' "$ROOT_META" > /dev/null 2>&1; then # This is session metadata at root - needs migration - migrate_old_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" + migrate_old_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" "$SOURCE_COMMIT" else # Already aggregated format - copy but still transform session metadata - migrate_new_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" + migrate_new_format "$CHECKPOINT_DIR" "$CHECKPOINT_PATH" "$TARGET_DIR" "$SOURCE_COMMIT" fi return 0 } @@ -227,6 +233,7 @@ migrate_old_format() { local CHECKPOINT_DIR="$1" local CHECKPOINT_PATH="$2" local TARGET_DIR="$3" + local SOURCE_COMMIT="$4" local ROOT_META="$CHECKPOINT_PATH/metadata.json" # Find existing numbered subdirs @@ -336,6 +343,7 @@ migrate_old_format() { --arg checkpoint_id "$CHECKPOINT_ID" \ --arg strategy "$STRATEGY" \ --arg branch "$BRANCH" \ + --arg migration_source_commit "$SOURCE_COMMIT" \ --argjson checkpoints_count "$CHECKPOINTS_COUNT" \ --argjson files_touched "$FILES_TOUCHED" \ --argjson sessions "$SESSIONS_JSON" \ @@ -348,6 +356,7 @@ migrate_old_format() { checkpoint_id: $checkpoint_id, strategy: $strategy, branch: $branch, + migration_source_commit: $migration_source_commit, checkpoints_count: $checkpoints_count, files_touched: $files_touched, sessions: $sessions, @@ -368,12 +377,13 @@ migrate_new_format() { local CHECKPOINT_DIR="$1" local CHECKPOINT_PATH="$2" local TARGET_DIR="$3" + local SOURCE_COMMIT="$4" mkdir -p "$TARGET_DIR/$CHECKPOINT_DIR" - # Transform root metadata.json to have absolute paths in sessions array - jq --arg prefix "/$CHECKPOINT_DIR" \ - '.sessions = [.sessions[] | { + # Transform root metadata.json to have absolute paths in sessions array and add source commit + jq --arg prefix "/$CHECKPOINT_DIR" --arg source_commit "$SOURCE_COMMIT" \ + 
'.migration_source_commit = $source_commit | .sessions = [.sessions[] | { metadata: ($prefix + "/" + (.metadata | ltrimstr("/"))), transcript: ($prefix + "/" + (.transcript | ltrimstr("/"))), context: ($prefix + "/" + (.context | ltrimstr("/"))), @@ -409,9 +419,18 @@ if [[ -n "$CHECKPOINT_FILTER" ]]; then echo -e "${GREEN}Migrating single checkpoint: $CHECKPOINT_FILTER${NC}" echo " Path: $CHECKPOINT_PATH" + # Find the most recent commit that modified this checkpoint + SOURCE_COMMIT=$(git log -1 --format="%H" "$SOURCE_BRANCH" -- "$CHECKPOINT_PATH") + if [[ -z "$SOURCE_COMMIT" ]]; then + echo -e "${RED}Error: Checkpoint $CHECKPOINT_FILTER not found on $SOURCE_BRANCH${NC}" >&2 + exit 1 + fi + COMMIT_AUTHOR=$(git log -1 --format="%an <%ae>" "$SOURCE_COMMIT") + echo " Source commit: ${SOURCE_COMMIT:0:7} (by $COMMIT_AUTHOR)" + # Create temp dir and checkout source TEMP_DIR=$(mktemp -d) - git worktree add --detach "$TEMP_DIR" "$SOURCE_BRANCH" 2>/dev/null + git worktree add --detach "$TEMP_DIR" "$SOURCE_COMMIT" 2>/dev/null if [[ ! 
-d "$TEMP_DIR/$CHECKPOINT_PATH" ]]; then git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" @@ -419,10 +438,10 @@ if [[ -n "$CHECKPOINT_FILTER" ]]; then exit 1 fi - # Check if already migrated - if checkpoint_exists_on_target "$CHECKPOINT_PATH"; then + # Check if already up-to-date + if checkpoint_up_to_date_on_target "$CHECKPOINT_PATH" "$SOURCE_COMMIT"; then git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" - echo -e " ${YELLOW}Already migrated to $TARGET_BRANCH - skipping${NC}" + echo -e " ${YELLOW}Already up-to-date on $TARGET_BRANCH - skipping${NC}" exit 0 fi @@ -455,16 +474,16 @@ if [[ -n "$CHECKPOINT_FILTER" ]]; then git checkout "$TARGET_BRANCH" # Migrate the checkpoint - migrate_checkpoint "$CHECKPOINT_PATH" "$TEMP_DIR" "$(pwd)" + migrate_checkpoint "$CHECKPOINT_PATH" "$TEMP_DIR" "$(pwd)" "$SOURCE_COMMIT" # Cleanup git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" - # Commit + # Commit with original author git add "$CHECKPOINT_PATH" if ! 
git diff --cached --quiet; then - git commit -m "Migrate checkpoint: $CHECKPOINT_FILTER" - echo -e "${GREEN}Committed${NC}" + git commit --author="$COMMIT_AUTHOR" -m "Migrate checkpoint: $CHECKPOINT_FILTER" + echo -e "${GREEN}Committed (author: $COMMIT_AUTHOR)${NC}" else echo -e "${YELLOW}No changes${NC}" fi @@ -498,8 +517,10 @@ if [[ "$DRY_RUN" == "true" ]]; then for CHECKPOINT_PATH in $CHECKPOINT_DIRS; do CHECKPOINT_DIR="${CHECKPOINT_PATH#./}" if [[ -f "$CHECKPOINT_PATH/metadata.json" ]]; then - if checkpoint_exists_on_target "$CHECKPOINT_DIR"; then - echo -e " $CHECKPOINT_DIR ${GREEN}(already migrated)${NC}" + # Get the source commit for this checkpoint + SOURCE_COMMIT=$(git log -1 --format="%H" "$SOURCE_BRANCH" -- "$CHECKPOINT_DIR") + if checkpoint_up_to_date_on_target "$CHECKPOINT_DIR" "$SOURCE_COMMIT"; then + echo -e " $CHECKPOINT_DIR ${GREEN}(up-to-date)${NC}" elif jq -e '.session_id' "$CHECKPOINT_PATH/metadata.json" > /dev/null 2>&1; then echo " $CHECKPOINT_DIR (old format -> will migrate)" else @@ -540,10 +561,10 @@ for COMMIT in $COMMITS; do continue fi - # Check if any checkpoints need migration + # Check if any checkpoints need migration (compare source commits) NEEDS_MIGRATION=false for CHECKPOINT_DIR in $CHECKPOINT_DIRS; do - if ! checkpoint_exists_on_target "$CHECKPOINT_DIR"; then + if ! 
checkpoint_up_to_date_on_target "$CHECKPOINT_DIR" "$COMMIT"; then NEEDS_MIGRATION=true break fi @@ -556,6 +577,9 @@ for COMMIT in $COMMITS; do echo -e "${GREEN}Processing commit: $COMMIT_MSG${NC}" + # Get original author for preserving authorship + COMMIT_AUTHOR=$(git log -1 --format="%an <%ae>" "$COMMIT") + # Checkout source commit in temp worktree TEMP_DIR=$(mktemp -d) git worktree add --detach "$TEMP_DIR" "$COMMIT" 2>/dev/null @@ -568,9 +592,15 @@ for COMMIT in $COMMITS; do # Process checkpoints for CHECKPOINT_DIR in $CHECKPOINT_DIRS; do + # Skip if already up-to-date on target + if checkpoint_up_to_date_on_target "$CHECKPOINT_DIR" "$COMMIT"; then + echo " Skipping checkpoint (up-to-date): $CHECKPOINT_DIR" + continue + fi + echo " Processing checkpoint: $CHECKPOINT_DIR" - if migrate_checkpoint "$CHECKPOINT_DIR" "$TEMP_DIR" "$(pwd)"; then + if migrate_checkpoint "$CHECKPOINT_DIR" "$TEMP_DIR" "$(pwd)" "$COMMIT"; then # Track this directory for git add later PROCESSED_DIRS="$PROCESSED_DIRS $CHECKPOINT_DIR" fi @@ -584,10 +614,10 @@ for COMMIT in $COMMITS; do git add "$DIR" done - # Commit changes + # Commit changes with original author if ! git diff --cached --quiet; then - git commit -m "$COMMIT_MSG" - echo -e " ${GREEN}Committed${NC}" + git commit --author="$COMMIT_AUTHOR" -m "$COMMIT_MSG" + echo -e " ${GREEN}Committed (author: $COMMIT_AUTHOR)${NC}" else echo -e " ${YELLOW}No changes${NC}" fi From 69d7674f3a08fce64a0a815f5f8fc428f26f8fb8 Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 14:35:37 +1100 Subject: [PATCH 17/18] add git retry with exponential backoff for index.lock races Adds git_retry() helper that retries git commands up to 5 times with exponential backoff (0.2s, 0.4s, 0.8s, 1.6s, 3.2s) to handle transient index.lock file race conditions during migration. 
Co-Authored-By: Claude Opus 4.5 --- scripts/migrate-sessions.sh | 38 ++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 4432d02df..7090dafd6 100755 --- a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -173,6 +173,30 @@ checkpoint_to_path() { echo "${id:0:2}/${id:2}" } +# Retry a git command with exponential backoff (handles index.lock race conditions) +# Args: $@ = command to run +git_retry() { + local max_attempts=5 + local attempt=1 + local wait_time=0.2 + + while [[ $attempt -le $max_attempts ]]; do + if "$@" 2>&1; then + return 0 + fi + + if [[ $attempt -lt $max_attempts ]]; then + echo " Retrying in ${wait_time}s (attempt $attempt/$max_attempts)..." >&2 + sleep "$wait_time" + wait_time=$(echo "$wait_time * 2" | bc) + fi + ((attempt++)) + done + + echo " Failed after $max_attempts attempts" >&2 + return 1 +} + # Check if a checkpoint is up-to-date on target branch (same source commit) # Args: $1 = checkpoint path, $2 = source commit hash # Returns 0 if exists and up-to-date, 1 otherwise @@ -479,10 +503,10 @@ if [[ -n "$CHECKPOINT_FILTER" ]]; then # Cleanup git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" - # Commit with original author - git add "$CHECKPOINT_PATH" + # Commit with original author (with retry for lock issues) + git_retry git add "$CHECKPOINT_PATH" if ! 
git diff --cached --quiet; then - git commit --author="$COMMIT_AUTHOR" -m "Migrate checkpoint: $CHECKPOINT_FILTER" + git_retry git commit --author="$COMMIT_AUTHOR" -m "Migrate checkpoint: $CHECKPOINT_FILTER" echo -e "${GREEN}Committed (author: $COMMIT_AUTHOR)${NC}" else echo -e "${YELLOW}No changes${NC}" @@ -609,14 +633,14 @@ for COMMIT in $COMMITS; do # Cleanup worktree git worktree remove "$TEMP_DIR" --force 2>/dev/null || rm -rf "$TEMP_DIR" - # Only add the specific checkpoint directories we processed + # Only add the specific checkpoint directories we processed (with retry for lock issues) for DIR in $PROCESSED_DIRS; do - git add "$DIR" + git_retry git add "$DIR" done - # Commit changes with original author + # Commit changes with original author (with retry for lock issues) if ! git diff --cached --quiet; then - git commit --author="$COMMIT_AUTHOR" -m "$COMMIT_MSG" + git_retry git commit --author="$COMMIT_AUTHOR" -m "$COMMIT_MSG" echo -e " ${GREEN}Committed (author: $COMMIT_AUTHOR)${NC}" else echo -e " ${YELLOW}No changes${NC}" From 1c77b2cf844989d28b913a3bac446dc8cc7b0efc Mon Sep 17 00:00:00 2001 From: Alex Ong Date: Thu, 5 Feb 2026 14:53:31 +1100 Subject: [PATCH 18/18] use git diff-tree to only process checkpoints modified per commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes from O(N×M) to O(M) complexity by using git diff-tree instead of git ls-tree. Now only processes checkpoints actually modified in each commit rather than scanning all checkpoints in the tree. 
Co-Authored-By: Claude Opus 4.5 --- scripts/migrate-sessions.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/migrate-sessions.sh b/scripts/migrate-sessions.sh index 7090dafd6..9e179fdc6 100755 --- a/scripts/migrate-sessions.sh +++ b/scripts/migrate-sessions.sh @@ -577,15 +577,15 @@ fi for COMMIT in $COMMITS; do COMMIT_MSG=$(git log -1 --format="%s" "$COMMIT") - # Find checkpoint directories in this commit (without creating worktree) - CHECKPOINT_DIRS=$(git ls-tree -d --name-only -r "$COMMIT" | grep -E '^[0-9a-f]{2}/[0-9a-f]+$' || true) + # Find checkpoint directories MODIFIED in this commit (not all checkpoints in tree) + CHECKPOINT_DIRS=$(git diff-tree --no-commit-id --name-only -r "$COMMIT" | grep -E '^[0-9a-f]{2}/[0-9a-f]+/' | cut -d'/' -f1-2 | sort -u || true) if [[ -z "$CHECKPOINT_DIRS" ]]; then - echo -e "${YELLOW}Skipping commit (no checkpoints): $COMMIT_MSG${NC}" + echo -e "${YELLOW}Skipping commit (no checkpoint changes): $COMMIT_MSG${NC}" continue fi - # Check if any checkpoints need migration (compare source commits) + # Check if any of the modified checkpoints need migration NEEDS_MIGRATION=false for CHECKPOINT_DIR in $CHECKPOINT_DIRS; do if ! checkpoint_up_to_date_on_target "$CHECKPOINT_DIR" "$COMMIT"; then @@ -595,7 +595,7 @@ for COMMIT in $COMMITS; do done if [[ "$NEEDS_MIGRATION" == "false" ]]; then - echo -e "${YELLOW}Skipping commit (all migrated): $COMMIT_MSG${NC}" + echo -e "${YELLOW}Skipping commit (all up-to-date): $COMMIT_MSG${NC}" continue fi