Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .claude/skills/test-repo/test-harness.sh
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ verify-metadata-branch)
echo "==> Verifying metadata branch..."
cd "$REPO_DIR"

if git branch -a | grep "entire/sessions"; then
if git branch -a | grep "entire/sessions/v1"; then
echo "✓ Metadata branch exists"
git show entire/sessions --stat | head -20
git show entire/sessions/v1 --stat | head -20
else
echo "✗ Metadata branch not found"
exit 1
Expand Down
32 changes: 17 additions & 15 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,19 +315,21 @@ All strategies implement:
**Both Strategies** - Metadata branch (`entire/sessions/v1`) - sharded checkpoint format:
```
<checkpoint-id[:2]>/<checkpoint-id[2:]>/
├── metadata.json # Checkpoint info (see below)
├── full.jsonl # Current/latest session transcript
├── prompt.txt # User prompts
├── context.md # Generated context
├── content_hash.txt # SHA256 of transcript (shadow only)
├── tasks/<tool-use-id>/ # Task checkpoints (if applicable)
│ ├── checkpoint.json # UUID mapping
│ └── agent-<id>.jsonl # Subagent transcript
└── 1/ # Archived session (if multiple sessions)
├── metadata.json # Archived session metadata
├── full.jsonl # Archived session transcript
├── prompt.txt
└── ...
├── metadata.json # CheckpointSummary (aggregated stats)
├── 0/ # First session (0-based indexing)
│ ├── metadata.json # Session-specific metadata
│ ├── full.jsonl # Session transcript
│ ├── prompt.txt # User prompts
│ ├── context.md # Generated context
│ ├── content_hash.txt # SHA256 of transcript
│ └── tasks/<tool-use-id>/ # Task checkpoints (if applicable)
│ ├── checkpoint.json # UUID mapping
│ └── agent-<id>.jsonl # Subagent transcript
├── 1/ # Second session (if multiple sessions)
│ ├── metadata.json
│ ├── full.jsonl
│ └── ...
└── ...
```

**Multi-session metadata.json format:**
Expand All @@ -344,8 +346,8 @@ All strategies implement:
```

When multiple sessions are condensed to the same checkpoint (same base commit):
- Latest session files go at the root level
- Previous sessions are archived to numbered subfolders (`1/`, `2/`, etc.)
- Sessions are stored in numbered subfolders using 0-based indexing (`0/`, `1/`, `2/`, etc.)
- Latest session is always in the highest-numbered folder
- The root `metadata.json` (`CheckpointSummary`) lists every session's file paths in its `sessions` array

**Session State** (filesystem, `.git/entire-sessions/`):
Expand Down
10 changes: 5 additions & 5 deletions cmd/entire/cli/checkpoint/backwards_compat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ func TestReadCommitted_MissingTokenUsage(t *testing.T) {
}

// Reading should succeed with nil TokenUsage
result, err := store.ReadCommitted(context.Background(), checkpointID)
summary, err := store.ReadCommitted(context.Background(), checkpointID)
if err != nil {
t.Fatalf("ReadCommitted() error = %v", err)
}

if result.Metadata.CheckpointID != checkpointID {
t.Errorf("CheckpointID = %v, want %v", result.Metadata.CheckpointID, checkpointID)
if summary.CheckpointID != checkpointID {
t.Errorf("CheckpointID = %v, want %v", summary.CheckpointID, checkpointID)
}

// TokenUsage should be nil for old checkpoints without token tracking
if result.Metadata.TokenUsage != nil {
t.Errorf("TokenUsage should be nil for metadata without token_usage field, got %+v", result.Metadata.TokenUsage)
if summary.TokenUsage != nil {
t.Errorf("TokenUsage should be nil for metadata without token_usage field, got %+v", summary.TokenUsage)
}
}
116 changes: 71 additions & 45 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,20 @@ type Store interface {
// Checkpoints are stored at sharded paths: <id[:2]>/<id[2:]>/
WriteCommitted(ctx context.Context, opts WriteCommittedOptions) error

// ReadCommitted reads a committed checkpoint by ID.
// ReadCommitted reads a committed checkpoint's summary by ID.
// Returns only the CheckpointSummary (paths + aggregated stats), not actual content.
// Use ReadSessionContent to read actual transcript/prompts/context.
// Returns nil, nil if the checkpoint does not exist.
ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*ReadCommittedResult, error)
ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*CheckpointSummary, error)

// ReadSessionContent reads the actual content for a specific session within a checkpoint.
// sessionIndex is 0-based (0 for first session, 1 for second, etc.).
// Returns the session's metadata, transcript, prompts, and context.
ReadSessionContent(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*SessionContent, error)

// ReadSessionContentByID reads a session's content by its session ID.
// Useful when you have the session ID but don't know its index within the checkpoint.
ReadSessionContentByID(ctx context.Context, checkpointID id.CheckpointID, sessionID string) (*SessionContent, error)

// ListCommitted lists all committed checkpoints.
ListCommitted(ctx context.Context) ([]CommittedInfo, error)
Expand Down Expand Up @@ -264,42 +275,6 @@ type WriteCommittedOptions struct {
Summary *Summary
}

// ReadCommittedResult contains the result of reading a committed checkpoint.
type ReadCommittedResult struct {
// Metadata contains the checkpoint metadata
Metadata CommittedMetadata

// Transcript is the session transcript content (most recent session)
Transcript []byte

// Prompts contains user prompts (most recent session)
Prompts string

// Context is the context.md content
Context string

// ArchivedSessions contains transcripts from previous sessions when multiple
// sessions were condensed to the same checkpoint. Ordered from oldest to newest
// (1/, 2/, etc.). The root-level Transcript is the most recent session.
ArchivedSessions []ArchivedSession
}

// ArchivedSession contains transcript data from a previous session
// that was archived when multiple sessions contributed to the same checkpoint.
type ArchivedSession struct {
// SessionID is the session identifier for this archived session
SessionID string

// Transcript is the session transcript content
Transcript []byte

// Prompts contains user prompts from this session
Prompts string

// FolderIndex is the archive folder number (1, 2, etc.)
FolderIndex int
}

// CommittedInfo contains summary information about a committed checkpoint.
type CommittedInfo struct {
// CheckpointID is the stable 12-hex-char identifier
Expand Down Expand Up @@ -331,6 +306,23 @@ type CommittedInfo struct {
SessionIDs []string // All session IDs that contributed
}

// SessionContent contains the actual content for a single session within a checkpoint.
// It is the result type of Store.ReadSessionContent and Store.ReadSessionContentByID,
// and is used when reading full session data (transcript, prompts, context)
// as opposed to just the checkpoint-level metadata/summary returned by ReadCommitted.
type SessionContent struct {
// Metadata contains the session-specific metadata
// (the session's own metadata.json, not the root-level CheckpointSummary).
Metadata CommittedMetadata

// Transcript is the session transcript content
Transcript []byte

// Prompts contains user prompts from this session
Prompts string

// Context is the context.md content
Context string
}

// CommittedMetadata contains the metadata stored in metadata.json for each checkpoint.
type CommittedMetadata struct {
CheckpointID id.CheckpointID `json:"checkpoint_id"`
Expand All @@ -342,13 +334,7 @@ type CommittedMetadata struct {
FilesTouched []string `json:"files_touched"`

// Agent identifies the agent that created this checkpoint (e.g., "Claude Code", "Cursor")
// For multi-session checkpoints, this is the first agent (see Agents for all)
Agent agent.AgentType `json:"agent,omitempty"`
Agents []agent.AgentType `json:"agents,omitempty"` // All agents that contributed (multi-session, deduplicated)

// Multi-session support: when multiple sessions contribute to the same checkpoint
SessionCount int `json:"session_count,omitempty"` // Number of sessions (1 if omitted for backwards compat)
SessionIDs []string `json:"session_ids,omitempty"` // All session IDs that contributed
Agent agent.AgentType `json:"agent,omitempty"`

// Task checkpoint fields (only populated for task checkpoints)
IsTask bool `json:"is_task,omitempty"`
Expand All @@ -368,6 +354,46 @@ type CommittedMetadata struct {
InitialAttribution *InitialAttribution `json:"initial_attribution,omitempty"`
}

// SessionFilePaths contains the absolute paths to one session's files from the git tree root.
// Paths include the full checkpoint path prefix (e.g., "/a1/b2c3d4e5f6/0/metadata.json"
// for the first session; session folders use 0-based indexing).
// Used as the element type of CheckpointSummary.Sessions, which is a slice of
// per-session path sets rather than a map keyed by session ID.
type SessionFilePaths struct {
Metadata string `json:"metadata"` // presumably the session's metadata.json
Transcript string `json:"transcript"` // presumably the session's full.jsonl
Context string `json:"context"` // presumably the session's context.md
ContentHash string `json:"content_hash"` // presumably the session's content_hash.txt
Prompt string `json:"prompt"` // presumably the session's prompt.txt
}

// CheckpointSummary is the root-level metadata.json for a checkpoint.
// It contains aggregated statistics from all sessions and a list (Sessions)
// of each session's file paths. Session-specific data (including initial_attribution)
// is stored in the session's subdirectory metadata.json.
//
// Structure on the metadata branch (entire/sessions/v1):
//
// <checkpoint-id[:2]>/<checkpoint-id[2:]>/
// ├── metadata.json # This CheckpointSummary
// ├── 0/ # First session (0-based indexing)
// │ ├── metadata.json # Session-specific CommittedMetadata
// │ ├── full.jsonl
// │ ├── prompt.txt
// │ ├── context.md
// │ └── content_hash.txt
// ├── 1/ # Second session
// └── 2/ # Third session...
//
//nolint:revive // Named CheckpointSummary to avoid conflict with existing Summary struct
type CheckpointSummary struct {
CheckpointID id.CheckpointID `json:"checkpoint_id"`
Strategy string `json:"strategy"`
Branch string `json:"branch,omitempty"`
CheckpointsCount int `json:"checkpoints_count"`
FilesTouched []string `json:"files_touched"`
Sessions []SessionFilePaths `json:"sessions"` // one SessionFilePaths entry per session
TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"` // nil when no token usage was recorded (e.g. older checkpoints)
}

// Summary contains AI-generated summary of a checkpoint.
type Summary struct {
Intent string `json:"intent"` // What user wanted to accomplish
Expand Down
Loading