Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
0cff956
🤖 feat: add best-of-n support for sub-agents
ammar-agent Mar 12, 2026
0056bbf
🤖 fix: handle partial best-of task creation
ammar-agent Mar 12, 2026
10e3086
🤖 fix: preserve grouped task results across interruptions
ammar-agent Mar 12, 2026
b35849c
🤖 fix: tighten best-of recovery test typing
ammar-agent Mar 12, 2026
6d2604e
🤖 fix: avoid duplicate best-of recovery reports
ammar-agent Mar 12, 2026
6e59dd1
🤖 fix: recover interrupted partial best-of spawns
ammar-agent Mar 12, 2026
fdcbcfe
🤖 fix: preserve partial best-of task results
ammar-agent Mar 12, 2026
c7b2dc0
🤖 refactor: simplify best-of task helpers
ammar-agent Mar 12, 2026
7c16b51
🤖 fix: recover best-of progress in parent task UI
ammar-agent Mar 12, 2026
8027ebe
🤖 fix: update recovered best-of task progress UI
ammar-agent Mar 12, 2026
15571fc
🤖 fix: recheck best-of child cleanup after parent streams end
ammar-agent Mar 12, 2026
88ea512
🤖 fix: cover best-of recovery edge cases
ammar-agent Mar 12, 2026
40d38f4
🤖 fix: allow best-of fallback after partial finalize failures
ammar-agent Mar 12, 2026
d5bd126
🤖 fix: tighten best-of UI recovery binding
ammar-agent Mar 12, 2026
3121a43
🤖 fix: format grouped task outputs in the CLI
ammar-agent Mar 12, 2026
f4addc0
🤖 fix: require task-created evidence for best-of recovery
ammar-agent Mar 12, 2026
c100226
🤖 fix: discriminate recovered best-of groups by start time
ammar-agent Mar 12, 2026
8a5f060
🤖 fix: keep grouped metadata for partial best-of spawns
ammar-agent Mar 12, 2026
b85f223
🤖 fix: retry deferred best-of fallback reports
ammar-agent Mar 12, 2026
fba0645
🤖 fix: handle interrupted best-of fallback edge cases
ammar-agent Mar 12, 2026
5864a8b
🤖 fix: avoid duplicate deferred best-of reports
ammar-agent Mar 12, 2026
f4e3f7a
🤖 fix: keep interrupted best-of tasks observable
ammar-agent Mar 12, 2026
3b7160c
🤖 fix: finalize and simplify best-of task recovery
ammar-agent Mar 13, 2026
8e313f2
🤖 fix: harden best-of restart and transcript recovery
ammar-agent Mar 13, 2026
9f36ceb
🤖 fix: serialize deferred best-of fallback delivery
ammar-agent Mar 13, 2026
77abb9e
🤖 fix: lock best-of report delivery per parent
ammar-agent Mar 13, 2026
6fccffc
🤖 fix: avoid best-of cleanup lock inversion
ammar-agent Mar 13, 2026
3a70f93
🤖 fix: gate best-of recovery to pending partials
ammar-agent Mar 13, 2026
7381b95
🤖 fix: target pending best-of recovery group
ammar-agent Mar 13, 2026
eaa19b0
🤖 fix: clarify and harden best-of delegation
ammar-agent Mar 14, 2026
4aaa9d8
🤖 tests: factor best-of task service helpers
ammar-agent Mar 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions docs/hooks/tools.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -525,15 +525,16 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
</details>

<details>
<summary>task (5)</summary>

| Env var | JSON path | Type | Description |
| ---------------------------------- | ------------------- | ------- | ----------- |
| `MUX_TOOL_INPUT_AGENT_ID` | `agentId` | string | — |
| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | — |
| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — |
| `MUX_TOOL_INPUT_SUBAGENT_TYPE` | `subagent_type` | string | — |
| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |
<summary>task (6)</summary>

| Env var | JSON path | Type | Description |
| ---------------------------------- | ------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `MUX_TOOL_INPUT_AGENT_ID` | `agentId` | string | — |
| `MUX_TOOL_INPUT_N` | `n` | number | Optional best-of count. Defaults to 1 when omitted. Usually leave unset unless the developer explicitly asks for best-of-n work. Only use this for sub-agents without interfering side effects, such as read-only agents like explore. |
| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | — |
| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — |
| `MUX_TOOL_INPUT_SUBAGENT_TYPE` | `subagent_type` | string | — |
| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |

</details>

Expand Down
96 changes: 96 additions & 0 deletions src/browser/components/ProjectSidebar/BestOfGroupListItem.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { ChevronRight, Layers3 } from "lucide-react";

import { cn } from "@/common/lib/utils";

/** Props for the sidebar row that summarizes one best-of group of sub-agent candidates. */
interface BestOfGroupListItemProps {
/** Group identifier; embedded in the row's `data-testid` as `best-of-group-<groupId>`. */
groupId: string;
/** Title shown after the "Best of N ·" prefix and included in the row's aria-label. */
title: string;
/** Nesting depth; passed to `getItemPaddingLeft` to compute the row's left padding. */
depth: number;
/** Total candidates in the group; shown in the heading and as the denominator of the counter. */
totalCount: number;
/** When this differs from `totalCount`, a "<visible>/<total> visible" status is added. */
visibleCount: number;
/** Numerator of the counter; also rendered as "<n> completed" when greater than zero. */
completedCount: number;
/** Rendered as "<n> running" when greater than zero. */
runningCount: number;
/** Rendered as "<n> queued" when greater than zero. */
queuedCount: number;
/** Rendered as "<n> interrupted" when greater than zero. */
interruptedCount: number;
/** Drives `aria-expanded`, the Collapse/Expand label verb, and the chevron rotation. */
isExpanded: boolean;
/** When true, the row gets the `bg-surface-secondary` background class. */
isSelected: boolean;
/** Invoked on click and on Enter/Space keydown. */
onToggle: () => void;
}

/**
 * Computes the left padding (in pixels) for a sidebar row at the given nesting depth.
 *
 * The depth is clamped to the range [0, 32] before being scaled, so negative depths
 * behave like depth 0 and very deep rows stop indenting further.
 *
 * @param depth - Nesting depth of the row.
 * @returns The base padding plus one indent step per (clamped) depth level.
 */
function getItemPaddingLeft(depth: number): number {
  const basePadding = 12;
  const indentStep = 12;
  const deepestIndentedLevel = 32;

  const nonNegativeDepth = Math.max(0, depth);
  const clampedDepth = Math.min(deepestIndentedLevel, nonNegativeDepth);
  return basePadding + clampedDepth * indentStep;
}

/**
 * Sidebar row that summarizes a best-of group of candidates.
 *
 * Renders a chevron, a layers icon, a "Best of N · title" heading, a completed/total
 * counter, and a line of status fragments (running, queued, completed, interrupted,
 * and a visible/total note when the two counts differ). When no fragment applies,
 * the status line falls back to "<total> candidates".
 *
 * The row acts as a button: clicking it, or pressing Enter or Space while it is
 * focused, calls `props.onToggle`.
 */
export function BestOfGroupListItem(props: BestOfGroupListItemProps) {
  // Ordered (count, label) pairs; only pairs with a positive count are displayed.
  const countedStatuses: Array<[number, string]> = [
    [props.runningCount, "running"],
    [props.queuedCount, "queued"],
    [props.completedCount, "completed"],
    [props.interruptedCount, "interrupted"],
  ];
  const statusParts: string[] = countedStatuses
    .filter(([count]) => count > 0)
    .map(([count, label]) => `${count} ${label}`);
  if (props.visibleCount !== props.totalCount) {
    statusParts.push(`${props.visibleCount}/${props.totalCount} visible`);
  }

  const toggleVerb = props.isExpanded ? "Collapse" : "Expand";
  const chevronTransform = props.isExpanded ? "rotate(90deg)" : "rotate(0deg)";
  const rowStyle = { paddingLeft: getItemPaddingLeft(props.depth) };

  const toggle = () => {
    props.onToggle();
  };

  const statusLine =
    statusParts.length > 0 ? (
      statusParts.map((part) => <span key={part}>{part}</span>)
    ) : (
      <span>{props.totalCount} candidates</span>
    );

  return (
    <div
      role="button"
      tabIndex={0}
      aria-expanded={props.isExpanded}
      aria-label={`${toggleVerb} best-of group ${props.title}`}
      data-testid={`best-of-group-${props.groupId}`}
      className={cn(
        "bg-surface-primary relative flex items-start gap-1.5 rounded-l-sm py-2 pr-2 pl-1 select-none transition-all duration-150 hover:bg-surface-secondary",
        props.isSelected && "bg-surface-secondary"
      )}
      style={rowStyle}
      onClick={toggle}
      onKeyDown={(event) => {
        const isActivationKey = event.key === "Enter" || event.key === " ";
        if (!isActivationKey) {
          return;
        }
        // Suppress the default key action (e.g. Space scrolling) before toggling.
        event.preventDefault();
        toggle();
      }}
    >
      <span
        aria-hidden="true"
        className="text-muted mt-0.5 inline-flex h-4 w-4 shrink-0 items-center justify-center"
      >
        <ChevronRight
          className="h-3 w-3 transition-transform duration-150"
          style={{ transform: chevronTransform }}
        />
      </span>
      <div className="text-muted mt-[3px] flex h-4 w-4 shrink-0 items-center justify-center">
        <Layers3 className="h-3 w-3" />
      </div>
      <div className="flex min-w-0 flex-1 flex-col gap-0.5">
        <div className="grid min-w-0 grid-cols-[minmax(0,1fr)_auto] items-center gap-1.5">
          <span className="text-foreground min-w-0 truncate text-left text-[14px] leading-6">
            Best of {props.totalCount} · {props.title}
          </span>
          <span className="text-muted text-[11px]">
            {props.completedCount}/{props.totalCount}
          </span>
        </div>
        <div className="text-muted flex min-w-0 flex-wrap items-center gap-1.5 text-xs leading-4">
          {statusLine}
        </div>
      </div>
    </div>
  );
}
215 changes: 215 additions & 0 deletions src/browser/components/ProjectSidebar/ProjectSidebar.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ function createWorkspace(
parentWorkspaceId?: string;
taskStatus?: FrontendWorkspaceMetadata["taskStatus"];
title?: string;
bestOf?: FrontendWorkspaceMetadata["bestOf"];
}
): FrontendWorkspaceMetadata {
return {
Expand All @@ -303,6 +304,7 @@ function createWorkspace(
runtimeConfig: DEFAULT_RUNTIME_CONFIG,
parentWorkspaceId: opts?.parentWorkspaceId,
taskStatus: opts?.taskStatus,
bestOf: opts?.bestOf,
};
}

Expand Down Expand Up @@ -394,4 +396,217 @@ describe("ProjectSidebar multi-project completed-subagent toggles", () => {
expect(childRow.dataset.rowKind).toBe("subagent");
expect(childRow.dataset.depth).toBe("1");
});

// Three best-of candidates that share one groupId under the same parent should render as a
// single group row; the individual candidate rows only appear after the group row is clicked.
test("coalesces best-of sub-agents into a single sidebar row until expanded", async () => {
// Store the demo project path under EXPANDED_PROJECTS_KEY (presumably so the project renders expanded).
window.localStorage.setItem(EXPANDED_PROJECTS_KEY, JSON.stringify(["/projects/demo-project"]));

const singleProjectRefs = [
{ projectPath: "/projects/demo-project", projectName: "demo-project" },
];
const parentWorkspace = {
...createWorkspace("parent", { title: "Parent workspace" }),
projects: singleProjectRefs,
};
// Shared best-of metadata; each candidate below overrides only `index`.
const bestOfGroup = { groupId: "best-of-demo", index: 0, total: 3 } as const;
const childOne = {
...createWorkspace("child-1", {
parentWorkspaceId: "parent",
taskStatus: "running",
title: "Compare implementation options",
bestOf: bestOfGroup,
}),
projects: singleProjectRefs,
};
const childTwo = {
...createWorkspace("child-2", {
parentWorkspaceId: "parent",
taskStatus: "queued",
title: "Compare implementation options",
bestOf: { ...bestOfGroup, index: 1 },
}),
projects: singleProjectRefs,
};
const childThree = {
...createWorkspace("child-3", {
parentWorkspaceId: "parent",
taskStatus: "running",
title: "Compare implementation options",
bestOf: { ...bestOfGroup, index: 2 },
}),
projects: singleProjectRefs,
};

const sortedWorkspacesByProject = new Map([
["/projects/demo-project", [parentWorkspace, childOne, childTwo, childThree]],
]);

// Stub the project context with a single project and no-op/resolved implementations.
const projectConfig = { workspaces: [] };
spyOn(ProjectContextModule, "useProjectContext").mockImplementation(() => ({
userProjects: new Map([["/projects/demo-project", projectConfig]]),
systemProjectPath: null,
resolveProjectPath: () => null,
getProjectConfig: () => projectConfig,
loading: false,
refreshProjects: () => Promise.resolve(),
addProject: () => undefined,
removeProject: () => Promise.resolve({ success: true }),
isProjectCreateModalOpen: false,
openProjectCreateModal: () => undefined,
closeProjectCreateModal: () => undefined,
workspaceModalState: {
isOpen: false,
projectPath: null,
projectName: "",
branches: [],
defaultTrunkBranch: undefined,
loadErrorMessage: null,
isLoading: false,
},
openWorkspaceModal: () => Promise.resolve(),
closeWorkspaceModal: () => undefined,
getBranchesForProject: () => Promise.resolve({ branches: [], recommendedTrunk: null }),
getSecrets: () => Promise.resolve([]),
updateSecrets: () => Promise.resolve(),
createSection: () =>
Promise.resolve({ success: true, data: { id: "section-1", name: "Section" } }),
updateSection: () => resolveVoidResult(),
removeSection: () => resolveVoidResult(),
reorderSections: () => resolveVoidResult(),
assignWorkspaceToSection: () => resolveVoidResult(),
hasAnyProject: true,
resolveNewChatProjectPath: () => "/projects/demo-project",
}));

const workspaceRecency = {
parent: Date.now(),
"child-1": Date.now(),
"child-2": Date.now(),
"child-3": Date.now(),
};

const view = render(
<ProjectSidebar
collapsed={false}
onToggleCollapsed={() => undefined}
sortedWorkspacesByProject={sortedWorkspacesByProject}
workspaceRecency={workspaceRecency}
/>
);

// Before expanding: the parent and the group row are rendered, the candidates are not.
expect(view.getByTestId(agentItemTestId("parent"))).toBeTruthy();
const groupRow = view.getByTestId("best-of-group-best-of-demo");
expect(groupRow.textContent).toContain("Best of 3");
expect(view.queryByTestId(agentItemTestId("child-1"))).toBeNull();
expect(view.queryByTestId(agentItemTestId("child-2"))).toBeNull();
expect(view.queryByTestId(agentItemTestId("child-3"))).toBeNull();

fireEvent.click(groupRow);

// After clicking the group row, every candidate row should eventually be rendered.
await waitFor(() => {
expect(view.getByTestId(agentItemTestId("child-1"))).toBeTruthy();
expect(view.getByTestId(agentItemTestId("child-2"))).toBeTruthy();
expect(view.getByTestId(agentItemTestId("child-3"))).toBeTruthy();
});
});

// A two-candidate best-of group where one candidate ("child-1") has its own child task:
// the sidebar is expected to render both candidates as ordinary rows instead of a group row.
test("does not coalesce a best-of group when one candidate still has hidden child tasks", () => {
// Store the demo project path under EXPANDED_PROJECTS_KEY (presumably so the project renders expanded).
window.localStorage.setItem(EXPANDED_PROJECTS_KEY, JSON.stringify(["/projects/demo-project"]));

const singleProjectRefs = [
{ projectPath: "/projects/demo-project", projectName: "demo-project" },
];
const parentWorkspace = {
...createWorkspace("parent", { title: "Parent workspace" }),
projects: singleProjectRefs,
};
// Shared best-of metadata; the second candidate overrides only `index`.
const bestOfGroup = { groupId: "best-of-non-leaf", index: 0, total: 2 } as const;
const childOne = {
...createWorkspace("child-1", {
parentWorkspaceId: "parent",
taskStatus: "running",
title: "Compare implementation options",
bestOf: bestOfGroup,
}),
projects: singleProjectRefs,
};
// Nested task under "child-1" with status "reported" and no best-of metadata.
const hiddenGrandchild = {
...createWorkspace("grandchild-1", {
parentWorkspaceId: "child-1",
taskStatus: "reported",
title: "Nested follow-up",
}),
projects: singleProjectRefs,
};
const childTwo = {
...createWorkspace("child-2", {
parentWorkspaceId: "parent",
taskStatus: "running",
title: "Compare implementation options",
bestOf: { ...bestOfGroup, index: 1 },
}),
projects: singleProjectRefs,
};

const sortedWorkspacesByProject = new Map([
["/projects/demo-project", [parentWorkspace, childOne, hiddenGrandchild, childTwo]],
]);

// Stub the project context with a single project and no-op/resolved implementations.
const projectConfig = { workspaces: [] };
spyOn(ProjectContextModule, "useProjectContext").mockImplementation(() => ({
userProjects: new Map([["/projects/demo-project", projectConfig]]),
systemProjectPath: null,
resolveProjectPath: () => null,
getProjectConfig: () => projectConfig,
loading: false,
refreshProjects: () => Promise.resolve(),
addProject: () => undefined,
removeProject: () => Promise.resolve({ success: true }),
isProjectCreateModalOpen: false,
openProjectCreateModal: () => undefined,
closeProjectCreateModal: () => undefined,
workspaceModalState: {
isOpen: false,
projectPath: null,
projectName: "",
branches: [],
defaultTrunkBranch: undefined,
loadErrorMessage: null,
isLoading: false,
},
openWorkspaceModal: () => Promise.resolve(),
closeWorkspaceModal: () => undefined,
getBranchesForProject: () => Promise.resolve({ branches: [], recommendedTrunk: null }),
getSecrets: () => Promise.resolve([]),
updateSecrets: () => Promise.resolve(),
createSection: () =>
Promise.resolve({ success: true, data: { id: "section-1", name: "Section" } }),
updateSection: () => resolveVoidResult(),
removeSection: () => resolveVoidResult(),
reorderSections: () => resolveVoidResult(),
assignWorkspaceToSection: () => resolveVoidResult(),
hasAnyProject: true,
resolveNewChatProjectPath: () => "/projects/demo-project",
}));

const workspaceRecency = {
parent: Date.now(),
"child-1": Date.now(),
"grandchild-1": Date.now(),
"child-2": Date.now(),
};

const view = render(
<ProjectSidebar
collapsed={false}
onToggleCollapsed={() => undefined}
sortedWorkspacesByProject={sortedWorkspacesByProject}
workspaceRecency={workspaceRecency}
/>
);

// No group row; both candidates are rendered directly, and the grandchild is not rendered.
expect(view.queryByTestId("best-of-group-best-of-non-leaf")).toBeNull();
expect(view.getByTestId(agentItemTestId("child-1"))).toBeTruthy();
expect(view.getByTestId(agentItemTestId("child-2"))).toBeTruthy();
expect(view.queryByTestId(agentItemTestId("grandchild-1"))).toBeNull();
});
});
Loading
Loading