diff --git a/AGENTS.md b/AGENTS.md index f78fb0e9..081249a2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -100,6 +100,7 @@ Co-Authored-By: (agent model name) - `specs/chat-architecture-spec.md` (chat composition, service, and test-seam architecture contract) - `specs/slack-agent-delivery-spec.md` (Slack entry surfaces, reply delivery, continuation, files, images, and resume behavior contract) - `specs/slack-outbound-contract-spec.md` (Slack outbound boundary, message/file/reaction safety rules, and markdown-to-`mrkdwn` ownership) +- `specs/slack-rendering-spec.md` (Slack `mrkdwn` output contract: allow-list / forbid-list for the Slack surface — draft) - `specs/skill-capabilities-spec.md` (capability declaration + broker/injection contract) - `specs/oauth-flows-spec.md` (OAuth authorization code flow + Slack UX contract) - `specs/harness-agent-spec.md` (agent loop and output contract) diff --git a/packages/junior-evals/evals/core/slack-mrkdwn-hygiene.eval.ts b/packages/junior-evals/evals/core/slack-mrkdwn-hygiene.eval.ts new file mode 100644 index 00000000..5823ca98 --- /dev/null +++ b/packages/junior-evals/evals/core/slack-mrkdwn-hygiene.eval.ts @@ -0,0 +1,62 @@ +import { describe } from "vitest"; +import { mention, rubric, slackEval } from "../helpers"; + +describe("Slack mrkdwn hygiene", () => { + slackEval( + "uses single-asterisk bold, single-tilde strike, and Slack link syntax", + { + events: [ + mention( + "In one short Slack reply, bold the word 'ready', strike through the word 'draft', and link the label 'docs' to https://docs.slack.dev/ .", + ), + ], + overrides: { + reply_timeout_ms: 120_000, + }, + requireSandboxReady: false, + taskTimeout: 150_000, + timeout: 210_000, + criteria: rubric({ + contract: + "Emphasis and link syntax follow Slack `mrkdwn`: single-asterisk bold, single-tilde strike, and `<url|label>` links. 
CommonMark/GFM equivalents are forbidden.", + pass: [ + "assistant_posts contains a single reply that addresses the bold, strike, and link asks.", + "Bold uses `*ready*` (single asterisks).", + "Strike uses `~draft~` (single tildes).", + "The docs link appears as `<https://docs.slack.dev/|docs>` or the bare URL.", + ], + fail: [ + "Do not emit `**ready**` (CommonMark bold).", + "Do not emit `~~draft~~` (CommonMark strike).", + "Do not emit `[docs](https://docs.slack.dev/)` (CommonMark link).", + ], + }), + }, + ); + + slackEval("uses bold section labels instead of markdown headings", { + events: [ + mention( + "Give me a two-section Slack reply with short headings 'Summary' and 'Next steps', each with one sentence under it.", + ), + ], + overrides: { + reply_timeout_ms: 120_000, + }, + requireSandboxReady: false, + taskTimeout: 150_000, + timeout: 210_000, + criteria: rubric({ + contract: + "Section structure uses a bold label on its own line. Markdown heading syntax is forbidden because Slack does not render it.", + pass: [ + "assistant_posts contains a single reply with two sections.", + "Each section label appears as `*Summary*` and `*Next steps*` on their own lines (bold labels), followed by a sentence.", + ], + fail: [ + "Do not emit `# Summary`, `## Summary`, `### Summary`, or any other markdown heading syntax.", + "Do not emit `**Summary**` (CommonMark bold).", + ], + }), + }); +}); diff --git a/packages/junior-evals/evals/helpers.ts b/packages/junior-evals/evals/helpers.ts index 4752d431..1c760612 100644 --- a/packages/junior-evals/evals/helpers.ts +++ b/packages/junior-evals/evals/helpers.ts @@ -9,7 +9,7 @@ import { runEvalScenario, } from "./behavior-harness"; -configure({ model: gateway("openai/gpt-5.2") }); +configure({ model: gateway("openai/gpt-5.4") }); // ── Eval output schema ───────────────────────────────────── diff --git a/packages/junior/src/chat/prompt.ts b/packages/junior/src/chat/prompt.ts index 458a0767..597db29e 100644 --- a/packages/junior/src/chat/prompt.ts +++ 
b/packages/junior/src/chat/prompt.ts @@ -252,6 +252,41 @@ function baseSystemPrompt(): string { ].join("\n"); } +function buildSlackOutputContract(params: { + maxInlineChars: number; + maxInlineLines: number; +}): string { + return [ + ``, + "Your reply is delivered as plain Slack `mrkdwn` text. Slack `mrkdwn` is a strict, smaller syntax than CommonMark or GitHub-Flavored Markdown — anything outside the allow-list below renders as literal characters.", + "", + "Allowed mrkdwn (you may use these):", + "- `*bold*` — surround with single asterisks. Slack does NOT support `**bold**`; it renders the asterisks literally.", + "- `_italic_` — surround with single underscores.", + "- `~strike~` — surround with single tildes. Slack does NOT support `~~strike~~`.", + "- `` `inline code` `` and triple-backtick fenced code blocks for code, commands, and monospaced snippets.", + "- `> quoted text` at the start of a line for block quotes. A blank line ends the quote.", + "- `<https://example.com|Label>` for hyperlinks with a label. A bare `https://example.com` auto-links without a label. Slack does NOT support `[Label](https://example.com)` — it renders literally.", + "- `<@USERID>`, `<#CHANNELID>`, `<!subteam^GROUPID>` for user, channel, and group mentions. Use the raw IDs exposed elsewhere in this prompt.", + "- `- item` or `* item` at the start of a line for bullet lists. Numbered lists (`1. item`) render but indent awkwardly — prefer bullets.", + "- A bold label on its own line (`*Section*`) in place of markdown headings.", + "", + "Forbidden (do NOT emit these — they render as literal characters or broken formatting):", + "- Markdown tables using pipes and dashes (`| col | col |` / `|---|---|`). Slack renders the pipes verbatim. When you need tabular data, use short bulleted lists grouped by row, or a fenced code block with manually aligned columns.", + "- Markdown headings (`#`, `##`, `###`, and so on). Use a bold label on its own line instead.", + "- Markdown link syntax (`[label](url)`). 
Rewrite as `<https://example.com|Label>` or a bare URL.", + "- CommonMark bold/strike doubles (`**bold**`, `~~strike~~`). Use the single-delimiter forms above.", + "- HTML tags, image embeds, and raw Slack Block Kit JSON.", + "", + "Other response rules:", + "- Keep responses brief and scannable. Lead with the answer; add detail only when depth is warranted.", + "- For tool-heavy research, discovery, or source-checking requests, do not send an initial acknowledgement. Start the visible reply only once you can present the actual answer.", + "- Do not narrate tool execution or emit repeated status updates in the visible reply.", + "- End every turn with a single final user-facing response in the format above.", + "", + ].join("\n"); +} + function formatReferenceFilesSection(): string[] { const files = listReferenceFiles(); if (files.length === 0) { @@ -578,21 +613,10 @@ export function buildSystemPrompt(params: { "- If no skill is a clear fit, continue with normal tool usage.", ].join("\n"), ), - renderTag( - "output-contract", - [ - "Always produce output that follows this contract:", - ``, - "- Use Slack-friendly markdown, not full CommonMark. Prefer bold section labels over markdown headings, and use bullets and short code blocks when helpful.", - "- Keep normal responses brief and scannable.", - "- If depth is needed, start with a concise summary and then provide fuller detail.", - "- For tool-heavy research, discovery, or source-checking requests, do not send an initial acknowledgment. Start the visible reply only once you can present the actual answer.", - "- Do not narrate tool execution or repeated status updates in the visible reply.", - "- Avoid tables and markdown links like `[label](url)` unless explicitly requested. 
Prefer plain URLs or Slack-native entities when exact rendering matters.", - "- End every turn with a final user-facing markdown response.", - "", - ].join("\n"), - ), + buildSlackOutputContract({ + maxInlineChars: slackOutputPolicy.maxInlineChars, + maxInlineLines: slackOutputPolicy.maxInlineLines, + }), availableSkillsSection, activeSkillsSection, ...(activeToolsSection ? [activeToolsSection] : []), diff --git a/packages/junior/src/chat/slack/footer.ts b/packages/junior/src/chat/slack/footer.ts index 9fcbcbee..3b52dd5f 100644 --- a/packages/junior/src/chat/slack/footer.ts +++ b/packages/junior/src/chat/slack/footer.ts @@ -1,21 +1,10 @@ +import type { + SlackContextBlock, + SlackMessageBlock, +} from "@/chat/slack/render/blocks"; import type { AgentTurnUsage } from "@/chat/usage"; -interface SlackMrkdwnTextObject { - text: string; - type: "mrkdwn"; -} - -interface SlackSectionBlock { - text: SlackMrkdwnTextObject; - type: "section"; -} - -interface SlackContextBlock { - elements: SlackMrkdwnTextObject[]; - type: "context"; -} - -export type SlackMessageBlock = SlackSectionBlock | SlackContextBlock; +export type { SlackMessageBlock }; export interface SlackReplyFooterItem { label: string; @@ -108,6 +97,27 @@ export function buildSlackReplyFooter(args: { return items.length > 0 ? { items } : undefined; } +/** + * Build the standalone footer `context` block (no surrounding section). + * Used when composing the footer onto an existing block-bearing message + * (e.g. an intent-rendered reply) so we don't double-render the body. 
+ */ +export function buildSlackFooterContextBlock( + footer: SlackReplyFooter | undefined, +): SlackContextBlock | undefined { + if (!footer?.items.length) { + return undefined; + } + + return { + type: "context", + elements: footer.items.map((item) => ({ + type: "mrkdwn", + text: `*${escapeSlackMrkdwn(item.label)}:* ${escapeSlackMrkdwn(item.value)}`, + })), + }; +} + /** Build Slack blocks for a finalized reply plus its optional footer context block. */ export function buildSlackReplyBlocks( text: string, @@ -117,6 +127,11 @@ export function buildSlackReplyBlocks( return undefined; } + const footerBlock = buildSlackFooterContextBlock(footer); + if (!footerBlock) { + return undefined; + } + return [ { type: "section", @@ -125,12 +140,6 @@ export function buildSlackReplyBlocks( text, }, }, - { - type: "context", - elements: footer.items.map((item) => ({ - type: "mrkdwn", - text: `*${escapeSlackMrkdwn(item.label)}:* ${escapeSlackMrkdwn(item.value)}`, - })), - }, + footerBlock, ]; } diff --git a/packages/junior/src/chat/slack/render/blocks.ts b/packages/junior/src/chat/slack/render/blocks.ts new file mode 100644 index 00000000..f63ba3de --- /dev/null +++ b/packages/junior/src/chat/slack/render/blocks.ts @@ -0,0 +1,62 @@ +/** + * Slack Block Kit types used by the outbound reply boundary. This is a local + * subset of the Slack API surface — just the fields the repository actually + * emits when wrapping final `mrkdwn` replies in section/context envelopes. 
+ */ + +export interface SlackMrkdwnText { + text: string; + type: "mrkdwn"; +} + +export interface SlackPlainText { + emoji?: boolean; + text: string; + type: "plain_text"; +} + +export interface SlackHeaderBlock { + text: SlackPlainText; + type: "header"; +} + +export interface SlackSectionBlock { + fields?: SlackMrkdwnText[]; + text?: SlackMrkdwnText; + type: "section"; +} + +export interface SlackDividerBlock { + type: "divider"; +} + +export interface SlackContextBlock { + elements: SlackMrkdwnText[]; + type: "context"; +} + +export interface SlackLinkButtonElement { + text: SlackPlainText; + type: "button"; + url: string; +} + +export interface SlackActionsBlock { + elements: SlackLinkButtonElement[]; + type: "actions"; +} + +export type SlackMessageBlock = + | SlackActionsBlock + | SlackContextBlock + | SlackDividerBlock + | SlackHeaderBlock + | SlackSectionBlock; + +/** Escape user-provided text for safe inclusion in Slack mrkdwn fields. */ +export function escapeSlackMrkdwnText(text: string): string { + return text + .replaceAll("&", "&amp;") + .replaceAll("<", "&lt;") + .replaceAll(">", "&gt;"); +} diff --git a/packages/junior/src/chat/slack/reply.ts b/packages/junior/src/chat/slack/reply.ts index 1c6ca7e1..b7eedfb6 100644 --- a/packages/junior/src/chat/slack/reply.ts +++ b/packages/junior/src/chat/slack/reply.ts @@ -152,6 +152,7 @@ export function planSlackReplyPosts(args: { args.reply, ); const interrupted = isInterruptedVisibleReply(args.reply); + const posts: PlannedSlackReplyPost[] = []; const textPosts = shouldPostThreadReply diff --git a/specs/index.md b/specs/index.md index 281fad49..467fff18 100644 --- a/specs/index.md +++ b/specs/index.md @@ -14,6 +14,7 @@ - 2026-03-21: Added canonical chat architecture spec. - 2026-04-15: Added canonical Slack agent delivery spec. - 2026-04-16: Added canonical Slack write contract spec. 
+- 2026-04-17: Added draft Slack output contract spec (`slack-rendering-spec.md`) covering the `mrkdwn` allow-list and forbid-list for the Slack surface. ## Status @@ -48,6 +49,7 @@ Define spec taxonomy, naming conventions, and canonical source-of-truth document - `specs/chat-architecture-spec.md` - `specs/slack-agent-delivery-spec.md` - `specs/slack-outbound-contract-spec.md` +- `specs/slack-rendering-spec.md` - `specs/skill-capabilities-spec.md` - `specs/oauth-flows-spec.md` - `specs/harness-agent-spec.md` diff --git a/specs/slack-rendering-spec.md b/specs/slack-rendering-spec.md new file mode 100644 index 00000000..fcc7e6fd --- /dev/null +++ b/specs/slack-rendering-spec.md @@ -0,0 +1,97 @@ +# Slack Output Contract + +## Metadata + +- Created: 2026-04-17 +- Last Edited: 2026-04-17 + +## Changelog + +- 2026-04-17: Initial draft of the render-intent layer, plugin renderer registry, and Work Object boundary for Slack delivery. +- 2026-04-17: Dropped Work Objects. Replaced the declarative plugin-template registry with a native-intent palette the model selects from; plugins now teach intent usage through SKILL.md rather than YAML templates. +- 2026-04-17: Added the Intent Delivery Mechanism section (ToolStrategy via the native `reply` tool, Renderer pattern). +- 2026-04-17: Removed the render-intent palette, the `reply` tool, and the plugin recipe layer. The spec now documents a single output contract: the final assistant reply is plain Slack `mrkdwn` text, and the prompt's job is to teach the model which `mrkdwn` features Slack actually renders. A structured-layout palette may return later if there is a concrete product reason to spend model tool-budget on presentation. + +## Status + +Draft + +## Purpose + +Define the canonical output contract between Junior's assistant turns and Slack delivery so every visible reply is well-formed Slack `mrkdwn` that Slack actually renders. + +Slack's `mrkdwn` is a strict, smaller syntax than CommonMark or GitHub-Flavored Markdown. 
CommonMark features that Slack silently ignores — pipe tables, `**bold**`, `[label](url)`, `##` headings — render as literal characters and degrade the reply. The output contract names the allow-list the model may use and forbids the CommonMark/GFM constructs that Slack does not support. + +This spec sits in front of `slack-agent-delivery-spec.md` (reply delivery semantics) and `slack-outbound-contract-spec.md` (outbound Slack API safety). It does not change either of those contracts. + +## Scope + +- The Slack `mrkdwn` syntax the model is allowed to emit in a final reply. +- The CommonMark/GFM constructs the model must not emit because Slack does not render them. +- How the prompt teaches these rules (a single `` section). + +## Non-Goals + +- Replacing the visible reply delivery contract defined in `slack-agent-delivery-spec.md`. +- Replacing the outbound boundary defined in `slack-outbound-contract-spec.md`. +- Introducing a render-intent palette, a `reply` tool, or any other structured-layout mechanism. Revisit if there is a concrete product reason to spend model tool-budget on layout. +- Letting the model author Slack Block Kit blocks directly. +- Specifying chart or image-generation surfaces. Slack still receives those as image attachments with a concise textual takeaway. + +## Contracts + +### 1. Output form + +Every final assistant reply is delivered to Slack as plain `mrkdwn` text. The outbound boundary continues to wrap that text in the shared reply envelope (section block for the body, optional context block for the diagnostic footer). The model never authors blocks and never emits raw JSON. + +### 2. Allowed Slack `mrkdwn` + +The prompt explicitly permits the following syntax. Anything not on this allow-list renders as literal characters. + +- `*bold*` — surround with single asterisks. Slack does not render `**bold**`. +- `_italic_` — surround with single underscores. +- `~strike~` — surround with single tildes. Slack does not render `~~strike~~`. 
+- `` `inline code` `` and triple-backtick fenced code blocks. +- `> quoted text` at the start of a line for block quotes. A blank line ends the quote. +- `<https://example.com|Label>` for hyperlinks with a label. A bare `https://example.com` auto-links without a label. Slack does not render `[Label](https://example.com)`. +- `<@USERID>`, `<#CHANNELID>`, `<!subteam^GROUPID>` for user, channel, and group mentions. The model uses the raw IDs provided elsewhere in the prompt. +- `- item` or `* item` at the start of a line for bullet lists. Numbered lists render but indent awkwardly — prefer bullets. +- A bold label on its own line (`*Section*`) in place of a markdown heading. + +### 3. Forbidden constructs + +The prompt explicitly forbids the following because Slack renders them as literal characters or broken formatting. + +- Markdown tables using pipes and dashes (`| col | col |` / `|---|---|`). Slack renders the pipes verbatim. When tabular data is needed, the model uses short bulleted lists grouped by row, or a fenced code block with manually aligned columns. +- Markdown headings (`#`, `##`, `###`, and so on). Use a bold label on its own line instead. +- Markdown link syntax (`[label](url)`). Rewrite as `<https://example.com|Label>` or a bare URL. +- CommonMark bold/strike doubles (`**bold**`, `~~strike~~`). Use the single-delimiter forms. +- HTML tags, image embeds, and raw Slack Block Kit JSON. + +### 4. Prompt surface + +These rules live in one place: the `` section built by `buildSlackOutputContract` in `packages/junior/src/chat/prompt.ts`. The section is the sole authority on what a Slack response may contain. Plugin `SKILL.md` content describes domain behavior (what to fetch, how to phrase a ticket) but does not restate or override these syntax rules. + +The section also carries the other per-reply guidance this surface requires: brevity, no initial-acknowledgement for tool-heavy research, no progress narration, one final reply per turn. + +## Failure Model + +1. 
The model emits a forbidden construct (table, `##` heading, `[label](url)`, `**bold**`). Slack renders it as literal characters. This is a prompt-adherence failure; the fix lives in the `` section and in any eval scenario that exercises the specific construct. +2. The model emits a correct `mrkdwn` construct that exceeds the envelope's length cap. The outbound boundary truncates or chunks per `slack-outbound-contract-spec.md`. No change here. +3. The model tries to author blocks or JSON directly. The prompt forbids it; if it slips through, the outbound boundary treats the raw string as text and the visible output degrades. + +## Verification + +Required verification coverage for this contract: + +1. Unit: prompt assembly emits the `` section with the expected allow-list and forbid-list. +2. Evals: realistic Slack conversations confirm the model does not emit forbidden constructs (pipe tables, `**bold**`, `[label](url)`, `##` headings) even when the user asks for a comparison, a heading, or a link. + +## Related Specs + +- `./slack-agent-delivery-spec.md` +- `./slack-outbound-contract-spec.md` +- `./chat-architecture-spec.md` +- `./plugin-spec.md` +- `./logging/index.md` +- `./testing/index.md`