Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions crates/jp_md/src/ansi.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! Shared ANSI SGR escape constants and state tracking.
//!
//! This module provides the escape sequences, state tracking, and visual
//! width computation used by both the terminal renderer (`render.rs`) and
//! the table formatter (`table.rs`).
//! This module provides the escape sequences, state tracking, and visual width
//! computation used by both the terminal renderer (`render.rs`) and the table
//! formatter (`table.rs`).

/// SGR: Bold on.
pub const BOLD_START: &str = "\x1b[1m";
Expand Down Expand Up @@ -39,9 +39,9 @@ pub const RESET: &str = "\x1b[0m";

/// Tracks which ANSI SGR attributes are currently active.
///
/// Used to close formatting at line breaks and re-open it on the next
/// line, both for the terminal renderer's incremental wrapping and the
/// table formatter's batch wrapping.
/// Used to close formatting at line breaks and re-open it on the next line,
/// both for the terminal renderer's incremental wrapping and the table
/// formatter's batch wrapping.
#[derive(Debug, Clone, Default)]
#[expect(clippy::struct_excessive_bools)]
pub struct AnsiState {
Expand Down Expand Up @@ -81,8 +81,8 @@ impl AnsiState {
|| self.background.is_some()
}

/// Update the tracked state from a complete ANSI escape sequence
/// (e.g. `"\x1b[1m"`).
/// Update the tracked state from a complete ANSI escape sequence (e.g.
/// `"\x1b[1m"`).
pub(crate) fn update(&mut self, esc: &str) {
match esc {
BOLD_START => self.bold = true,
Expand Down Expand Up @@ -161,10 +161,9 @@ impl AnsiState {

/// Calculate the visual width of a string, ignoring ANSI escape sequences.
///
/// Strips ANSI escape sequences, then delegates to
/// `UnicodeWidthStr::width()` which correctly handles multi-codepoint
/// sequences like emoji presentation (VS16), ZWJ sequences, and
/// script-specific ligatures.
/// Strips ANSI escape sequences, then delegates to `UnicodeWidthStr::width()`
/// which correctly handles multi-codepoint sequences like emoji presentation
/// (VS16), ZWJ sequences, and script-specific ligatures.
pub fn visual_width(s: &str) -> usize {
use unicode_width::UnicodeWidthStr as _;

Expand Down
212 changes: 134 additions & 78 deletions crates/jp_md/src/buffer.rs

Large diffs are not rendered by default.

47 changes: 26 additions & 21 deletions crates/jp_md/src/buffer/fixup.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
//! Post-processing fixups for buffer events.
//!
//! Fixups are stateful transformers that sit between the [`Buffer`] iterator
//! and the consumer. They handle LLM-specific quirks that don't belong in the
//! core markdown parsing logic.
//! and the consumer.
//! They handle LLM-specific quirks that don't belong in the core markdown
//! parsing logic.
//!
//! [`Buffer`]: super::Buffer

use super::Event;

/// A stateful event transformer.
///
/// Each fixup inspects events as they pass through and can rewrite, suppress,
/// or pass them unchanged.
/// Fixups may hold state across events (e.g. remembering properties of the
/// previous block).
pub trait EventFixup {
/// Process a single event.
///
/// Returns `None` to suppress the event, or `Some(event)` (possibly
/// modified) to pass it through.
fn process(&mut self, event: Event) -> Option<Event>;
}

/// Wraps a buffer iterator and applies a chain of [`EventFixup`]s to
/// each emitted event.
/// Wraps a buffer iterator and applies a chain of [`EventFixup`]s to each
/// emitted event.
pub struct FixupChain<I> {
/// The underlying event source.
inner: I,
Expand Down Expand Up @@ -55,8 +58,9 @@ impl<I: Iterator<Item = Event>> Iterator for FixupChain<I> {
}

/// Check if a block contains a fence pattern embedded mid-line (not at the
/// start). This indicates the LLM started a code block at the end of a
/// paragraph line, and a subsequent bare fence is likely the orphaned close.
/// start).
/// This indicates the LLM started a code block at the end of a paragraph line,
/// and a subsequent bare fence is likely the orphaned close.
fn has_embedded_fence(block: &str) -> bool {
for line in block.lines() {
let trimmed = line.trim_start();
Expand All @@ -74,17 +78,18 @@ fn has_embedded_fence(block: &str) -> bool {

/// Fixes orphaned closing fences from mid-line code fence patterns.
///
/// When an LLM produces backticks mid-line (e.g. `text:```lang`), the bare
/// closing fence on the next line gets misinterpreted as a new code block
/// opening.
/// This fixup detects when a `Block` contains such an embedded fence pattern
/// and converts the following bare `FencedCodeStart` (no language tag) into a
/// `Block` instead.
pub struct OrphanedFenceFixup {
/// Whether the previous block had an embedded fence pattern.
prev_had_embedded_fence: bool,
/// When true, we're inside a fake code block from an orphaned fence.
/// All `FencedCodeLine` events become `Block` events, and `FencedCodeEnd`
/// is suppressed.
suppressing: bool,
}

Expand Down Expand Up @@ -146,9 +151,9 @@ impl EventFixup for OrphanedFenceFixup {

/// Escalates fence lengths so rendered output safely contains inner fences.
///
/// Rewrites `FencedCodeStart` and `FencedCodeEnd` events to use at least 5
/// backticks/tildes, so 3-backtick inner fences render as literal content in
/// the output.
pub struct FenceEscalationFixup;

impl EventFixup for FenceEscalationFixup {
Expand Down
60 changes: 29 additions & 31 deletions crates/jp_md/src/buffer/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,30 @@ pub enum State {
///
/// While in this state, indented content at any column greater than
/// `marker_column` is treated as continuation of the current item.
/// In particular, content at column 4 is *not* treated as an indented
/// code block — that classification only applies at block boundaries
/// outside a list.
/// In particular, content at column 4 is *not* treated as an indented code
/// block — that classification only applies at block boundaries outside a
/// list.
///
/// When a fence or a deeper list marker appears inside the item,
/// the buffer pushes the current `InList` state onto its parents
/// stack and switches to the inner state. On close, the parent is
/// popped back.
/// When a fence or a deeper list marker appears inside the item, the buffer
/// pushes the current `InList` state onto its parents stack and switches to
/// the inner state.
/// On close, the parent is popped back.
InList {
/// Column where the list's markers appear. Sibling items must
/// share this column.
/// Column where the list's markers appear.
/// Sibling items must share this column.
marker_column: usize,
/// Column where item content (post-marker) starts. Deeper
/// markers seen at this column or beyond start a nested list.
/// Column where item content (post-marker) starts.
/// Deeper markers seen at this column or beyond start a nested list.
content_column: usize,
/// Whether the list is ordered (digit + delim). Bullet otherwise.
/// Whether the list is ordered (digit + delim).
/// Bullet otherwise.
is_ordered: bool,
/// The marker delimiter character: `.` or `)` for ordered, `-`/
/// `*`/`+` for bullet.
/// The marker delimiter character: `.` or `)` for ordered, `-`, `*`, or `+`
/// for bullet.
delimiter: u8,
/// For ordered lists, the marker number on the first item. Used
/// to renumber emitted items so the output is consistent with
/// CommonMark's renumbering even though we stream items
/// individually.
/// For ordered lists, the marker number on the first item.
/// Used to renumber emitted items so the output is consistent with
/// CommonMark's renumbering even though we stream items individually.
start_number: u32,
/// Number of items already flushed from this list.
items_flushed: u32,
Expand All @@ -58,15 +58,15 @@ pub enum State {

/// The indentation of the opening fence.
///
/// When the fence is inside a list item, this is the parent
/// list's `content_column`; code lines have this many leading
/// spaces stripped before emission.
/// When the fence is inside a list item, this is the parent list's
/// `content_column`; code lines have this many leading spaces stripped
/// before emission.
indent: usize,

/// Nesting depth of inner fenced code blocks. When an inner fence
/// opening (backticks + language tag) is seen, this increments;
/// a bare closing fence decrements. The outer block only closes
/// when depth reaches 0.
/// Nesting depth of inner fenced code blocks.
/// When an inner fence opening (backticks + language tag) is seen, this
/// increments; a bare closing fence decrements.
/// The outer block only closes when depth reaches 0.
depth: usize,
},

Expand Down Expand Up @@ -101,8 +101,8 @@ impl FenceType {
/// Represents the 7 types of HTML blocks defined by the CommonMark spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HtmlBlockRule {
/// Type 1: `<script>`, `<pre>`, `<style>`, `<textarea>`
/// Ends with matching closing tag.
/// Type 1: `<script>`, `<pre>`, `<style>`, `<textarea>`.
/// Ends with matching closing tag.
Type1(HtmlType1Tag),
/// Type 2: `<!-- ... -->`
Type2,
Expand All @@ -112,11 +112,9 @@ pub enum HtmlBlockRule {
Type4,
/// Type 5: `<![CDATA[ ... ]]>`
Type5,
/// Type 6: `<div...` etc.
/// Ends with a blank line.
/// Type 6: `<div...` etc. Ends with a blank line.
Type6(HtmlType6Tag),
/// Type 7: `<foo...`
/// Ends with a blank line, cannot interrupt a paragraph.
/// Type 7: `<foo...`. Ends with a blank line, cannot interrupt a paragraph.
Type7,
}

Expand Down
Loading
Loading