From 826e701b4f99d8bafbda8aea95b6b9d1d7b68df7 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 8 Sep 2020 21:32:30 +0200 Subject: [PATCH] Add line highlighting to code blocks (#1131) * Add line highlighting to code blocks * Fix highlighting of lines Apparently every line to be highlighted is provided in one chunk. * Add more documentation to codeblock.rs * Turn FenceIter into an Iterator * Move Range to fence.rs * Add tests --- components/config/src/highlighting.rs | 7 +- components/rendering/src/markdown.rs | 61 ++-- .../rendering/src/markdown/codeblock.rs | 196 +++++++++++ components/rendering/src/markdown/fence.rs | 102 ++++++ .../rendering/tests/codeblock_hl_lines.rs | 312 ++++++++++++++++++ 5 files changed, 642 insertions(+), 36 deletions(-) create mode 100644 components/rendering/src/markdown/codeblock.rs create mode 100644 components/rendering/src/markdown/fence.rs create mode 100644 components/rendering/tests/codeblock_hl_lines.rs diff --git a/components/config/src/highlighting.rs b/components/config/src/highlighting.rs index 263b5f985..595addb7b 100644 --- a/components/config/src/highlighting.rs +++ b/components/config/src/highlighting.rs @@ -17,11 +17,14 @@ lazy_static! { } /// Returns the highlighter and whether it was found in the extra or not -pub fn get_highlighter<'a>(info: &str, config: &Config) -> (HighlightLines<'a>, bool) { +pub fn get_highlighter( + language: Option<&str>, + config: &Config +) -> (HighlightLines<'static>, bool) { let theme = &THEME_SET.themes[&config.highlight_theme]; let mut in_extra = false; - if let Some(ref lang) = info.split(' ').next() { + if let Some(ref lang) = language { let syntax = SYNTAX_SET .find_syntax_by_token(lang) .or_else(|| { diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 75b644e00..dd06371ea 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -1,14 +1,11 @@ use lazy_static::lazy_static; use pulldown_cmark as cmark; use regex::Regex; -use syntect::easy::HighlightLines; -use syntect::html::{ - start_highlighted_html_snippet, styled_line_to_highlighted_html, IncludeBackground, -}; +use syntect::html::{start_highlighted_html_snippet, IncludeBackground}; use crate::context::RenderContext; use crate::table_of_contents::{make_table_of_contents, Heading}; -use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET}; +use config::highlighting::THEME_SET; use errors::{Error, Result}; use front_matter::InsertAnchor; use utils::site::resolve_internal_link; @@ -18,6 +15,10 @@ use utils::vec::InsertMany; use self::cmark::{Event, LinkType, Options, Parser, Tag}; use pulldown_cmark::CodeBlockKind; +mod codeblock; +mod fence; +use self::codeblock::CodeBlock; + const CONTINUE_READING: &str = ""; const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html"; @@ -172,8 +173,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result = None; + let mut highlighter: Option = None; let mut inserted_anchors: Vec = vec![]; let mut headings: Vec = vec![]; @@ -192,26 +192,14 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result { - // if we are in the middle of a code block - if let Some((ref mut highlighter, in_extra)) = highlighter { - let highlighted = if in_extra { - if let Some(ref extra) = context.config.extra_syntax_set { - highlighter.highlight(&text, &extra) - } else { - unreachable!( - "Got a highlighter from extra syntaxes but no extra?" - ); - } - } else { - highlighter.highlight(&text, &SYNTAX_SET) - }; - //let highlighted = &highlighter.highlight(&text, ss); - let html = styled_line_to_highlighted_html(&highlighted, background); - return Event::Html(html.into()); + // if we are in the middle of a highlighted code block + if let Some(ref mut code_block) = highlighter { + let html = code_block.highlight(&text); + Event::Html(html.into()) + } else { + // Business as usual + Event::Text(text) } - - // Business as usual - Event::Text(text) } Event::Start(Tag::CodeBlock(ref kind)) => { if !context.config.highlight_code { @@ -221,16 +209,21 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result (), - CodeBlockKind::Fenced(info) => { - highlighter = Some(get_highlighter(info, &context.config)); + CodeBlockKind::Fenced(fence_info) => { + // This selects the background color the same way that + // start_coloured_html_snippet does + let color = theme + .settings + .background + .unwrap_or(::syntect::highlighting::Color::WHITE); + + highlighter = Some(CodeBlock::new( + fence_info, + &context.config, + IncludeBackground::IfDifferent(color), + )); } }; - // This selects the background color the same way that start_coloured_html_snippet does - let color = theme - .settings - .background - .unwrap_or(::syntect::highlighting::Color::WHITE); - background = IncludeBackground::IfDifferent(color); let snippet = start_highlighted_html_snippet(theme); let mut html = snippet.0; html.push_str(""); diff --git a/components/rendering/src/markdown/codeblock.rs b/components/rendering/src/markdown/codeblock.rs new file mode 100644 index 000000000..1113f5fa5 --- /dev/null +++ b/components/rendering/src/markdown/codeblock.rs @@ -0,0 +1,196 @@ +use syntect::html::{IncludeBackground, styled_line_to_highlighted_html}; +use syntect::easy::HighlightLines; +use syntect::parsing::SyntaxSet; +use syntect::highlighting::{Color, Theme, Style}; +use config::Config; +use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET}; +use std::cmp::min; +use std::collections::HashSet; + +use super::fence::{FenceSettings, Range}; + +pub struct CodeBlock<'config> { + highlighter: HighlightLines<'static>, + extra_syntax_set: Option<&'config SyntaxSet>, + background: IncludeBackground, + theme: &'static Theme, + + /// List of ranges of lines to highlight. + highlight_lines: Vec, + /// The number of lines in the code block being processed. + num_lines: usize, +} + +impl<'config> CodeBlock<'config> { + pub fn new( + fence_info: &str, + config: &'config Config, + background: IncludeBackground, + ) -> Self { + let fence_info = FenceSettings::new(fence_info); + let theme = &THEME_SET.themes[&config.highlight_theme]; + let (highlighter, in_extra) = get_highlighter(fence_info.language, config); + Self { + highlighter, + extra_syntax_set: match in_extra { + true => config.extra_syntax_set.as_ref(), + false => None, + }, + background, + theme, + + highlight_lines: fence_info.highlight_lines, + num_lines: 0, + } + } + + pub fn highlight(&mut self, text: &str) -> String { + let highlighted = self.highlighter.highlight( + text, + self.extra_syntax_set.unwrap_or(&SYNTAX_SET), + ); + let line_boundaries = self.find_line_boundaries(&highlighted); + + // First we make sure that `highlighted` is split at every line + // boundary. The `styled_line_to_highlighted_html` function will + // merge split items with identical styles, so this is not a + // problem. + // + // Note that this invalidates the values in `line_boundaries`. + // The `perform_split` function takes it by value to ensure that + // we don't use it later. + let mut highlighted = perform_split(&highlighted, line_boundaries); + + let hl_background = self.theme.settings.line_highlight + .unwrap_or(Color { r: 255, g: 255, b: 0, a: 0 }); + + + let hl_lines = self.get_highlighted_lines(); + color_highlighted_lines(&mut highlighted, &hl_lines, hl_background); + + styled_line_to_highlighted_html(&highlighted, self.background) + } + + fn find_line_boundaries(&mut self, styled: &[(Style, &str)]) -> Vec { + let mut boundaries = Vec::new(); + for (vec_idx, (_style, s)) in styled.iter().enumerate() { + for (str_idx, character) in s.char_indices() { + if character == '\n' { + boundaries.push(StyledIdx { + vec_idx, + str_idx, + }); + } + } + } + self.num_lines = boundaries.len() + 1; + boundaries + } + + fn get_highlighted_lines(&self) -> HashSet { + let mut lines = HashSet::new(); + for range in &self.highlight_lines { + for line in range.from ..= min(range.to, self.num_lines) { + // Ranges are one-indexed + lines.insert(line.saturating_sub(1)); + } + } + lines + } +} + +/// This is an index of a character in a `&[(Style, &'b str)]`. The `vec_idx` is the +/// index in the slice, and `str_idx` is the byte index of the character in the +/// corresponding string slice. +/// +/// The `Ord` impl on this type sorts lexiographically on `vec_idx`, and then `str_idx`. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct StyledIdx { + vec_idx: usize, + str_idx: usize, +} + +/// This is a utility used by `perform_split`. If the `vec_idx` in the `StyledIdx` is +/// equal to the provided value, return the `str_idx`, otherwise return `None`. +fn get_str_idx_if_vec_idx_is(idx: Option<&StyledIdx>, vec_idx: usize) -> Option { + match idx { + Some(idx) if idx.vec_idx == vec_idx => Some(idx.str_idx), + _ => None, + } +} + +/// This function assumes that `line_boundaries` is sorted according to the `Ord` impl on +/// the `StyledIdx` type. +fn perform_split<'b>( + split: &[(Style, &'b str)], + line_boundaries: Vec +) -> Vec<(Style, &'b str)> { + let mut result = Vec::new(); + + let mut idxs_iter = line_boundaries.into_iter().peekable(); + + for (split_idx, item) in split.iter().enumerate() { + let mut last_split = 0; + + // Since `line_boundaries` is sorted, we know that any remaining indexes in + // `idxs_iter` have `vec_idx >= split_idx`, and that if there are any with + // `vec_idx == split_idx`, they will be first. + // + // Using the `get_str_idx_if_vec_idx_is` utility, this loop will keep consuming + // indexes from `idxs_iter` as long as `vec_idx == split_idx` holds. Once + // `vec_idx` becomes larger than `split_idx`, the loop will finish without + // consuming that index. + // + // If `idxs_iter` is empty, or there are no indexes with `vec_idx == split_idx`, + // the loop does nothing. + while let Some(str_idx) = get_str_idx_if_vec_idx_is(idxs_iter.peek(), split_idx) { + // Consume the value we just peeked. + idxs_iter.next(); + + // This consumes the index to split at. We add one to include the newline + // together with its own line, rather than as the first character in the next + // line. + let split_at = min(str_idx + 1, item.1.len()); + + // This will fail if `line_boundaries` is not sorted. + debug_assert!(split_at >= last_split); + + // Skip splitting if the string slice would be empty. + if last_split != split_at { + result.push((item.0, &item.1[last_split..split_at])); + last_split = split_at; + } + } + + // Now append the remainder. If the current item was not split, this will + // append the entire item. + if last_split != item.1.len() { + result.push((item.0, &item.1[last_split..])); + } + } + + result +} + +fn color_highlighted_lines( + data: &mut [(Style, &str)], + lines: &HashSet, + background: Color, +) { + if lines.is_empty() { + return; + } + + let mut current_line = 0; + + for item in data { + if lines.contains(¤t_line) { + item.0.background = background; + } + + // We split the lines such that every newline is at the end of an item. + if item.1.ends_with('\n') { + current_line += 1; + } + } +} diff --git a/components/rendering/src/markdown/fence.rs b/components/rendering/src/markdown/fence.rs new file mode 100644 index 000000000..88ef33cc4 --- /dev/null +++ b/components/rendering/src/markdown/fence.rs @@ -0,0 +1,102 @@ +#[derive(Copy, Clone, Debug)] +pub struct Range { + pub from: usize, + pub to: usize, +} + +impl Range { + fn parse(s: &str) -> Option { + match s.find('-') { + Some(dash) => { + let mut from = s[..dash].parse().ok()?; + let mut to = s[dash+1..].parse().ok()?; + if to < from { + std::mem::swap(&mut from, &mut to); + } + Some(Range { + from, + to, + }) + }, + None => { + let val = s.parse().ok()?; + Some(Range { + from: val, + to: val, + }) + }, + } + } +} + +#[derive(Debug)] +pub struct FenceSettings<'a> { + pub language: Option<&'a str>, + pub line_numbers: bool, + pub highlight_lines: Vec, +} +impl<'a> FenceSettings<'a> { + pub fn new(fence_info: &'a str) -> Self { + let mut me = Self { + language: None, + line_numbers: false, + highlight_lines: Vec::new(), + }; + + for token in FenceIter::new(fence_info) { + match token { + FenceToken::Language(lang) => me.language = Some(lang), + FenceToken::EnableLineNumbers => me.line_numbers = true, + FenceToken::HighlightLines(lines) => me.highlight_lines.extend(lines), + } + } + + me + } +} + +#[derive(Debug)] +enum FenceToken<'a> { + Language(&'a str), + EnableLineNumbers, + HighlightLines(Vec), +} + +struct FenceIter<'a> { + split: std::str::Split<'a, char>, +} +impl<'a> FenceIter<'a> { + fn new(fence_info: &'a str) -> Self { + Self { + split: fence_info.split(','), + } + } +} + +impl<'a> Iterator for FenceIter<'a> { + type Item = FenceToken<'a>; + + fn next(&mut self) -> Option> { + loop { + let tok = self.split.next()?.trim(); + + let mut tok_split = tok.split('='); + match tok_split.next().unwrap_or("").trim() { + "" => continue, + "linenos" => return Some(FenceToken::EnableLineNumbers), + "hl_lines" => { + let mut ranges = Vec::new(); + for range in tok_split.next().unwrap_or("").split(' ') { + if let Some(range) = Range::parse(range) { + ranges.push(range); + } + } + return Some(FenceToken::HighlightLines(ranges)); + }, + lang => { + return Some(FenceToken::Language(lang)); + }, + } + } + } +} diff --git a/components/rendering/tests/codeblock_hl_lines.rs b/components/rendering/tests/codeblock_hl_lines.rs new file mode 100644 index 000000000..e61e8d74a --- /dev/null +++ b/components/rendering/tests/codeblock_hl_lines.rs @@ -0,0 +1,312 @@ +use std::collections::HashMap; + +use tera::Tera; + +use config::Config; +use front_matter::InsertAnchor; +use rendering::{render_content, RenderContext}; +use templates::ZOLA_TERA; +use utils::slugs::SlugifyStrategy; + +macro_rules! colored_html_line { + ( @no $s:expr ) => {{ + let mut result = "".to_string(); + result.push_str($s); + result.push_str("\n"); + result + }}; + ( @hl $s:expr ) => {{ + let mut result = "".to_string(); + result.push_str($s); + result.push_str("\n"); + result + }}; +} + +macro_rules! colored_html { + ( $(@$kind:tt $s:expr),* $(,)* ) => {{ + let mut result = "
\n".to_string();
+        $(
+            result.push_str(colored_html_line!(@$kind $s).as_str());
+        )*
+        result.push_str("
"); + result + }}; +} + +#[test] +fn hl_lines_simple() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=2 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @no "foo", + @hl "bar", + @no "bar\nbaz", + )); +} + +#[test] +fn hl_lines_in_middle() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=2-3 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @no "foo", + @hl "bar\nbar", + @no "baz", + )); +} + +#[test] +fn hl_lines_all() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=1-4 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo\nbar\nbar\nbaz", + )); +} + +#[test] +fn hl_lines_start_from_one() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=1-3 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo\nbar\nbar", + @no "baz", + )); +} + +#[test] +fn hl_lines_start_from_zero() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=0-3 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo\nbar\nbar", + @no "baz", + )); +} + +#[test] +fn hl_lines_end() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=3-4 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @no "foo\nbar", + @hl "bar\nbaz", + )); +} + +#[test] +fn hl_lines_end_out_of_bounds() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=3-4294967295 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @no "foo\nbar", + @hl "bar\nbaz", + )); +} + +#[test] +fn hl_lines_overlap() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=2-3 1-2 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo\nbar\nbar", + @no "baz", + )); +} +#[test] +fn hl_lines_multiple() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=2-3,hl_lines=1-2 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo\nbar\nbar", + @no "baz", + )); +} + +#[test] +fn hl_lines_extra_spaces() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +``` hl_lines = 2 - 3 1 - 2 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo\nbar\nbar", + @no "baz", + )); +} + +#[test] +fn hl_lines_int_and_range() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=1 3-4 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @hl "foo", + @no "bar", + @hl "bar\nbaz", + )); +} + +#[test] +fn hl_lines_single_line_range() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=2-2 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @no "foo", + @hl "bar", + @no "bar\nbaz", + )); +} + + +#[test] +fn hl_lines_reverse_range() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let mut config = Config::default(); + config.highlight_code = true; + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content(r#" +```hl_lines=3-2 +foo +bar +bar +baz +``` + "#, &context).unwrap(); + assert_eq!(res.body, colored_html!( + @no "foo", + @hl "bar\nbar", + @no "baz", + )); +}