From e9b5c97b40b121bafd5c765c202b58e0e1bb8f4a Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 15:48:06 -0700 Subject: [PATCH 01/34] Uncomment tests --- tests/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests.rs b/tests/tests.rs index 200ef6b..7215557 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,5 +1,5 @@ mod commonmark { - // mod atx_headings; + mod atx_headings; // mod autolinks; // mod backslash_escapes; // mod blank_lines; From 5bba68ec13cd7aacbe60e6d5c8c2ba6c3460e645 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 15:57:09 -0700 Subject: [PATCH 02/34] Passing 62 --- src/lib.rs | 18 ++++++++++-------- tests/e2e.rs | 18 +++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 000dff4..6495802 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,15 +197,17 @@ pub fn parse(tokens: &[Token]) -> String { } }, Token::Header(l, t, lbl) => { - let id = match lbl { - Some(text) => text.to_ascii_lowercase(), - None => t.to_ascii_lowercase(), + match lbl { + Some(lbl_text) => html.push_str(format!("{text}", + level=l, + text=sanitize_display_text(t), + id=sanitize_display_text(&lbl_text.replace(" ", "-"))) + .as_str()), + None => html.push_str(format!("{text}", + level=l, + text=sanitize_display_text(t)) + .as_str()), }; - html.push_str(format!("{text}\n", - level=l, - text=sanitize_display_text(t), - id=sanitize_display_text(&id.replace(" ", "-"))) - .as_str()) }, Token::TaskListItem(c,t) => { if in_task_list == false { diff --git a/tests/e2e.rs b/tests/e2e.rs index a132f63..be953e7 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -7,13 +7,13 @@ use mini_markdown::lexer::Token; fn test_simple_render() { let mut tests = Vec::new(); tests.extend(vec![ - ("# Heading level 1", "

Heading level 1

\n"), - ("## Heading level 2", "

Heading level 2

\n"), - ("### Heading level 3", "

Heading level 3

\n"), - ("#### Heading level 4", "

Heading level 4

\n"), - ("##### Heading level 5", "
Heading level 5
\n"), - ("###### Heading level 6", "
Heading level 6
\n"), - ("####### Invalid Heading level 7", "
Invalid Heading level 7
\n"), + ("# Heading level 1", "

Heading level 1

"), + ("## Heading level 2", "

Heading level 2

"), + ("### Heading level 3", "

Heading level 3

"), + ("#### Heading level 4", "

Heading level 4

"), + ("##### Heading level 5", "
Heading level 5
"), + ("###### Heading level 6", "
Heading level 6
"), + ("####### Invalid Heading level 7", "
Invalid Heading level 7
"), ("Some text _with italics_ in the same paragraph\n", "

Some text with italics in the same paragraph

\n"), ("Some text! With exclamations!", "

Some text! With exclamations!

\n"), @@ -205,9 +205,9 @@ fn test_paragraphs(){ let mut tests = Vec::new(); tests.extend(vec![ ("Paragraph 1.\n\n```\nBlock text should end a paragraph.\n```\n\nThis is paragraph two.\n\n## Heading\n\nParagraph the third.", - "

Paragraph 1.

\n
Block text should end a paragraph.\n
\n

This is paragraph two.

\n

Heading

\n\n

Paragraph the third.

\n"), + "

Paragraph 1.

\n
Block text should end a paragraph.\n
\n

This is paragraph two.

\n

Heading

\n

Paragraph the third.

\n"), ("# Post title\nSection text\n# Second section\nGood content", - "

Post title

\n\n

Section text

\n

Second section

\n\n

Good content

\n") + "

Post title

\n

Section text

\n

Second section

\n

Good content

\n") ]); for test in tests.iter(){ From 9fdfe1b5811627e111a974b4520c6983321c42d7 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:00:48 -0700 Subject: [PATCH 03/34] Treat runs of more than 6 hashes as text also use usize rather than u8 for hash count --- src/lexer.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 7c572f7..7df18bd 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -6,7 +6,7 @@ pub enum Token { /// String: Body of unstructured text Plaintext(String), /// u8: Header level (1..=6). str: Header text. Option: html label - Header(u8, String, Option), + Header(usize, String, Option), /// str: Text for list entry UnorderedListEntry(String), /// str: Text for list entry @@ -109,6 +109,9 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result 6 { + return Err(ParseError{content: hashes}); + } let level = std::cmp::min(6, hashes.len() as u8); let line = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or(""); if line.contains("{#") && @@ -117,9 +120,9 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result(char_iter: &mut MiniIter<'a>) -> Result> { From 68d6f0f578d7054dbf21850515636c6980ffa8a6 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:03:24 -0700 Subject: [PATCH 04/34] Reorder to avoid consuming space --- src/lexer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 7df18bd..3353100 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -106,10 +106,10 @@ pub(crate) fn push_str<'a>(t: &mut Vec, s: &'a str) { pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result> { let hashes = char_iter.consume_while_case_holds(&|c| c == "#").unwrap_or(""); - if char_iter.next_if_eq(&" ").is_none(){ + if hashes.len() > 6 { return Err(ParseError{content: hashes}); } - if hashes.len() > 6 { + if char_iter.next_if_eq(&" ").is_none(){ return Err(ParseError{content: hashes}); } let level = std::cmp::min(6, hashes.len() as u8); From 9670d89daa5a51503cd912d0d9c76be2d692e5ec Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:08:36 -0700 Subject: [PATCH 05/34] Require space or tab --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index 3353100..8b08828 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -109,7 +109,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result 6 { return Err(ParseError{content: hashes}); } - if char_iter.next_if_eq(&" ").is_none(){ + if char_iter.next_if_eq(&" ").is_none() && char_iter.next_if_eq(&"\t").is_none(){ return Err(ParseError{content: hashes}); } let level = std::cmp::min(6, hashes.len() as u8); From 7d89006cd0f68cf07831a02dfc57acda7fc25b7e Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:12:14 -0700 Subject: [PATCH 06/34] Consume # as plaintext in the event that one is escaped --- src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6495802..65ecde0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -103,7 +103,10 @@ pub fn lex(source: &str) -> Vec{ // Parse "\" to escape a markdown control character "\\" => { char_iter.next(); - if char_iter.peek().is_some(){ + if char_iter.peek() == Some(&"#"){ + let hashes = char_iter.consume_while_case_holds(&|c| c == "#").unwrap_or(""); + push_str(&mut tokens, hashes); + } else if char_iter.peek().is_some(){ push_str(&mut tokens, char_iter.next().unwrap()); } } From 6d4e5b8cf50b833eaad835c241f1f509c70fa215 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:30:05 -0700 Subject: [PATCH 07/34] Parse headings as markdown --- src/lexer.rs | 6 ++++-- src/lib.rs | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 8b08828..4b8abfa 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -112,7 +112,6 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") + .strip_suffix("

\n").unwrap_or("").to_string(); + return Ok(Token::Header(hashes.len(), parsed_line, None)); } pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Result> { diff --git a/src/lib.rs b/src/lib.rs index 65ecde0..7982a7e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -203,12 +203,12 @@ pub fn parse(tokens: &[Token]) -> String { match lbl { Some(lbl_text) => html.push_str(format!("{text}", level=l, - text=sanitize_display_text(t), + text=t, id=sanitize_display_text(&lbl_text.replace(" ", "-"))) .as_str()), None => html.push_str(format!("{text}", level=l, - text=sanitize_display_text(t)) + text=t) .as_str()), }; }, From 74b0104d369cf08bd77f052f98a6162f7ae493ba Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:31:33 -0700 Subject: [PATCH 08/34] Handle markdown headings in both codepaths --- src/lexer.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 4b8abfa..4808e57 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -117,9 +117,12 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") + .strip_suffix("

\n").unwrap_or("").to_string(); + return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } let parsed_line = crate::render(line) .strip_prefix("

").unwrap_or("") From b61b7d0154bc950230de1195ae6d565b78a75a7c Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:34:20 -0700 Subject: [PATCH 09/34] Trim heading --- src/lexer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 4808e57..f7b04c7 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,12 +121,12 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") - .strip_suffix("

\n").unwrap_or("").to_string(); + .strip_suffix("

\n").unwrap_or("").trim().to_string(); return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } let parsed_line = crate::render(line) .strip_prefix("

").unwrap_or("") - .strip_suffix("

\n").unwrap_or("").to_string(); + .strip_suffix("

\n").unwrap_or("").trim().to_string(); return Ok(Token::Header(hashes.len(), parsed_line, None)); } From 11b43c9a696ee3792287338e0a5a41f94b8ab263 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:36:32 -0700 Subject: [PATCH 10/34] Port merged tab and space lexing --- src/lexer.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/lexer.rs b/src/lexer.rs index f7b04c7..45c9241 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -198,6 +198,31 @@ pub(crate) fn lex_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result(char_iter: &mut MiniIter<'a>) -> Result, ParseError<'a>> { + let whitespace = char_iter.consume_while_case_holds(&|c| c == "\t" || c == " "); + match whitespace { + None => return Err(ParseError{content: ""}), + Some(s) if (1..=3).contains(&s.len()) && !s.contains("\t") => return Err(ParseError{content: s}), + Some(s) if s.len() >= 2 && + !s.contains("\t") && + char_iter.peek() == Some("\n") => return Ok(Token::LineBreak), + Some(_s) => {}, + } + let whitespace = whitespace.unwrap_or(""); + let start_index = char_iter.get_index(); + let line = char_iter.consume_until_tail_is("\n").unwrap_or(""); + if char_iter.peek() == Some("\t") || char_iter.peek() == Some(" ") { + match lex_tabs_spaces(char_iter) { + Ok(Token::CodeBlock(_content, _lang)) => { + return Ok(Token::CodeBlock(char_iter.get_substring_from(start_index).unwrap_or(""),""))}, + Err(e) => return Err(e), + Ok(_) => return Err(ParseError{content: ""}), + } + } + return Ok(Token::CodeBlock(line, "")) +} + pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result> { let start_index = char_iter.get_index(); let leading_ticks = char_iter.consume_while_case_holds(&|c| c == "`").unwrap_or(""); From 03c8c839b44981df4131bab72b7a2b03cb4e8b28 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:39:28 -0700 Subject: [PATCH 11/34] Port merged tab and space lexing part 2 --- src/lexer.rs | 36 ++++-------------------------------- src/lib.rs | 10 ++-------- 2 files changed, 6 insertions(+), 40 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 45c9241..63a4a1e 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -179,27 +179,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul } } -pub(crate) fn lex_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result>{ - let spaces = char_iter.consume_while_case_holds(&|c| c == " ").unwrap_or(""); - // Case 1: space in text => return char - if spaces.len() == 1 { - return Err(ParseError{content: spaces}) - } - // Case 2: two or more spaces followed by \n => line break - if char_iter.next_if_eq("\n").is_some() { - return Ok(Token::LineBreak); - } - // Case 3: Tokenize for parser - match spaces.len(){ - 4 => return Ok(Token::Tab), - 8 => return Ok(Token::DoubleTab), - _ => {} - } - Err(ParseError{content: spaces}) -} - - -pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result, ParseError<'a>> { +pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result> { let whitespace = char_iter.consume_while_case_holds(&|c| c == "\t" || c == " "); match whitespace { None => return Err(ParseError{content: ""}), @@ -211,16 +191,16 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result { - return Ok(Token::CodeBlock(char_iter.get_substring_from(start_index).unwrap_or(""),""))}, + return Ok(Token::CodeBlock(char_iter.get_substring_from(start_index).unwrap_or("").to_string(),"".to_string()))}, Err(e) => return Err(e), Ok(_) => return Err(ParseError{content: ""}), } } - return Ok(Token::CodeBlock(line, "")) + return Ok(Token::CodeBlock(line, "".to_string())) } pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result> { @@ -266,14 +246,6 @@ pub(crate) fn lex_newlines<'a>(char_iter: &mut MiniIter<'a>) -> Result(char_iter: &mut MiniIter<'a>) -> Result> { - match char_iter.consume_while_case_holds(&|c| c == "\t") { - Some(s) if s.len() > 1 => return Ok(Token::DoubleTab), - Some(s) if s.len() == 1 => return Ok(Token::Tab), - _ => return Err(ParseError{content: ""}), - } -} - pub(crate) fn lex_blockquotes<'a>(char_iter: &mut MiniIter<'a>) -> Result> { let right_arrows = char_iter.consume_while_case_holds(&|c| c == ">").unwrap_or(""); match char_iter.next_if_eq(" ") { diff --git a/src/lib.rs b/src/lib.rs index 7982a7e..6f5ef66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,8 +38,8 @@ pub fn lex(source: &str) -> Vec{ Err(e) => push_str(&mut tokens, e.content), } }, - " " => { - match lex_spaces(&mut char_iter) { + " " | "\t" => { + match lex_tabs_spaces(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), } @@ -56,12 +56,6 @@ pub fn lex(source: &str) -> Vec{ Err(e) => push_str(&mut tokens, e.content), } }, - "\t" => { - match lex_tabs(&mut char_iter) { - Ok(t) => tokens.push(t), - Err(e) => push_str(&mut tokens, e.content), - } - }, ">" => { match lex_blockquotes(&mut char_iter) { Ok(t) => { From fd7cb9995a876c97dc2589f02ddd2b13d9c6029b Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:45:39 -0700 Subject: [PATCH 12/34] Fix test that broke --- src/lexer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 63a4a1e..927421b 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -124,9 +124,11 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result\n").unwrap_or("").trim().to_string(); return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } + println!(">? {:?}", line); let parsed_line = crate::render(line) .strip_prefix("

").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); + println!(">? {:?}", parsed_line); return Ok(Token::Header(hashes.len(), parsed_line, None)); } @@ -180,6 +182,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul } pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result> { + let start_index = char_iter.get_index(); let whitespace = char_iter.consume_while_case_holds(&|c| c == "\t" || c == " "); match whitespace { None => return Err(ParseError{content: ""}), @@ -190,7 +193,6 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result {}, } let whitespace = whitespace.unwrap_or(""); - let start_index = char_iter.get_index(); let line = char_iter.consume_until_tail_is("\n").unwrap_or("").to_string(); if char_iter.peek() == Some("\t") || char_iter.peek() == Some(" ") { match lex_tabs_spaces(char_iter) { @@ -200,7 +202,7 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result return Err(ParseError{content: ""}), } } - return Ok(Token::CodeBlock(line, "".to_string())) + return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")}) } pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result> { From dbc268b9dc97a33e7abfcc97ac45372651ad5c89 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 16:46:27 -0700 Subject: [PATCH 13/34] Remove some prints --- src/lexer.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 927421b..0792b9a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -124,11 +124,9 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result\n").unwrap_or("").trim().to_string(); return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } - println!(">? {:?}", line); let parsed_line = crate::render(line) .strip_prefix("

").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); - println!(">? {:?}", parsed_line); return Ok(Token::Header(hashes.len(), parsed_line, None)); } From b41acbb25f815fd0c99702dfbb755eb96e352fa7 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Tue, 2 Aug 2022 17:00:49 -0700 Subject: [PATCH 14/34] factor out line parsing --- src/lexer.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 0792b9a..d6af10a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -113,20 +113,20 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") - .strip_suffix("

\n").unwrap_or("").trim().to_string(); - return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } let parsed_line = crate::render(line) .strip_prefix("

").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); + if heading != "" { + return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); + } return Ok(Token::Header(hashes.len(), parsed_line, None)); } From b6d4fe9c954ab0f743d42b7258e8fa486f8fb287 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 13:56:28 -0700 Subject: [PATCH 15/34] Passing 69 --- src/lexer.rs | 4 ++++ src/lib.rs | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index d6af10a..11f5f39 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -192,6 +192,10 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result return Ok(Token::Code(line)), + _ => {}, + } if char_iter.peek() == Some("\t") || char_iter.peek() == Some(" ") { match lex_tabs_spaces(char_iter) { Ok(Token::CodeBlock(_content, _lang)) => { diff --git a/src/lib.rs b/src/lib.rs index 6f5ef66..5855950 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -157,7 +157,7 @@ pub fn parse(tokens: &[Token]) -> String { in_paragraph = false; html.push_str("

") }, - Token::Plaintext(_) | Token::Italic(_) | Token::Bold(_) | Token::BoldItalic(_) | Token::Strikethrough(_) | Token::Code(_) if !in_paragraph => { + Token::Plaintext(_) | Token::Italic(_) | Token::Bold(_) | Token::BoldItalic(_) | Token::Strikethrough(_) if !in_paragraph => { for _i in 0..quote_level { html.push_str(""); quote_level-=1; @@ -245,7 +245,7 @@ pub fn parse(tokens: &[Token]) -> String { Token::HorizontalRule => {html.push_str("
")}, Token::Strikethrough(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::Code(t) => { - html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, + html.push_str(format!("
{}
", sanitize_display_text(t)).as_str())}, Token::CodeBlock(t, lang) => { html.push_str("
");
                 match lang.as_str() {
@@ -369,6 +369,7 @@ pub fn parse(tokens: &[Token]) -> String {
         }
     }
 
+
     // Add references
     if references.len() > 0{
         html.push_str("
\n"); @@ -383,6 +384,9 @@ pub fn parse(tokens: &[Token]) -> String { html.push_str("\t\n"); html.push_str("
\n"); } + if html.chars().last().unwrap() != '\n' { + html.push('\n'); + } html } From 09cfb6868c6393e02277fe5a4c139b4054eef70b Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 14:11:26 -0700 Subject: [PATCH 16/34] Strip optional closing #'s --- src/lexer.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index 11f5f39..db91711 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,8 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); if heading != "" { From 3156ffbf9f6fffcdd244eb09510d3443baaf055d Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 14:19:52 -0700 Subject: [PATCH 17/34] Also strip trailing spaces --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index db91711..beb7234 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -122,7 +122,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); if heading != "" { From 79e243c51fbe531a21dfb980454dfe584c41b7fa Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 14:21:57 -0700 Subject: [PATCH 18/34] Strip but better and order independent --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index beb7234..6139b0a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -122,7 +122,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); if heading != "" { From e2fec24dae76d4083957adc6e3ea9070fe82ec49 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:06:21 -0700 Subject: [PATCH 19/34] Check if on newline for header parsing --- src/lexer.rs | 1 - src/lib.rs | 13 ++++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 6139b0a..b35424c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,6 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); diff --git a/src/lib.rs b/src/lib.rs index 5855950..475c6e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,13 +8,20 @@ pub(crate) struct SanitizationError{ pub(crate) content: String, } +fn on_newline(tokens: &Vec) -> bool { + match tokens.last() { + Some(Token::Plaintext(t)) => t.ends_with("\n"), + _ => true, + } +} + /// Convert source markdown to an ordered vector of tokens pub fn lex(source: &str) -> Vec{ let mut char_iter = MiniIter::new(source); let mut tokens = Vec::new(); while char_iter.peek().is_some(){ match char_iter.peek().unwrap(){ - "#" => { + "#" if on_newline(&tokens) => { match lex_heading(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), @@ -396,6 +403,10 @@ pub fn render(source: &str) -> String { parse(&lex(source)) } +pub fn render_ignore(source: &str, ignore: &[char]) -> String { + parse(&lex(source)) +} + /// Replace potentially unsafe characters with html entities pub(crate) fn sanitize_display_text(source: &str) -> String { source.replace('&', "&") From 97dacb324519115f9a28ccd5af6e61ca9893fb78 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:08:39 -0700 Subject: [PATCH 20/34] fixup! Check if on newline for header parsing Nevermind it breaks things --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 475c6e7..860b7a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ pub fn lex(source: &str) -> Vec{ let mut tokens = Vec::new(); while char_iter.peek().is_some(){ match char_iter.peek().unwrap(){ - "#" if on_newline(&tokens) => { + "#" => { match lex_heading(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), From f7263c9eff9f3108b67717b73db9b743e384f8e4 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:22:42 -0700 Subject: [PATCH 21/34] Remove newline check --- src/lib.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 860b7a0..5bdf5ef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,13 +8,6 @@ pub(crate) struct SanitizationError{ pub(crate) content: String, } -fn on_newline(tokens: &Vec) -> bool { - match tokens.last() { - Some(Token::Plaintext(t)) => t.ends_with("\n"), - _ => true, - } -} - /// Convert source markdown to an ordered vector of tokens pub fn lex(source: &str) -> Vec{ let mut char_iter = MiniIter::new(source); From 2899bd258e914483ebb464f19d2133841410b79f Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:55:02 -0700 Subject: [PATCH 22/34] Add the capacity to ignore control chars. Wire up usage for # and now passing 74 --- src/lexer.rs | 6 +++--- src/lib.rs | 10 +++++----- tests/e2e.rs | 6 +++--- tests/lexer.rs | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index b35424c..8ee6277 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); if heading != "" { @@ -420,7 +420,7 @@ fn parse_details<'a>(char_iter: &mut MiniIter<'a>) -> Result").count(); } } - let inner_tokens = crate::lex(remaining_text.strip_suffix("").unwrap_or("")); + let inner_tokens = crate::lex(remaining_text.strip_suffix("").unwrap_or(""), &[]); Ok(Token::Detail(summary_line.to_string(), inner_tokens)) } @@ -461,7 +461,7 @@ pub(crate) fn lex_pipes<'a>(char_iter: &mut MiniIter<'a>) -> Result Vec{ +pub fn lex(source: &str, ignore: &[char]) -> Vec{ let mut char_iter = MiniIter::new(source); let mut tokens = Vec::new(); while char_iter.peek().is_some(){ match char_iter.peek().unwrap(){ - "#" => { + "#" if !ignore.contains(&'#') => { match lex_heading(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), @@ -393,11 +393,11 @@ pub fn parse(tokens: &[Token]) -> String { /// Render HTML from a source markdown string /// Output is sanitized to prevent script injection pub fn render(source: &str) -> String { - parse(&lex(source)) + parse(&lex(source, &[])) } -pub fn render_ignore(source: &str, ignore: &[char]) -> String { - parse(&lex(source)) +pub(crate) fn render_ignore(source: &str, ignore: &[char]) -> String { + parse(&lex(source, ignore)) } /// Replace potentially unsafe characters with html entities diff --git a/tests/e2e.rs b/tests/e2e.rs index be953e7..3b52b78 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -75,7 +75,7 @@ fn test_moderate_render(){ for test in tests.iter(){ let html = render(test.0); if html != test.1 { - println!("?? {:?}", lex(test.0)); + println!("?? {:?}", lex(test.0, &[])); } assert_eq!(html, test.1); } @@ -97,7 +97,7 @@ fn test_table_render() { let html = render(test.0); if html != test.1 { println!("Test failing\n{:?}\n{:?}", html, test.1); - println!("> {:?}", lex(test.0)); + println!("> {:?}", lex(test.0, &[])); for (c1, c2) in test.1.chars().zip(html.chars()) { if c1 != c2 { println!("Difference in {:?} {:?}", c1, c2); @@ -164,7 +164,7 @@ fn test_lists(){ for test in tests.iter(){ let html = render(test.0); if html != test.1 { - println!("> {:?}", lex(test.0)); + println!("> {:?}", lex(test.0, &[])); } assert_eq!(html, test.1); } diff --git a/tests/lexer.rs b/tests/lexer.rs index b9d4de4..fcd0a3a 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs @@ -80,7 +80,7 @@ fn test_lex() { ("- [X] A checked box\n- [X] Also a checked box", vec![Token::TaskListItem(TaskBox::Checked, "A checked box".to_string()), Token::Newline, Token::TaskListItem(TaskBox::Checked, "Also a checked box".to_string())]), ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -104,7 +104,7 @@ fn test_lex_plaintext() { Token::Newline]) ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -120,7 +120,7 @@ fn test_blockquote_lex() { ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -137,7 +137,7 @@ fn test_footnote_lex() { ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -153,7 +153,7 @@ fn test_link_lex(){ ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } \ No newline at end of file From fba26f57575a6863ea16ac69f3a0eef418299ed3 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 16:44:37 -0700 Subject: [PATCH 23/34] Handle case where there is no content in html and handle zero length headings --- src/lexer.rs | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 8ee6277..65c6b06 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -109,7 +109,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result 6 { return Err(ParseError{content: hashes}); } - if char_iter.next_if_eq(&" ").is_none() && char_iter.next_if_eq(&"\t").is_none(){ + if char_iter.next_if_eq(&" ").is_none() && char_iter.next_if_eq(&"\t").is_none() && char_iter.peek() != Some(&"\n"){ return Err(ParseError{content: hashes}); } let line = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or(""); diff --git a/src/lib.rs b/src/lib.rs index 06c3659..5b6cf3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -384,7 +384,7 @@ pub fn parse(tokens: &[Token]) -> String { html.push_str("\t\n"); html.push_str("\n"); } - if html.chars().last().unwrap() != '\n' { + if html.chars().last().unwrap_or(' ') != '\n' { html.push('\n'); } html From 2382082807a4a9c12952df59c74414e4af8667e0 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 15:31:36 -0700 Subject: [PATCH 24/34] Pre-removal of lexing newlines. Should handle them in other cases or treat as plaintext --- src/lexer.rs | 17 ++++++++++------- src/lib.rs | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 65c6b06..761ae8e 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,7 +1,7 @@ use crate::MiniIter; /// Tokens are the intermediate representation format in the markdown to html conversion -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum Token { /// String: Body of unstructured text Plaintext(String), @@ -50,7 +50,7 @@ pub enum Token { } /// Holds the possible states of a taskbox in a task list -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum TaskBox { Checked, Unchecked, @@ -179,7 +179,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul } } -pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result> { +pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec) -> Result> { let start_index = char_iter.get_index(); let whitespace = char_iter.consume_while_case_holds(&|c| c == "\t" || c == " "); match whitespace { @@ -191,13 +191,16 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result {}, } let whitespace = whitespace.unwrap_or(""); - let line = char_iter.consume_until_tail_is("\n").unwrap_or("").to_string(); + let line = char_iter.consume_until_tail_is("\n").unwrap_or(""); + println!("??> {:?}", tokens.last()); + println!(">>? {:?}", line); match whitespace { - "\t" | " " => return Ok(Token::Code(line)), + " " if (matches!(tokens.last(), Some(Token::Plaintext(_))) && line.contains('#')) => return Err(ParseError{content: line}), + "\t" | " " => return Ok(Token::Code(line.to_string())), _ => {}, } if char_iter.peek() == Some("\t") || char_iter.peek() == Some(" ") { - match lex_tabs_spaces(char_iter) { + match lex_tabs_spaces(char_iter, tokens) { Ok(Token::CodeBlock(_content, _lang)) => { return Ok(Token::CodeBlock(char_iter.get_substring_from(start_index).unwrap_or("").to_string(),"".to_string()))}, Err(e) => return Err(e), @@ -242,7 +245,7 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result(char_iter: &mut MiniIter<'a>) -> Result> { +pub(crate) fn lex_newlines<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec) -> Result> { match char_iter.consume_while_case_holds(&|c| c == "\n") { Some(s) if s.len() >= 1 => return Ok(Token::Newline), Some(s) if s.len() < 1 => return Err(ParseError{content: s}), diff --git a/src/lib.rs b/src/lib.rs index 5b6cf3d..c9ccabf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,7 +39,7 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec{ } }, " " | "\t" => { - match lex_tabs_spaces(&mut char_iter) { + match lex_tabs_spaces(&mut char_iter, &tokens) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), } @@ -51,7 +51,7 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec{ } }, "\n" => { - match lex_newlines(&mut char_iter) { + match lex_newlines(&mut char_iter, &tokens) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), } From 0e420eafab4f964b799024bcefc507ad51032d1c Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:01:07 -0700 Subject: [PATCH 25/34] 70 working. Others broken --- src/lexer.rs | 6 ++++-- src/lib.rs | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 761ae8e..e4d492a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -194,8 +194,10 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec {:?}", tokens.last()); println!(">>? {:?}", line); + println!(">>? {:?}", whitespace); match whitespace { " " if (matches!(tokens.last(), Some(Token::Plaintext(_))) && line.contains('#')) => return Err(ParseError{content: line}), + " " if (matches!(tokens.last(), Some(Token::Newline)) && line.contains('#')) => return Err(ParseError{content: line}), "\t" | " " => return Ok(Token::Code(line.to_string())), _ => {}, } @@ -247,8 +249,8 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result(char_iter: &mut MiniIter<'a>, tokens: &Vec) -> Result> { match char_iter.consume_while_case_holds(&|c| c == "\n") { - Some(s) if s.len() >= 1 => return Ok(Token::Newline), - Some(s) if s.len() < 1 => return Err(ParseError{content: s}), + Some(s) if s.len() >= 2 => return Ok(Token::Newline), + Some(s) if s.len() < 2 => return Err(ParseError{content: s}), _ => return Err(ParseError{content: ""}), } } diff --git a/src/lib.rs b/src/lib.rs index c9ccabf..5b1f7e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,6 +114,7 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec{ /// Parse tokens to produce safe html output pub fn parse(tokens: &[Token]) -> String { + println!("??? {:?}", tokens); let mut html = String::with_capacity(tokens.len()*100); let mut in_task_list = false; let mut in_ordered_list = false; @@ -171,6 +172,7 @@ pub fn parse(tokens: &[Token]) -> String { // Add content match token { Token::Plaintext(t) => { + println!("?????? {:?}", t); if t.trim().is_empty() {continue} // Handle references @@ -188,9 +190,9 @@ pub fn parse(tokens: &[Token]) -> String { count+=1; } else {s.push_str(tok)} } - html.push_str(&s); + html.push_str(&s.trim_end()); } else { - html.push_str(sanitize_display_text(t.trim_start_matches('\n')).as_str()) + html.push_str(sanitize_display_text(t.trim_start_matches('\n')).trim_end()) } }, Token::Header(l, t, lbl) => { From 2c8fc962c566d9694a160c0192c4ee857e2aec97 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:01:58 -0700 Subject: [PATCH 26/34] Add newlines after headings in html --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5b1f7e5..b454dd0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,12 +197,12 @@ pub fn parse(tokens: &[Token]) -> String { }, Token::Header(l, t, lbl) => { match lbl { - Some(lbl_text) => html.push_str(format!("{text}", + Some(lbl_text) => html.push_str(format!("{text}\n", level=l, text=t, id=sanitize_display_text(&lbl_text.replace(" ", "-"))) .as_str()), - None => html.push_str(format!("{text}", + None => html.push_str(format!("{text}\n", level=l, text=t) .as_str()), From 749108f669c6960cb3d568f57729da3317cd64df Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:04:35 -0700 Subject: [PATCH 27/34] Only trim newlines on plaintext --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b454dd0..e6f1144 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -190,9 +190,9 @@ pub fn parse(tokens: &[Token]) -> String { count+=1; } else {s.push_str(tok)} } - html.push_str(&s.trim_end()); + html.push_str(&s.trim_end_matches('\n')); } else { - html.push_str(sanitize_display_text(t.trim_start_matches('\n')).trim_end()) + html.push_str(sanitize_display_text(t.trim_start_matches('\n')).trim_end_matches('\n')) } }, Token::Header(l, t, lbl) => { From ec416f649e03bc80ac3a2fbc14eb87380f7a1d87 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:24:28 -0700 Subject: [PATCH 28/34] Better handle trailing header closer thingys --- src/lexer.rs | 16 ++++++++++++---- src/lib.rs | 2 -- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index e4d492a..536e37c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,9 +121,20 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result { + match right.chars().all(|c| c == '#') { + true => left, + false => line, + } + }, + None => line, + }; + let parsed_line = crate::render_ignore(line_without_optional_trailing_hash_sequence.trim_end_matches(&[' ', '\t']), &['#']) .strip_prefix("

").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); + println!("line: {:?}", line); + println!("parsed_line: {:?}", parsed_line); if heading != "" { return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } @@ -192,9 +203,6 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec {:?}", tokens.last()); - println!(">>? {:?}", line); - println!(">>? {:?}", whitespace); match whitespace { " " if (matches!(tokens.last(), Some(Token::Plaintext(_))) && line.contains('#')) => return Err(ParseError{content: line}), " " if (matches!(tokens.last(), Some(Token::Newline)) && line.contains('#')) => return Err(ParseError{content: line}), diff --git a/src/lib.rs b/src/lib.rs index e6f1144..e28b690 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,7 +114,6 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec{ /// Parse tokens to produce safe html output pub fn parse(tokens: &[Token]) -> String { - println!("??? {:?}", tokens); let mut html = String::with_capacity(tokens.len()*100); let mut in_task_list = false; let mut in_ordered_list = false; @@ -172,7 +171,6 @@ pub fn parse(tokens: &[Token]) -> String { // Add content match token { Token::Plaintext(t) => { - println!("?????? {:?}", t); if t.trim().is_empty() {continue} // Handle references From 3dd652a71cb402478a7cf6394ba05a572a7a2b3a Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:26:44 -0700 Subject: [PATCH 29/34] And better still --- src/lexer.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 536e37c..db2064c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result { match right.chars().all(|c| c == '#') { true => left, @@ -133,8 +133,6 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("

\n").unwrap_or("").trim().to_string(); - println!("line: {:?}", line); - println!("parsed_line: {:?}", parsed_line); if heading != "" { return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } From 35585300f70ecc1f6c51a38930a56ca9e1daedd2 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:29:12 -0700 Subject: [PATCH 30/34] Add newline after horizontal rule --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e28b690..4021b50 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -242,7 +242,7 @@ pub fn parse(tokens: &[Token]) -> String { Token::Bold(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::BoldItalic(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::LineBreak => {html.push_str("
")}, - Token::HorizontalRule => {html.push_str("
")}, + Token::HorizontalRule => {html.push_str("
\n")}, Token::Strikethrough(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::Code(t) => { html.push_str(format!("
{}
", sanitize_display_text(t)).as_str())}, From d144f6c8de26c83200ebc1c42bd12ecbd766a460 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:36:04 -0700 Subject: [PATCH 31/34] Don't add a newline if the last thing added was a newline --- src/lib.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4021b50..342b0bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -155,7 +155,7 @@ pub fn parse(tokens: &[Token]) -> String { Token::BlockQuote(_, _) | Token::Newline if quote_level > 0 => {}, Token::CodeBlock(_, _) | Token::Newline | Token::Header(_, _, _) if in_paragraph => { in_paragraph = false; - html.push_str("

") + html.push_str("

\n") }, Token::Plaintext(_) | Token::Italic(_) | Token::Bold(_) | Token::BoldItalic(_) | Token::Strikethrough(_) if !in_paragraph => { for _i in 0..quote_level { @@ -235,7 +235,12 @@ pub fn parse(tokens: &[Token]) -> String { } html.push_str(format!("
  • {}
  • ", sanitize_display_text(t)).as_str()) }, - Token::Newline => {html.push('\n')}, + Token::Newline => { + match html.chars().last() { + Some('\n') => {} + _ => html.push('\n'), + } + }, Token::Tab => {html.push('\t')}, Token::DoubleTab => {html.push_str("\t\t")}, Token::Italic(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, From 7f7de096468c7e21cec4ec3faa9ae41d29e24113 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:41:28 -0700 Subject: [PATCH 32/34] =?UTF-8?q?ATX=20header=20support=20=F0=9F=99=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lexer.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lexer.rs b/src/lexer.rs index db2064c..fe2b4a6 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -130,9 +130,15 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result line, }; + if line.chars().all(|c| c == '#') { + return Ok(Token::Header(hashes.len(), "".to_string(), None)); + } let parsed_line = crate::render_ignore(line_without_optional_trailing_hash_sequence.trim_end_matches(&[' ', '\t']), &['#']) .strip_prefix("

    ").unwrap_or("") .strip_suffix("

    \n").unwrap_or("").trim().to_string(); + println!("line: {:?}", line); + println!("parsed_line: {:?}", parsed_line); + println!("line_without_optional_trailing_hash_sequence: {:?}", line_without_optional_trailing_hash_sequence); if heading != "" { return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } From 109f3b519f8af21e2abdbb87567c3b0a6a380c30 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 16 Sep 2022 09:28:23 -0700 Subject: [PATCH 33/34] Remove old tests and update sanitization test --- tests/e2e.rs | 257 -------------------------------------------- tests/lexer.rs | 159 --------------------------- tests/sanitation.rs | 8 +- 3 files changed, 4 insertions(+), 420 deletions(-) delete mode 100644 tests/e2e.rs delete mode 100644 tests/lexer.rs diff --git a/tests/e2e.rs b/tests/e2e.rs deleted file mode 100644 index 3b52b78..0000000 --- a/tests/e2e.rs +++ /dev/null @@ -1,257 +0,0 @@ -use mini_markdown::render; -use mini_markdown::{lex, parse}; -use mini_markdown::lexer::Token; - - -#[test] -fn test_simple_render() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("# Heading level 1", "

    Heading level 1

    "), - ("## Heading level 2", "

    Heading level 2

    "), - ("### Heading level 3", "

    Heading level 3

    "), - ("#### Heading level 4", "

    Heading level 4

    "), - ("##### Heading level 5", "
    Heading level 5
    "), - ("###### Heading level 6", "
    Heading level 6
    "), - ("####### Invalid Heading level 7", "
    Invalid Heading level 7
    "), - ("Some text _with italics_ in the same paragraph\n", "

    Some text with italics in the same paragraph

    \n"), - ("Some text! With exclamations!", "

    Some text! With exclamations!

    \n"), - - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_moderate_render(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("Text attributes _italic_, \n**bold**, `monospace`. Some implementations may use *single-asterisks* for italic text.", - "

    Text attributes italic,

    \n

    bold, monospace. Some implementations may use single-asterisks for italic text.

    \n"), - ("Horizontal rule:\n\n---\n\nStrikethrough:\n\n~~strikethrough~~\n\n", - "

    Horizontal rule:

    \n
    \n

    Strikethrough:

    \n

    strikethrough

    \n" - ), - ("> Outer quote with some text 1.\n> \n>> Inner quote with some other text\n> Outer again", - "
    Outer quote with some text 1.
    Inner quote with some other text
    Outer again
    \n" - ), - ("```\nCode block 1\n```", - "
    Code block 1\n
    " - ), - ("```python\nCode block 2\n```", - "
    Code block 2\n
    " - ), - ("```\nMulti\nLine\nCode block\n```", - "
    Multi\nLine\nCode block\n
    " - ), - ("> Outer quote with some text.\nNon-quoted text\n> Quote with some other text", - "
    Outer quote with some text.

    Non-quoted text

    \n
    Quote with some other text
    \n" - ), - ("> Outer quote with some other text.\nNon-quoted text\nMore non-quoted\n> Quote with some other text", - "
    Outer quote with some other text.

    Non-quoted text

    \n

    More non-quoted

    \n
    Quote with some other text
    \n" - ), - ("Don't -> quote", - "

    Don't -> quote

    \n" - ), - ("Don't -> quote\n> Do Quote\nDon't quote this either", - "

    Don't -> quote

    \n
    Do Quote

    Don't quote this either

    \n" - ), - ("Testing an inline link [Link title](http://google.com)", - "

    Testing an inline link Link title

    \n" - ), - ("Testing an inline link to a header id [Link title](#some-header)", - "

    Testing an inline link to a header id Link title

    \n" - ), - ("Testing some details\n
    \nSummary text goes here\nSome text goes here\n
    ", - "

    Testing some details

    \n
    \nSummary text goes here\n\n

    Some text goes here

    \n\n
    " - ), - ("Testing some nested details
    \nOuter summary\nOuter text
    \nInner Summary\nInner text\n
    \n
    ", - "

    Testing some nested details

    \n
    \nOuter summary\n\n

    Outer text

    \n
    \nInner Summary\n\n

    Inner text

    \n\n
    \n\n
    " - ), - ]); - - for test in tests.iter(){ - let html = render(test.0); - if html != test.1 { - println!("?? {:?}", lex(test.0, &[])); - } - assert_eq!(html, test.1); - } -} - -#[test] -fn test_table_render() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("| Syntax | Description | Test Text |\n| :--- | :----: | ---: |\n| Body | Text | Here's this |\n| Paragraph | Text | And more |", - "\n\t\n\t\n\t\t\t\t\t\t\t\n\t\n\t\n\t\n\t\t\n\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\n\t\t\n\t\n\t\n
    SyntaxDescriptionTest Text
    BodyTextHere's this
    ParagraphTextAnd more
    "), - ("| Syntax2 | Description | Test Text |\n| :--- | :----: | ---: |\n| *Body* | **Text** | ***Here's this*** |\n| `Paragraph` | Text | And more |", - "\n\t\n\t\n\t\t\t\t\t\t\t\n\t\n\t\n\t\n\t\t\n\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\n\t\t\n\t\n\t\n
    Syntax2DescriptionTest Text
    BodyTextHere's this

    Paragraph

    \n
    TextAnd more
    "), - ("| Syntax3 | Description | Test Text |\n| :--- | :----: | ---: |\n| *Body* | **Text** | ***Here's this*** |\n| `Paragraph test` | Text | And more |", - "\n\t\n\t\n\t\t\t\t\t\t\t\n\t\n\t\n\t\n\t\t\n\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\n\t\t\n\t\n\t\n
    Syntax3DescriptionTest Text
    BodyTextHere's this

    Paragraph <script=foo.js>test</script>

    \n
    TextAnd more
    "), - ]); - - for test in tests.iter(){ - let html = render(test.0); - if html != test.1 { - println!("Test failing\n{:?}\n{:?}", html, test.1); - println!("> {:?}", lex(test.0, &[])); - for (c1, c2) in test.1.chars().zip(html.chars()) { - if c1 != c2 { - println!("Difference in {:?} {:?}", c1, c2); - } - } - } - assert_eq!(html, test.1); - } -} - -#[test] -fn test_images(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("![Alt text](foo.jpeg)", "

    \"Alt

    "), - ("![Alt text]()", "

    \"Alt

    "), - ("![Alt text]( )", "

    \"Alt

    "), - ("![Alt text](https://example.com/my/cool/image.png)", "

    \"Alt

    "), - ("![Red dot]()", "

    \"Red

    "), - ("![Red dot](data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==)", "

    \"Red

    "), - - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_tasklists(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("- [ ] One task", - "
    • One task
    "), - ("- [x] One other task", - "
    • One other task
    "), - ("- [x] One other task\n- [ ] One task\n- [ ] One last task", - "
    • One other task
    • \n
    • One task
    • \n
    • One last task
    "), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_lists(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("* One entry", - "
    • One entry
    "), - ("1. One entry", - "
    1. One entry
    "), - ("* an item\n* another item\n* a third item", - "
    • an item
    • another item
    • a third item
    "), - (" * an item\n * another item\n * a third item", - "
    • an item
    • another item
    • a third item
    "), - ("lead text\n\n- entry 1\n- entry 2\n- entry 3\n- entry 4\n\nMore text", - "

    lead text

    \n
    • entry 1
    • \n
    • entry 2
    • \n
    • entry 3
    • \n
    • entry 4
    • \n
    \n

    More text

    \n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - if html != test.1 { - println!("> {:?}", lex(test.0, &[])); - } - assert_eq!(html, test.1); - } -} - -#[test] -fn test_blockquotes(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("> Outer quote with some text 1.\n> \n>> Inner quote with some other text\n> Outer again", - "
    Outer quote with some text 1.
    Inner quote with some other text
    Outer again
    \n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_references(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("Here's some text. And a ref [^1]\n [^1]: Reference text", - "

    Here's some text. And a ref 1

    \n
    \n\t
      \n\t\t
    1. \t\t\t

      Reference text

      \t\t
    2. \t
    \n
    \n"), - ("Here's some text. And a ref [^1]\n [^1]: Reference text\n\twith multiple\n lines\n to ensure those work", - "

    Here's some text. And a ref 1

    \n
    \n\t
      \n\t\t
    1. \t\t\t

      Reference text\n\twith multiple\n lines\n to ensure those work

      \t\t
    2. \t
    \n
    \n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_paragraphs(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("Paragraph 1.\n\n```\nBlock text should end a paragraph.\n```\n\nThis is paragraph two.\n\n## Heading\n\nParagraph the third.", - "

    Paragraph 1.

    \n
    Block text should end a paragraph.\n
    \n

    This is paragraph two.

    \n

    Heading

    \n

    Paragraph the third.

    \n"), - ("# Post title\nSection text\n# Second section\nGood content", - "

    Post title

    \n

    Section text

    \n

    Second section

    \n

    Good content

    \n") - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } - -} - -#[test] -fn test_links(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("another (See [Sewer Shark](https://en.wikipedia.org/wiki/Sewer_Shark)). Video playback", - "

    another (See Sewer Shark). Video playback

    \n"), - ("r [Distant Worlds](https://www.youtube.com/watch?v=yd3KYOei8o4) a", - "

    r Distant Worlds a

    \n"), - ("Foo\n```\nbattle\nenemy1\n```\nSome text [ddh](https://g.com/d/ddh/t/m)\n\nMore text following", - "

    Foo

    \n
    battle\nenemy1\n
    \n

    Some text ddh

    \n

    More text following

    \n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_details(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("
    \nSummary\n\n```\nFoo\n```\n
    ", - "
    \nSummary\n\n
    Foo\n
    \n\n
    "), - ("
    \nSummary but with spaces\n\n```\nFoo\n```\n
    ", - "
    \nSummary but with spaces\n\n
    Foo\n
    \n\n
    "), - ("
    \r\ntesting right now\r\ninner test\r\n
    ", - "
    \ntesting right now\n\n

    inner test\r

    \n\n
    "), - ("Here's some lead text\n
    \nSummary\n\n```\nFoo\n```\n
    ", - "

    Here's some lead text

    \n
    \nSummary\n\n
    Foo\n
    \n\n
    ") - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } - -} \ No newline at end of file diff --git a/tests/lexer.rs b/tests/lexer.rs deleted file mode 100644 index fcd0a3a..0000000 --- a/tests/lexer.rs +++ /dev/null @@ -1,159 +0,0 @@ -use mini_markdown::lex; -use mini_markdown::lexer::{Token, TaskBox}; - -#[test] -fn test_lex() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("# Heading level 1", vec![Token::Header(1, "Heading level 1".to_string(), None)]), - ("## Heading level 2", vec![Token::Header(2, "Heading level 2".to_string(), None)]), - ("### Heading level 3", vec![Token::Header(3, "Heading level 3".to_string(), None)]), - ("#### Heading level 4", vec![Token::Header(4, "Heading level 4".to_string(), None)]), - ("##### Heading level 5", vec![Token::Header(5, "Heading level 5".to_string(), None)]), - ("###### Heading level 6", vec![Token::Header(6, "Heading level 6".to_string(), None)]), - ("####### Invalid Heading level 7", vec![Token::Header(6, "Invalid Heading level 7".to_string(), None)]), - ]); - tests.extend(vec![ - ("# Heading level 1 {#Test label}", vec![Token::Header(1, "Heading level 1".to_string(), Some("Test label".to_string()))]), - ("## Heading level 2 {#Test label}", vec![Token::Header(2, "Heading level 2".to_string(), Some("Test label".to_string()))]), - ("### Heading level 3 {#Test label}", vec![Token::Header(3, "Heading level 3".to_string(), Some("Test label".to_string()))]), - ("#### Heading level 4 {#Test label}", vec![Token::Header(4, "Heading level 4".to_string(), Some("Test label".to_string()))]), - ("##### Heading level 5 {#Test label}", vec![Token::Header(5, "Heading level 5".to_string(), Some("Test label".to_string()))]), - ("###### Heading level 6 {#Test label}", vec![Token::Header(6, "Heading level 6".to_string(), Some("Test label".to_string()))]), - ("####### Invalid Heading level 7 {#Test label}", vec![Token::Header(6, "Invalid Heading level 7".to_string(), Some("Test label".to_string()))]), - ]); - tests.extend(vec![ - ("I just love **bold text**.", vec![Token::Plaintext("I just love ".to_string()), Token::Bold("bold text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love __bold text__.", vec![Token::Plaintext("I just love ".to_string()), Token::Bold("bold text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love *_bold text*_.", vec![Token::Plaintext("I just love ".to_string()), Token::Bold("bold text".to_string()), Token::Plaintext(".".to_string())]), - ]); - tests.extend(vec![ - ("I just love *italic text*.", vec![Token::Plaintext("I just love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love _italic text_.", vec![Token::Plaintext("I just love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love *italic text_.", vec![Token::Plaintext("I just love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just\n love *italic text_.", vec![Token::Plaintext("I just".to_string()), Token::Newline, Token::Plaintext(" love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ]); - tests.extend(vec![ - ("I just love ***bold italic text***.", vec![Token::Plaintext("I just love ".to_string()), Token::BoldItalic("bold italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love ___bold italic text___.", vec![Token::Plaintext("I just love ".to_string()), Token::BoldItalic("bold italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love _*_bold italic text*_*.", vec![Token::Plaintext("I just love ".to_string()), Token::BoldItalic("bold italic text".to_string()), Token::Plaintext(".".to_string())]), - ]); - tests.extend(vec![ - ("* unodered list\n", vec![Token::UnorderedListEntry("unodered list".to_string())]), - ("* unodered list\n* with two\n", vec![Token::UnorderedListEntry("unodered list".to_string()), Token::UnorderedListEntry("with two".to_string())]), - ("* unodered list\n* with two\n* with three\n", vec![Token::UnorderedListEntry("unodered list".to_string()), Token::UnorderedListEntry("with two".to_string()), Token::UnorderedListEntry("with three".to_string())]), - ]); - tests.extend(vec![ - ("Some text _with italics_ in the same paragraph", vec![Token::Plaintext("Some text ".to_string()), Token::Italic("with italics".to_string()), Token::Plaintext(" in the same paragraph".to_string())]), - ("Text attributes _italic_, \n**bold**, `monospace`. Some implementations may use *single-asterisks* for italic text.", - vec![ - Token::Plaintext("Text attributes ".to_string()), - Token::Italic("italic".to_string()), - Token::Plaintext(", ".to_string()), - Token::Newline, - Token::Bold("bold".to_string()), - Token::Plaintext(", ".to_string()), - Token::Code("monospace".to_string()), - Token::Plaintext(". Some implementations may use ".to_string()), - Token::Italic("single-asterisks".to_string()), - Token::Plaintext(" for italic text.".to_string()), - ]) - ]); - tests.extend(vec![ - ("![alt](https://example.com/foo.jpeg)", vec![Token::Image("https://example.com/foo.jpeg".to_string(), Some("alt".to_string()))]), - ("![alt]()", vec![Token::Image("".to_string(), Some("alt".to_string()))]), - ("Some test text [^1]", vec![Token::Plaintext("Some test text [^1]".to_string())]), - ("[^1]: First footnote", vec![Token::Footnote("1".to_string(), "First footnote".to_string())]), - ("[^HUGE]: Big footnote", vec![Token::Footnote("HUGE".to_string(), "Big footnote".to_string())]), - ("[^BORK ED]: Big footnote", vec![Token::Plaintext("[^BORK ED]: Big footnote".to_string())]), - - ]); - tests.extend(vec![ - ("---", vec![Token::HorizontalRule]), - ("-----", vec![Token::HorizontalRule]), - ("--", vec![Token::Plaintext("--".to_string())]), - ("- [ ] Unchecked box", vec![Token::TaskListItem(TaskBox::Unchecked, "Unchecked box".to_string())]), - ("+ [ ] Unchecked box", vec![Token::TaskListItem(TaskBox::Unchecked, "Unchecked box".to_string())]), - ("- [x] Checked box", vec![Token::TaskListItem(TaskBox::Checked, "Checked box".to_string())]), - ("- [X] Also a checked box", vec![Token::TaskListItem(TaskBox::Checked, "Also a checked box".to_string())]), - ("- [X]Not a checked box", vec![Token::UnorderedListEntry("[X]Not a checked box".to_string())]), - ("- [X] A checked box\n- [X] Also a checked box", vec![Token::TaskListItem(TaskBox::Checked, "A checked box".to_string()), Token::Newline, Token::TaskListItem(TaskBox::Checked, "Also a checked box".to_string())]), - ]); - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_lex_plaintext() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("¯\\\\\\_(ツ)\\_/¯", vec![Token::Plaintext("¯\\_(ツ)_/¯".to_string())]), - ("\\_test\\_", vec![Token::Plaintext("_test_".to_string())]), - ("\\*escaping\\_", vec![Token::Plaintext("*escaping_".to_string())]), - ("\\>controls\\<", vec![Token::Plaintext(">controls<".to_string())]), - ("2017-12-6 20:13:00", vec![Token::Plaintext("2017-12-6 20:13:00".to_string())]), - ("\nlayout: post\ntitle: \"Looking back at consoles and codecs\"\ndate: 2017-12-6 20:13:00 +0100\n", - vec![Token::Newline, - Token::Plaintext("layout: post".to_string()), - Token::Newline, - Token::Plaintext("title: \"Looking back at consoles and codecs\"".to_string()), - Token::Newline, - Token::Plaintext("date: 2017-12-6 20:13:00 +0100".to_string()), - Token::Newline]) - ]); - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_blockquote_lex() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("> ", vec![Token::BlockQuote(1, "".to_string())]), - ("> \n>> text", vec![Token::BlockQuote(1, "".to_string()), Token::BlockQuote(2, "text".to_string())]), - ("> text\n> \n>> more text", vec![Token::BlockQuote(1, "text".to_string()) ,Token::BlockQuote(1, "".to_string()), Token::BlockQuote(2, "more text".to_string())]), - - ]); - - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_footnote_lex() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("[^1]: Footnote #1", vec![Token::Footnote("1".to_string(), "Footnote #1".to_string())]), - ("[^1]: Footnote #1\n with a second line", vec![Token::Footnote("1".to_string(), "Footnote #1\n with a second line".to_string())]), - ("[^1]: Footnote #1\n\twith a second line", vec![Token::Footnote("1".to_string(), "Footnote #1\n\twith a second line".to_string())]), - ("[^1]: Footnote #1\n with a second line", vec![Token::Footnote("1".to_string(), "Footnote #1\n with a second line".to_string())]), - ("[^1]: Footnote #1\n with a second line\n\tand a third line", vec![Token::Footnote("1".to_string(), "Footnote #1\n with a second line\n\tand a third line".to_string())]), - ]); - - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_link_lex(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("another (See [Sewer Shark](https://en.wikipedia.org/wiki/Sewer_Shark)). Video", - vec![Token::Plaintext("another (See ".to_string()), Token::Link("https://en.wikipedia.org/wiki/Sewer_Shark".to_string(), Some("Sewer Shark".to_string()), None), Token::Plaintext("). Video".to_string())]), - ("r [Distant Worlds](https://www.youtube.com/watch?v=yd3KYOei8o4) a", - vec![Token::Plaintext("r ".to_string()), Token::Link("https://www.youtube.com/watch?v=yd3KYOei8o4".to_string(), Some("Distant Worlds".to_string()), None), Token::Plaintext(" a".to_string())]) - ]); - - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} \ No newline at end of file diff --git a/tests/sanitation.rs b/tests/sanitation.rs index 5c49fbe..1103ae0 100644 --- a/tests/sanitation.rs +++ b/tests/sanitation.rs @@ -5,7 +5,7 @@ fn test_simple_tag_injection() { ("foobar ".to_string(), "

    foobar text for context /scriptjunk

    \n".to_string()), ("".to_string(), - "/SCRIPT".to_string()), + "/SCRIPT\n".to_string()), ]); for test in tests.iter_mut(){ @@ -19,9 +19,9 @@ use mini_markdown::render; fn test_image_xss(){ let mut tests = Vec::new(); tests.extend(vec![ - ("![Alt text](foo.jpeg)", "

    \"Alt

    "), - ("![Alt text]()", "

    \"Alt

    "), - ("![Alt text]( )", "

    \"Alt

    "), + ("![Alt text](foo.jpeg)", "

    \"Alt

    \n"), + ("![Alt text]()", "

    \"Alt

    \n"), + ("![Alt text]( )", "

    \"Alt

    \n"), ("![Alt text](javascript:alert(0))", "

    \"Alt

    )

    \n"), ]); From cae67c7ddf4b9e73cc84dc6906fbc6d66d49a21e Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 16 Sep 2022 09:31:18 -0700 Subject: [PATCH 34/34] Derive Eq for allignment --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index fe2b4a6..b500bf0 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -71,7 +71,7 @@ impl Token{ } /// Holds the alignment states for the table token -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Alignment { Left, Right,