From e9b5c97b40b121bafd5c765c202b58e0e1bb8f4a Mon Sep 17 00:00:00 2001
From: Jonathan Moroney Some text with italics in the same paragraph Some text! With exclamations! Paragraph 1. This is paragraph two. Paragraph the third. Paragraph 1. This is paragraph two. Paragraph the third. Section text Good content Section text Good contentHeading level 1
\n"),
- ("## Heading level 2", "Heading level 2
\n"),
- ("### Heading level 3", "Heading level 3
\n"),
- ("#### Heading level 4", "Heading level 4
\n"),
- ("##### Heading level 5", "Heading level 5
\n"),
- ("###### Heading level 6", "Heading level 6
\n"),
- ("####### Invalid Heading level 7", "Invalid Heading level 7
\n"),
+ ("# Heading level 1", "Heading level 1
"),
+ ("## Heading level 2", "Heading level 2
"),
+ ("### Heading level 3", "Heading level 3
"),
+ ("#### Heading level 4", "Heading level 4
"),
+ ("##### Heading level 5", "Heading level 5
"),
+ ("###### Heading level 6", "Heading level 6
"),
+ ("####### Invalid Heading level 7", "Invalid Heading level 7
"),
("Some text _with italics_ in the same paragraph\n", "
\nBlock text should end a paragraph.\n
Heading
\n\n
\nBlock text should end a paragraph.\n
Heading
\nPost title
\n\nSecond section
\n\nPost title
\nSecond section
\n
").unwrap_or("")
From b61b7d0154bc950230de1195ae6d565b78a75a7c Mon Sep 17 00:00:00 2001
From: Jonathan Moroney
").unwrap_or("") - .strip_suffix("
\n").unwrap_or("").to_string(); + .strip_suffix("\n").unwrap_or("").trim().to_string(); return Ok(Token::Header(hashes.len(), parsed_line, None)); } From 11b43c9a696ee3792287338e0a5a41f94b8ab263 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney").unwrap_or("") .strip_suffix("
\n").unwrap_or("").trim().to_string(); + println!(">? {:?}", parsed_line); return Ok(Token::Header(hashes.len(), parsed_line, None)); } @@ -180,6 +182,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul } pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result").unwrap_or("") .strip_suffix("
\n").unwrap_or("").trim().to_string(); - println!(">? {:?}", parsed_line); return Ok(Token::Header(hashes.len(), parsed_line, None)); } From b41acbb25f815fd0c99702dfbb755eb96e352fa7 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney").unwrap_or("") .strip_suffix("
\n").unwrap_or("").trim().to_string(); + if heading != "" { + return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); + } return Ok(Token::Header(hashes.len(), parsed_line, None)); } From b6d4fe9c954ab0f743d42b7258e8fa486f8fb287 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney{}
", sanitize_display_text(t)).as_str())},
+ html.push_str(format!("{}
", sanitize_display_text(t)).as_str())},
Token::CodeBlock(t, lang) => {
html.push_str(""); match lang.as_str() { @@ -369,6 +369,7 @@ pub fn parse(tokens: &[Token]) -> String { } } + // Add references if references.len() > 0{ html.push_str("\n"); @@ -383,6 +384,9 @@ pub fn parse(tokens: &[Token]) -> String { html.push_str("\t\n"); html.push_str("\n"); } + if html.chars().last().unwrap() != '\n' { + html.push('\n'); + } html } From 09cfb6868c6393e02277fe5a4c139b4054eef70b Mon Sep 17 00:00:00 2001 From: Jonathan MoroneyDate: Fri, 26 Aug 2022 14:11:26 -0700 Subject: [PATCH 16/34] Strip optional closing #'s --- src/lexer.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index 11f5f39..db91711 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,8 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result ").unwrap_or("") .strip_suffix("\n").unwrap_or("").trim().to_string(); if heading != "" { From 3156ffbf9f6fffcdd244eb09510d3443baaf055d Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 14:19:52 -0700 Subject: [PATCH 17/34] Also strip trailing spaces --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index db91711..beb7234 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -122,7 +122,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result ").unwrap_or("") .strip_suffix("\n").unwrap_or("").trim().to_string(); if heading != "" { From 79e243c51fbe531a21dfb980454dfe584c41b7fa Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 14:21:57 -0700 Subject: [PATCH 18/34] Strip but better and order independent --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index beb7234..6139b0a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -122,7 +122,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result ").unwrap_or("") .strip_suffix("\n").unwrap_or("").trim().to_string(); if heading != "" { From e2fec24dae76d4083957adc6e3ea9070fe82ec49 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:06:21 -0700 Subject: [PATCH 19/34] Check if on newline for header parsing --- src/lexer.rs | 1 - src/lib.rs | 13 ++++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 6139b0a..b35424c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,6 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result ").unwrap_or("") .strip_suffix("\n").unwrap_or("").trim().to_string(); diff --git a/src/lib.rs b/src/lib.rs index 5855950..475c6e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,13 +8,20 @@ pub(crate) struct SanitizationError{ pub(crate) content: String, } +fn on_newline(tokens: &Vec ) -> bool { + match tokens.last() { + Some(Token::Plaintext(t)) => t.ends_with("\n"), + _ => true, + } +} + /// Convert source markdown to an ordered vector of tokens pub fn lex(source: &str) -> Vec { let mut char_iter = MiniIter::new(source); let mut tokens = Vec::new(); while char_iter.peek().is_some(){ match char_iter.peek().unwrap(){ - "#" => { + "#" if on_newline(&tokens) => { match lex_heading(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), @@ -396,6 +403,10 @@ pub fn render(source: &str) -> String { parse(&lex(source)) } +pub fn render_ignore(source: &str, ignore: &[char]) -> String { + parse(&lex(source)) +} + /// Replace potentially unsafe characters with html entities pub(crate) fn sanitize_display_text(source: &str) -> String { source.replace('&', "&") From 97dacb324519115f9a28ccd5af6e61ca9893fb78 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:08:39 -0700 Subject: [PATCH 20/34] fixup! Check if on newline for header parsing Nevermind it breaks things --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 475c6e7..860b7a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ pub fn lex(source: &str) -> Vec { let mut tokens = Vec::new(); while char_iter.peek().is_some(){ match char_iter.peek().unwrap(){ - "#" if on_newline(&tokens) => { + "#" => { match lex_heading(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), From f7263c9eff9f3108b67717b73db9b743e384f8e4 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:22:42 -0700 Subject: [PATCH 21/34] Remove newline check --- src/lib.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 860b7a0..5bdf5ef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,13 +8,6 @@ pub(crate) struct SanitizationError{ pub(crate) content: String, } -fn on_newline(tokens: &Vec ) -> bool { - match tokens.last() { - Some(Token::Plaintext(t)) => t.ends_with("\n"), - _ => true, - } -} - /// Convert source markdown to an ordered vector of tokens pub fn lex(source: &str) -> Vec { let mut char_iter = MiniIter::new(source); From 2899bd258e914483ebb464f19d2133841410b79f Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 15:55:02 -0700 Subject: [PATCH 22/34] Add the capacity to ignore control chars. Wire up usage for # and now passing 74 --- src/lexer.rs | 6 +++--- src/lib.rs | 10 +++++----- tests/e2e.rs | 6 +++--- tests/lexer.rs | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index b35424c..8ee6277 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result ").unwrap_or("") .strip_suffix("\n").unwrap_or("").trim().to_string(); if heading != "" { @@ -420,7 +420,7 @@ fn parse_details<'a>(char_iter: &mut MiniIter<'a>) -> Result ").count(); } } - let inner_tokens = crate::lex(remaining_text.strip_suffix("").unwrap_or("")); + let inner_tokens = crate::lex(remaining_text.strip_suffix("").unwrap_or(""), &[]); Ok(Token::Detail(summary_line.to_string(), inner_tokens)) } @@ -461,7 +461,7 @@ pub(crate) fn lex_pipes<'a>(char_iter: &mut MiniIter<'a>) -> Result Vec { +pub fn lex(source: &str, ignore: &[char]) -> Vec { let mut char_iter = MiniIter::new(source); let mut tokens = Vec::new(); while char_iter.peek().is_some(){ match char_iter.peek().unwrap(){ - "#" => { + "#" if !ignore.contains(&'#') => { match lex_heading(&mut char_iter) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), @@ -393,11 +393,11 @@ pub fn parse(tokens: &[Token]) -> String { /// Render HTML from a source markdown string /// Output is sanitized to prevent script injection pub fn render(source: &str) -> String { - parse(&lex(source)) + parse(&lex(source, &[])) } -pub fn render_ignore(source: &str, ignore: &[char]) -> String { - parse(&lex(source)) +pub(crate) fn render_ignore(source: &str, ignore: &[char]) -> String { + parse(&lex(source, ignore)) } /// Replace potentially unsafe characters with html entities diff --git a/tests/e2e.rs b/tests/e2e.rs index be953e7..3b52b78 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -75,7 +75,7 @@ fn test_moderate_render(){ for test in tests.iter(){ let html = render(test.0); if html != test.1 { - println!("?? {:?}", lex(test.0)); + println!("?? {:?}", lex(test.0, &[])); } assert_eq!(html, test.1); } @@ -97,7 +97,7 @@ fn test_table_render() { let html = render(test.0); if html != test.1 { println!("Test failing\n{:?}\n{:?}", html, test.1); - println!("> {:?}", lex(test.0)); + println!("> {:?}", lex(test.0, &[])); for (c1, c2) in test.1.chars().zip(html.chars()) { if c1 != c2 { println!("Difference in {:?} {:?}", c1, c2); @@ -164,7 +164,7 @@ fn test_lists(){ for test in tests.iter(){ let html = render(test.0); if html != test.1 { - println!("> {:?}", lex(test.0)); + println!("> {:?}", lex(test.0, &[])); } assert_eq!(html, test.1); } diff --git a/tests/lexer.rs b/tests/lexer.rs index b9d4de4..fcd0a3a 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs @@ -80,7 +80,7 @@ fn test_lex() { ("- [X] A checked box\n- [X] Also a checked box", vec![Token::TaskListItem(TaskBox::Checked, "A checked box".to_string()), Token::Newline, Token::TaskListItem(TaskBox::Checked, "Also a checked box".to_string())]), ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -104,7 +104,7 @@ fn test_lex_plaintext() { Token::Newline]) ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -120,7 +120,7 @@ fn test_blockquote_lex() { ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -137,7 +137,7 @@ fn test_footnote_lex() { ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } @@ -153,7 +153,7 @@ fn test_link_lex(){ ]); for test in tests.iter(){ - let tokens = lex(test.0); + let tokens = lex(test.0, &[]); assert_eq!(&tokens[..], &test.1[..]); } } \ No newline at end of file From fba26f57575a6863ea16ac69f3a0eef418299ed3 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Fri, 26 Aug 2022 16:44:37 -0700 Subject: [PATCH 23/34] Handle case where there is no content in html and handle zero length headings --- src/lexer.rs | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 8ee6277..65c6b06 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -109,7 +109,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result 6 { return Err(ParseError{content: hashes}); } - if char_iter.next_if_eq(&" ").is_none() && char_iter.next_if_eq(&"\t").is_none(){ + if char_iter.next_if_eq(&" ").is_none() && char_iter.next_if_eq(&"\t").is_none() && char_iter.peek() != Some(&"\n"){ return Err(ParseError{content: hashes}); } let line = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or(""); diff --git a/src/lib.rs b/src/lib.rs index 06c3659..5b6cf3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -384,7 +384,7 @@ pub fn parse(tokens: &[Token]) -> String { html.push_str("\t\n"); html.push_str("\n"); } - if html.chars().last().unwrap() != '\n' { + if html.chars().last().unwrap_or(' ') != '\n' { html.push('\n'); } html From 2382082807a4a9c12952df59c74414e4af8667e0 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 15:31:36 -0700 Subject: [PATCH 24/34] Pre-removal of lexing newlines. Should handle them in other cases or treat as plaintext --- src/lexer.rs | 17 ++++++++++------- src/lib.rs | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 65c6b06..761ae8e 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,7 +1,7 @@ use crate::MiniIter; /// Tokens are the intermediate representation format in the markdown to html conversion -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum Token { /// String: Body of unstructured text Plaintext(String), @@ -50,7 +50,7 @@ pub enum Token { } /// Holds the possible states of a taskbox in a task list -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum TaskBox { Checked, Unchecked, @@ -179,7 +179,7 @@ pub(crate) fn lex_asterisk_underscore<'a>(char_iter: &mut MiniIter<'a>) -> Resul } } -pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result > { +pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec ) -> Result > { let start_index = char_iter.get_index(); let whitespace = char_iter.consume_while_case_holds(&|c| c == "\t" || c == " "); match whitespace { @@ -191,13 +191,16 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result {}, } let whitespace = whitespace.unwrap_or(""); - let line = char_iter.consume_until_tail_is("\n").unwrap_or("").to_string(); + let line = char_iter.consume_until_tail_is("\n").unwrap_or(""); + println!("??> {:?}", tokens.last()); + println!(">>? {:?}", line); match whitespace { - "\t" | " " => return Ok(Token::Code(line)), + " " if (matches!(tokens.last(), Some(Token::Plaintext(_))) && line.contains('#')) => return Err(ParseError{content: line}), + "\t" | " " => return Ok(Token::Code(line.to_string())), _ => {}, } if char_iter.peek() == Some("\t") || char_iter.peek() == Some(" ") { - match lex_tabs_spaces(char_iter) { + match lex_tabs_spaces(char_iter, tokens) { Ok(Token::CodeBlock(_content, _lang)) => { return Ok(Token::CodeBlock(char_iter.get_substring_from(start_index).unwrap_or("").to_string(),"".to_string()))}, Err(e) => return Err(e), @@ -242,7 +245,7 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result (char_iter: &mut MiniIter<'a>) -> Result > { +pub(crate) fn lex_newlines<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec ) -> Result > { match char_iter.consume_while_case_holds(&|c| c == "\n") { Some(s) if s.len() >= 1 => return Ok(Token::Newline), Some(s) if s.len() < 1 => return Err(ParseError{content: s}), diff --git a/src/lib.rs b/src/lib.rs index 5b6cf3d..c9ccabf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,7 +39,7 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec { } }, " " | "\t" => { - match lex_tabs_spaces(&mut char_iter) { + match lex_tabs_spaces(&mut char_iter, &tokens) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), } @@ -51,7 +51,7 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec { } }, "\n" => { - match lex_newlines(&mut char_iter) { + match lex_newlines(&mut char_iter, &tokens) { Ok(t) => tokens.push(t), Err(e) => push_str(&mut tokens, e.content), } From 0e420eafab4f964b799024bcefc507ad51032d1c Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:01:07 -0700 Subject: [PATCH 25/34] 70 working. Others broken --- src/lexer.rs | 6 ++++-- src/lib.rs | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 761ae8e..e4d492a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -194,8 +194,10 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec {:?}", tokens.last()); println!(">>? {:?}", line); + println!(">>? {:?}", whitespace); match whitespace { " " if (matches!(tokens.last(), Some(Token::Plaintext(_))) && line.contains('#')) => return Err(ParseError{content: line}), + " " if (matches!(tokens.last(), Some(Token::Newline)) && line.contains('#')) => return Err(ParseError{content: line}), "\t" | " " => return Ok(Token::Code(line.to_string())), _ => {}, } @@ -247,8 +249,8 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a>) -> Result (char_iter: &mut MiniIter<'a>, tokens: &Vec ) -> Result > { match char_iter.consume_while_case_holds(&|c| c == "\n") { - Some(s) if s.len() >= 1 => return Ok(Token::Newline), - Some(s) if s.len() < 1 => return Err(ParseError{content: s}), + Some(s) if s.len() >= 2 => return Ok(Token::Newline), + Some(s) if s.len() < 2 => return Err(ParseError{content: s}), _ => return Err(ParseError{content: ""}), } } diff --git a/src/lib.rs b/src/lib.rs index c9ccabf..5b1f7e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,6 +114,7 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec { /// Parse tokens to produce safe html output pub fn parse(tokens: &[Token]) -> String { + println!("??? {:?}", tokens); let mut html = String::with_capacity(tokens.len()*100); let mut in_task_list = false; let mut in_ordered_list = false; @@ -171,6 +172,7 @@ pub fn parse(tokens: &[Token]) -> String { // Add content match token { Token::Plaintext(t) => { + println!("?????? {:?}", t); if t.trim().is_empty() {continue} // Handle references @@ -188,9 +190,9 @@ pub fn parse(tokens: &[Token]) -> String { count+=1; } else {s.push_str(tok)} } - html.push_str(&s); + html.push_str(&s.trim_end()); } else { - html.push_str(sanitize_display_text(t.trim_start_matches('\n')).as_str()) + html.push_str(sanitize_display_text(t.trim_start_matches('\n')).trim_end()) } }, Token::Header(l, t, lbl) => { From 2c8fc962c566d9694a160c0192c4ee857e2aec97 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:01:58 -0700 Subject: [PATCH 26/34] Add newlines after headings in html --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5b1f7e5..b454dd0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,12 +197,12 @@ pub fn parse(tokens: &[Token]) -> String { }, Token::Header(l, t, lbl) => { match lbl { - Some(lbl_text) => html.push_str(format!(" {text} ", + Some(lbl_text) => html.push_str(format!("{text} \n", level=l, text=t, id=sanitize_display_text(&lbl_text.replace(" ", "-"))) .as_str()), - None => html.push_str(format!("{text} ", + None => html.push_str(format!("{text} \n", level=l, text=t) .as_str()), From 749108f669c6960cb3d568f57729da3317cd64df Mon Sep 17 00:00:00 2001 From: Jonathan MoroneyDate: Thu, 15 Sep 2022 16:04:35 -0700 Subject: [PATCH 27/34] Only trim newlines on plaintext --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b454dd0..e6f1144 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -190,9 +190,9 @@ pub fn parse(tokens: &[Token]) -> String { count+=1; } else {s.push_str(tok)} } - html.push_str(&s.trim_end()); + html.push_str(&s.trim_end_matches('\n')); } else { - html.push_str(sanitize_display_text(t.trim_start_matches('\n')).trim_end()) + html.push_str(sanitize_display_text(t.trim_start_matches('\n')).trim_end_matches('\n')) } }, Token::Header(l, t, lbl) => { From ec416f649e03bc80ac3a2fbc14eb87380f7a1d87 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:24:28 -0700 Subject: [PATCH 28/34] Better handle trailing header closer thingys --- src/lexer.rs | 16 ++++++++++++---- src/lib.rs | 2 -- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index e4d492a..536e37c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,9 +121,20 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result { + match right.chars().all(|c| c == '#') { + true => left, + false => line, + } + }, + None => line, + }; + let parsed_line = crate::render_ignore(line_without_optional_trailing_hash_sequence.trim_end_matches(&[' ', '\t']), &['#']) .strip_prefix(" ").unwrap_or("") .strip_suffix("
\n").unwrap_or("").trim().to_string(); + println!("line: {:?}", line); + println!("parsed_line: {:?}", parsed_line); if heading != "" { return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } @@ -192,9 +203,6 @@ pub(crate) fn lex_tabs_spaces<'a>(char_iter: &mut MiniIter<'a>, tokens: &Vec{:?}", tokens.last()); - println!(">>? {:?}", line); - println!(">>? {:?}", whitespace); match whitespace { " " if (matches!(tokens.last(), Some(Token::Plaintext(_))) && line.contains('#')) => return Err(ParseError{content: line}), " " if (matches!(tokens.last(), Some(Token::Newline)) && line.contains('#')) => return Err(ParseError{content: line}), diff --git a/src/lib.rs b/src/lib.rs index e6f1144..e28b690 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,7 +114,6 @@ pub fn lex(source: &str, ignore: &[char]) -> Vec { /// Parse tokens to produce safe html output pub fn parse(tokens: &[Token]) -> String { - println!("??? {:?}", tokens); let mut html = String::with_capacity(tokens.len()*100); let mut in_task_list = false; let mut in_ordered_list = false; @@ -172,7 +171,6 @@ pub fn parse(tokens: &[Token]) -> String { // Add content match token { Token::Plaintext(t) => { - println!("?????? {:?}", t); if t.trim().is_empty() {continue} // Handle references From 3dd652a71cb402478a7cf6394ba05a572a7a2b3a Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:26:44 -0700 Subject: [PATCH 29/34] And better still --- src/lexer.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 536e37c..db2064c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -121,7 +121,7 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result { match right.chars().all(|c| c == '#') { true => left, @@ -133,8 +133,6 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result ").unwrap_or("") .strip_suffix("\n").unwrap_or("").trim().to_string(); - println!("line: {:?}", line); - println!("parsed_line: {:?}", parsed_line); if heading != "" { return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } From 35585300f70ecc1f6c51a38930a56ca9e1daedd2 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney Date: Thu, 15 Sep 2022 16:29:12 -0700 Subject: [PATCH 30/34] Add newline after horizontal rule --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e28b690..4021b50 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -242,7 +242,7 @@ pub fn parse(tokens: &[Token]) -> String { Token::Bold(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::BoldItalic(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::LineBreak => {html.push_str("
")}, - Token::HorizontalRule => {html.push_str("
")}, + Token::HorizontalRule => {html.push_str("
\n")}, Token::Strikethrough(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, Token::Code(t) => { html.push_str(format!("", sanitize_display_text(t)).as_str())}, From d144f6c8de26c83200ebc1c42bd12ecbd766a460 Mon Sep 17 00:00:00 2001 From: Jonathan Moroney{}
Date: Thu, 15 Sep 2022 16:36:04 -0700 Subject: [PATCH 31/34] Don't add a newline if the last thing added was a newline --- src/lib.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4021b50..342b0bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -155,7 +155,7 @@ pub fn parse(tokens: &[Token]) -> String { Token::BlockQuote(_, _) | Token::Newline if quote_level > 0 => {}, Token::CodeBlock(_, _) | Token::Newline | Token::Header(_, _, _) if in_paragraph => { in_paragraph = false; - html.push_str("") + html.push_str("\n") }, Token::Plaintext(_) | Token::Italic(_) | Token::Bold(_) | Token::BoldItalic(_) | Token::Strikethrough(_) if !in_paragraph => { for _i in 0..quote_level { @@ -235,7 +235,12 @@ pub fn parse(tokens: &[Token]) -> String { } html.push_str(format!(" {} ", sanitize_display_text(t)).as_str()) }, - Token::Newline => {html.push('\n')}, + Token::Newline => { + match html.chars().last() { + Some('\n') => {} + _ => html.push('\n'), + } + }, Token::Tab => {html.push('\t')}, Token::DoubleTab => {html.push_str("\t\t")}, Token::Italic(t) => {html.push_str(format!("{}", sanitize_display_text(t)).as_str())}, From 7f7de096468c7e21cec4ec3faa9ae41d29e24113 Mon Sep 17 00:00:00 2001 From: Jonathan MoroneyDate: Thu, 15 Sep 2022 16:41:28 -0700 Subject: [PATCH 32/34] =?UTF-8?q?ATX=20header=20support=20=F0=9F=99=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lexer.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lexer.rs b/src/lexer.rs index db2064c..fe2b4a6 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -130,9 +130,15 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result line, }; + if line.chars().all(|c| c == '#') { + return Ok(Token::Header(hashes.len(), "".to_string(), None)); + } let parsed_line = crate::render_ignore(line_without_optional_trailing_hash_sequence.trim_end_matches(&[' ', '\t']), &['#']) .strip_prefix(" ").unwrap_or("") .strip_suffix("
\n").unwrap_or("").trim().to_string(); + println!("line: {:?}", line); + println!("parsed_line: {:?}", parsed_line); + println!("line_without_optional_trailing_hash_sequence: {:?}", line_without_optional_trailing_hash_sequence); if heading != "" { return Ok(Token::Header(hashes.len(), heading.trim().to_string(), Some(parsed_line))); } From 109f3b519f8af21e2abdbb87567c3b0a6a380c30 Mon Sep 17 00:00:00 2001 From: Jonathan MoroneyDate: Fri, 16 Sep 2022 09:28:23 -0700 Subject: [PATCH 33/34] Remove old tests and update sanitization test --- tests/e2e.rs | 257 -------------------------------------------- tests/lexer.rs | 159 --------------------------- tests/sanitation.rs | 8 +- 3 files changed, 4 insertions(+), 420 deletions(-) delete mode 100644 tests/e2e.rs delete mode 100644 tests/lexer.rs diff --git a/tests/e2e.rs b/tests/e2e.rs deleted file mode 100644 index 3b52b78..0000000 --- a/tests/e2e.rs +++ /dev/null @@ -1,257 +0,0 @@ -use mini_markdown::render; -use mini_markdown::{lex, parse}; -use mini_markdown::lexer::Token; - - -#[test] -fn test_simple_render() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("# Heading level 1", " Heading level 1
"), - ("## Heading level 2", "Heading level 2
"), - ("### Heading level 3", "Heading level 3
"), - ("#### Heading level 4", "Heading level 4
"), - ("##### Heading level 5", "Heading level 5
"), - ("###### Heading level 6", "Heading level 6
"), - ("####### Invalid Heading level 7", "Invalid Heading level 7
"), - ("Some text _with italics_ in the same paragraph\n", "Some text with italics in the same paragraph
\n"), - ("Some text! With exclamations!", "Some text! With exclamations!
\n"), - - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_moderate_render(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("Text attributes _italic_, \n**bold**, `monospace`. Some implementations may use *single-asterisks* for italic text.", - "Text attributes italic,
\nbold,
\n"), - ("Horizontal rule:\n\n---\n\nStrikethrough:\n\n~~strikethrough~~\n\n", - "monospace
. Some implementations may use single-asterisks for italic text.Horizontal rule:
\n
\nStrikethrough:
\n\n" - ), - ("> Outer quote with some text 1.\n> \n>> Inner quote with some other text\n> Outer again", - "
strikethroughOuter quote with some text 1.\n" - ), - ("```\nCode block 1\n```", - "Inner quote with some other textOuter again" - ), - ("```python\nCode block 2\n```", - "Code block 1\n
" - ), - ("```\nMulti\nLine\nCode block\n```", - "Code block 2\n
" - ), - ("> Outer quote with some text.\nNon-quoted text\n> Quote with some other text", - "Multi\nLine\nCode block\n
Outer quote with some text.Non-quoted text
\nQuote with some other text\n" - ), - ("> Outer quote with some other text.\nNon-quoted text\nMore non-quoted\n> Quote with some other text", - "Outer quote with some other text.Non-quoted text
\nMore non-quoted
\nQuote with some other text\n" - ), - ("Don't -> quote", - "Don't -> quote
\n" - ), - ("Don't -> quote\n> Do Quote\nDon't quote this either", - "Don't -> quote
\nDo QuoteDon't quote this either
\n" - ), - ("Testing an inline link [Link title](http://google.com)", - "Testing an inline link Link title
\n" - ), - ("Testing an inline link to a header id [Link title](#some-header)", - "Testing an inline link to a header id Link title
\n" - ), - ("Testing some details\n\n", - "Summary text goes here
\nSome text goes here\nTesting some details
\n\n" - ), - ("Testing some nested detailsSummary text goes here
\n\nSome text goes here
\n\n\n", - "Outer summary
\nOuter text\n\nInner Summary
\nInner text\nTesting some nested details
\n\n" - ), - ]); - - for test in tests.iter(){ - let html = render(test.0); - if html != test.1 { - println!("?? {:?}", lex(test.0, &[])); - } - assert_eq!(html, test.1); - } -} - -#[test] -fn test_table_render() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("| Syntax | Description | Test Text |\n| :--- | :----: | ---: |\n| Body | Text | Here's this |\n| Paragraph | Text | And more |", - "Outer summary
\n\nOuter text
\n\n\n\nInner Summary
\n\nInner text
\n\n\n\t\n\t
"), - ("| Syntax2 | Description | Test Text |\n| :--- | :----: | ---: |\n| *Body* | **Text** | ***Here's this*** |\n| `Paragraph` | Text | And more |", - "\n\t\t \n\t\n\t\n\tSyntax \t\tDescription \t\tTest Text \t\n\t\t \n\tBody \n\t\tText \n\t\tHere's this \n\t\n\t\t \n\t\nParagraph \n\t\tText \n\t\tAnd more \n\t\n\t\n\t
"), - ("| Syntax3 | Description | Test Text |\n| :--- | :----: | ---: |\n| *Body* | **Text** | ***Here's this*** |\n| `Paragraph ` | Text | And more |", - "\n\t\t \n\t\n\t\n\tSyntax2 \t\tDescription \t\tTest Text \t\n\t\t \n\tBody \n\t\tText \n\t\tHere's this \n\t\n\t\t \n\t\n\n\t\t \n
Paragraph
Text \n\t\tAnd more \n\t\n\t\n\t
"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - if html != test.1 { - println!("Test failing\n{:?}\n{:?}", html, test.1); - println!("> {:?}", lex(test.0, &[])); - for (c1, c2) in test.1.chars().zip(html.chars()) { - if c1 != c2 { - println!("Difference in {:?} {:?}", c1, c2); - } - } - } - assert_eq!(html, test.1); - } -} - -#[test] -fn test_images(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("![Alt text](foo.jpeg)", "\n\t\t \n\t\n\t\n\tSyntax3 \t\tDescription \t\tTest Text \t\n\t\t \n\tBody \n\t\tText \n\t\tHere's this \n\t\n\t\t \n\t\n\n\t\t \n
Paragraph <script=foo.js>test</script>
Text \n\t\tAnd more \n\t"), - ("![Alt text]()", "
"), - ("![Alt text]( )", "
"), - ("![Alt text](https://example.com/my/cool/image.png)", "
"), - ("![Red dot](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==)", "
"), - ("![Red dot](data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==)", "
"), - - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_tasklists(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("- [ ] One task", - "
"), - ("- [x] One other task", - "
- One task
"), - ("- [x] One other task\n- [ ] One task\n- [ ] One last task", - "
- One other task
"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_lists(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("* One entry", - "
- One other task
\n- One task
\n- One last task
"), - ("1. One entry", - "
- One entry
"), - ("* an item\n* another item\n* a third item", - "
- One entry
"), - (" * an item\n * another item\n * a third item", - "
- an item
- another item
- a third item
"), - ("lead text\n\n- entry 1\n- entry 2\n- entry 3\n- entry 4\n\nMore text", - "
- an item
- another item
- a third item
lead text
\n\n
- entry 1
\n- entry 2
\n- entry 3
\n- entry 4
\nMore text
\n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - if html != test.1 { - println!("> {:?}", lex(test.0, &[])); - } - assert_eq!(html, test.1); - } -} - -#[test] -fn test_blockquotes(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("> Outer quote with some text 1.\n> \n>> Inner quote with some other text\n> Outer again", - "Outer quote with some text 1.\n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_references(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("Here's some text. And a ref [^1]\n [^1]: Reference text", - "Inner quote with some other textOuter againHere's some text. And a ref 1
\n\n\t\n"), - ("Here's some text. And a ref [^1]\n [^1]: Reference text\n\twith multiple\n lines\n to ensure those work", - "\n\t\t
\n- \t\t\t
\tReference text↩
\t\tHere's some text. And a ref 1
\n\n\t\n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_paragraphs(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("Paragraph 1.\n\n```\nBlock text should end a paragraph.\n```\n\nThis is paragraph two.\n\n## Heading\n\nParagraph the third.", - "\n\t\t
\n- \t\t\t
\tReference text\n\twith multiple\n lines\n to ensure those work↩
\t\tParagraph 1.
\n\nBlock text should end a paragraph.\n
This is paragraph two.
\nHeading
\nParagraph the third.
\n"), - ("# Post title\nSection text\n# Second section\nGood content", - "Post title
\nSection text
\nSecond section
\nGood content
\n") - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } - -} - -#[test] -fn test_links(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("another (See [Sewer Shark](https://en.wikipedia.org/wiki/Sewer_Shark)). Video playback", - "another (See Sewer Shark). Video playback
\n"), - ("r [Distant Worlds](https://www.youtube.com/watch?v=yd3KYOei8o4) a", - "r Distant Worlds a
\n"), - ("Foo\n```\nbattle\nenemy1\n```\nSome text [ddh](https://g.com/d/ddh/t/m)\n\nMore text following", - "Foo
\n\nbattle\nenemy1\n
Some text ddh
\nMore text following
\n"), - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } -} - -#[test] -fn test_details(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("\n", - "Summary
\n\n```\nFoo\n```\n\n"), - ("Summary
\n\n\n\nFoo\n
\n", - "Summary but with spaces
\n\n```\nFoo\n```\n\n"), - ("Summary but with spaces
\n\n\n\nFoo\n
\r\n", - "testing right now
\r\ninner test\r\n\n"), - ("Here's some lead text\ntesting right now
\n\ninner test\r
\n\n\n", - "Summary
\n\n```\nFoo\n```\nHere's some lead text
\n\n") - ]); - - for test in tests.iter(){ - let html = render(test.0); - assert_eq!(html, test.1); - } - -} \ No newline at end of file diff --git a/tests/lexer.rs b/tests/lexer.rs deleted file mode 100644 index fcd0a3a..0000000 --- a/tests/lexer.rs +++ /dev/null @@ -1,159 +0,0 @@ -use mini_markdown::lex; -use mini_markdown::lexer::{Token, TaskBox}; - -#[test] -fn test_lex() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("# Heading level 1", vec![Token::Header(1, "Heading level 1".to_string(), None)]), - ("## Heading level 2", vec![Token::Header(2, "Heading level 2".to_string(), None)]), - ("### Heading level 3", vec![Token::Header(3, "Heading level 3".to_string(), None)]), - ("#### Heading level 4", vec![Token::Header(4, "Heading level 4".to_string(), None)]), - ("##### Heading level 5", vec![Token::Header(5, "Heading level 5".to_string(), None)]), - ("###### Heading level 6", vec![Token::Header(6, "Heading level 6".to_string(), None)]), - ("####### Invalid Heading level 7", vec![Token::Header(6, "Invalid Heading level 7".to_string(), None)]), - ]); - tests.extend(vec![ - ("# Heading level 1 {#Test label}", vec![Token::Header(1, "Heading level 1".to_string(), Some("Test label".to_string()))]), - ("## Heading level 2 {#Test label}", vec![Token::Header(2, "Heading level 2".to_string(), Some("Test label".to_string()))]), - ("### Heading level 3 {#Test label}", vec![Token::Header(3, "Heading level 3".to_string(), Some("Test label".to_string()))]), - ("#### Heading level 4 {#Test label}", vec![Token::Header(4, "Heading level 4".to_string(), Some("Test label".to_string()))]), - ("##### Heading level 5 {#Test label}", vec![Token::Header(5, "Heading level 5".to_string(), Some("Test label".to_string()))]), - ("###### Heading level 6 {#Test label}", vec![Token::Header(6, "Heading level 6".to_string(), Some("Test label".to_string()))]), - ("####### Invalid Heading level 7 {#Test label}", vec![Token::Header(6, "Invalid Heading level 7".to_string(), Some("Test label".to_string()))]), - ]); - tests.extend(vec![ - ("I just love **bold text**.", vec![Token::Plaintext("I just love ".to_string()), Token::Bold("bold text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love __bold text__.", vec![Token::Plaintext("I just love ".to_string()), Token::Bold("bold text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love *_bold text*_.", vec![Token::Plaintext("I just love ".to_string()), Token::Bold("bold text".to_string()), Token::Plaintext(".".to_string())]), - ]); - tests.extend(vec![ - ("I just love *italic text*.", vec![Token::Plaintext("I just love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love _italic text_.", vec![Token::Plaintext("I just love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love *italic text_.", vec![Token::Plaintext("I just love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just\n love *italic text_.", vec![Token::Plaintext("I just".to_string()), Token::Newline, Token::Plaintext(" love ".to_string()), Token::Italic("italic text".to_string()), Token::Plaintext(".".to_string())]), - ]); - tests.extend(vec![ - ("I just love ***bold italic text***.", vec![Token::Plaintext("I just love ".to_string()), Token::BoldItalic("bold italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love ___bold italic text___.", vec![Token::Plaintext("I just love ".to_string()), Token::BoldItalic("bold italic text".to_string()), Token::Plaintext(".".to_string())]), - ("I just love _*_bold italic text*_*.", vec![Token::Plaintext("I just love ".to_string()), Token::BoldItalic("bold italic text".to_string()), Token::Plaintext(".".to_string())]), - ]); - tests.extend(vec![ - ("* unodered list\n", vec![Token::UnorderedListEntry("unodered list".to_string())]), - ("* unodered list\n* with two\n", vec![Token::UnorderedListEntry("unodered list".to_string()), Token::UnorderedListEntry("with two".to_string())]), - ("* unodered list\n* with two\n* with three\n", vec![Token::UnorderedListEntry("unodered list".to_string()), Token::UnorderedListEntry("with two".to_string()), Token::UnorderedListEntry("with three".to_string())]), - ]); - tests.extend(vec![ - ("Some text _with italics_ in the same paragraph", vec![Token::Plaintext("Some text ".to_string()), Token::Italic("with italics".to_string()), Token::Plaintext(" in the same paragraph".to_string())]), - ("Text attributes _italic_, \n**bold**, `monospace`. Some implementations may use *single-asterisks* for italic text.", - vec![ - Token::Plaintext("Text attributes ".to_string()), - Token::Italic("italic".to_string()), - Token::Plaintext(", ".to_string()), - Token::Newline, - Token::Bold("bold".to_string()), - Token::Plaintext(", ".to_string()), - Token::Code("monospace".to_string()), - Token::Plaintext(". Some implementations may use ".to_string()), - Token::Italic("single-asterisks".to_string()), - Token::Plaintext(" for italic text.".to_string()), - ]) - ]); - tests.extend(vec![ - ("![alt](https://example.com/foo.jpeg)", vec![Token::Image("https://example.com/foo.jpeg".to_string(), Some("alt".to_string()))]), - ("![alt]()", vec![Token::Image("".to_string(), Some("alt".to_string()))]), - ("Some test text [^1]", vec![Token::Plaintext("Some test text [^1]".to_string())]), - ("[^1]: First footnote", vec![Token::Footnote("1".to_string(), "First footnote".to_string())]), - ("[^HUGE]: Big footnote", vec![Token::Footnote("HUGE".to_string(), "Big footnote".to_string())]), - ("[^BORK ED]: Big footnote", vec![Token::Plaintext("[^BORK ED]: Big footnote".to_string())]), - - ]); - tests.extend(vec![ - ("---", vec![Token::HorizontalRule]), - ("-----", vec![Token::HorizontalRule]), - ("--", vec![Token::Plaintext("--".to_string())]), - ("- [ ] Unchecked box", vec![Token::TaskListItem(TaskBox::Unchecked, "Unchecked box".to_string())]), - ("+ [ ] Unchecked box", vec![Token::TaskListItem(TaskBox::Unchecked, "Unchecked box".to_string())]), - ("- [x] Checked box", vec![Token::TaskListItem(TaskBox::Checked, "Checked box".to_string())]), - ("- [X] Also a checked box", vec![Token::TaskListItem(TaskBox::Checked, "Also a checked box".to_string())]), - ("- [X]Not a checked box", vec![Token::UnorderedListEntry("[X]Not a checked box".to_string())]), - ("- [X] A checked box\n- [X] Also a checked box", vec![Token::TaskListItem(TaskBox::Checked, "A checked box".to_string()), Token::Newline, Token::TaskListItem(TaskBox::Checked, "Also a checked box".to_string())]), - ]); - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_lex_plaintext() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("¯\\\\\\_(ツ)\\_/¯", vec![Token::Plaintext("¯\\_(ツ)_/¯".to_string())]), - ("\\_test\\_", vec![Token::Plaintext("_test_".to_string())]), - ("\\*escaping\\_", vec![Token::Plaintext("*escaping_".to_string())]), - ("\\>controls\\<", vec![Token::Plaintext(">controls<".to_string())]), - ("2017-12-6 20:13:00", vec![Token::Plaintext("2017-12-6 20:13:00".to_string())]), - ("\nlayout: post\ntitle: \"Looking back at consoles and codecs\"\ndate: 2017-12-6 20:13:00 +0100\n", - vec![Token::Newline, - Token::Plaintext("layout: post".to_string()), - Token::Newline, - Token::Plaintext("title: \"Looking back at consoles and codecs\"".to_string()), - Token::Newline, - Token::Plaintext("date: 2017-12-6 20:13:00 +0100".to_string()), - Token::Newline]) - ]); - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_blockquote_lex() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("> ", vec![Token::BlockQuote(1, "".to_string())]), - ("> \n>> text", vec![Token::BlockQuote(1, "".to_string()), Token::BlockQuote(2, "text".to_string())]), - ("> text\n> \n>> more text", vec![Token::BlockQuote(1, "text".to_string()) ,Token::BlockQuote(1, "".to_string()), Token::BlockQuote(2, "more text".to_string())]), - - ]); - - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_footnote_lex() { - let mut tests = Vec::new(); - tests.extend(vec![ - ("[^1]: Footnote #1", vec![Token::Footnote("1".to_string(), "Footnote #1".to_string())]), - ("[^1]: Footnote #1\n with a second line", vec![Token::Footnote("1".to_string(), "Footnote #1\n with a second line".to_string())]), - ("[^1]: Footnote #1\n\twith a second line", vec![Token::Footnote("1".to_string(), "Footnote #1\n\twith a second line".to_string())]), - ("[^1]: Footnote #1\n with a second line", vec![Token::Footnote("1".to_string(), "Footnote #1\n with a second line".to_string())]), - ("[^1]: Footnote #1\n with a second line\n\tand a third line", vec![Token::Footnote("1".to_string(), "Footnote #1\n with a second line\n\tand a third line".to_string())]), - ]); - - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} - -#[test] -fn test_link_lex(){ - let mut tests = Vec::new(); - tests.extend(vec![ - ("another (See [Sewer Shark](https://en.wikipedia.org/wiki/Sewer_Shark)). Video", - vec![Token::Plaintext("another (See ".to_string()), Token::Link("https://en.wikipedia.org/wiki/Sewer_Shark".to_string(), Some("Sewer Shark".to_string()), None), Token::Plaintext("). Video".to_string())]), - ("r [Distant Worlds](https://www.youtube.com/watch?v=yd3KYOei8o4) a", - vec![Token::Plaintext("r ".to_string()), Token::Link("https://www.youtube.com/watch?v=yd3KYOei8o4".to_string(), Some("Distant Worlds".to_string()), None), Token::Plaintext(" a".to_string())]) - ]); - - for test in tests.iter(){ - let tokens = lex(test.0, &[]); - assert_eq!(&tokens[..], &test.1[..]); - } -} \ No newline at end of file diff --git a/tests/sanitation.rs b/tests/sanitation.rs index 5c49fbe..1103ae0 100644 --- a/tests/sanitation.rs +++ b/tests/sanitation.rs @@ -5,7 +5,7 @@ fn test_simple_tag_injection() { ("foobarSummary
\n\n\n\nFoo\n
".to_string(), " foobar text for context /scriptjunk
\n".to_string()), ("".to_string(), - "/SCRIPT".to_string()), + "/SCRIPT\n".to_string()), ]); for test in tests.iter_mut(){ @@ -19,9 +19,9 @@ use mini_markdown::render; fn test_image_xss(){ let mut tests = Vec::new(); tests.extend(vec![ - ("![Alt text](foo.jpeg)", ""), - ("![Alt text]()", "
"), - ("![Alt text]( )", "
"), + ("![Alt text](foo.jpeg)", "
\n"), + ("![Alt text]()", "
\n"), + ("![Alt text]( )", "
\n"), ("![Alt text](javascript:alert(0))", "
)
\n"), ]); From cae67c7ddf4b9e73cc84dc6906fbc6d66d49a21e Mon Sep 17 00:00:00 2001 From: Jonathan MoroneyDate: Fri, 16 Sep 2022 09:31:18 -0700 Subject: [PATCH 34/34] Derive Eq for allignment --- src/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lexer.rs b/src/lexer.rs index fe2b4a6..b500bf0 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -71,7 +71,7 @@ impl Token{ } /// Holds the alignment states for the table token -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Alignment { Left, Right,