Commit 0bc0a09
Small cleanup (#50)
* Use next if eq rather than peek (the pattern is sketched just after the commit metadata below)

* Prefer next if eq over peek then next

* Remove some intermediate variables

* Try to avoid reallocations of the html string.
  Guess is length of the token vec * 100 bytes (a note on the guess follows the src/lib.rs diff)

* Remove unused note text variable

* One more peek-next pattern

* Simplify match in push_str (a standalone sketch follows the src/lexer.rs diff)

* Use raw unwrap

* Simplify newline lex

* Simplify side carrot lex

* Simplify plus minus lex

* Small cleanup in parse_details

* May as well check length while also checking prefix and suffix
darakian committed May 6, 2022
1 parent 9ffadeb commit 0bc0a09
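The recurring change in this commit replaces the peek-compare-advance sequence with a single next_if_eq call. Here is a minimal sketch of the pattern; it uses std's Peekable rather than this crate's MiniIter (MiniIter walks &str slices, but the API shape is the same), so treat it as an illustration of the idiom, not the crate's code:

fn main() {
    // Before: peek, compare, then advance in a separate step.
    let mut a = "> quote".chars().peekable();
    if a.peek() == Some(&'>') {
        a.next();
    }

    // After: compare and advance in one call; the '>' is consumed
    // only when it matches.
    let mut b = "> quote".chars().peekable();
    if b.next_if_eq(&'>').is_some() {
        // matched and consumed
    }

    assert_eq!(a.collect::<String>(), b.collect::<String>());
}

Folding the check and the advance into one call removes the risk of peeking one token and then consuming a different one.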
Showing 2 changed files with 56 additions and 108 deletions.
src/lexer.rs (117 changes: 40 additions & 77 deletions)
@@ -97,13 +97,10 @@ pub(crate) struct ParseError<'a>{

 pub(crate) fn push_str<'a>(t: &mut Vec<Token>, s: &'a str) {
     match t.last_mut() {
-        Some(markdown_token) => {
-            match markdown_token {
-                Token::Plaintext(plaintext_token) => plaintext_token.push_str(s),
-                _ => t.push(Token::Plaintext(s.to_string())),
-            }
-        }
-        None => t.push(Token::Plaintext(s.to_string())),
+        Some(Token::Plaintext(token)) => {
+            token.push_str(s)
+        }
+        _ => t.push(Token::Plaintext(s.to_string())),
     }
 }

@@ -117,9 +114,9 @@ pub(crate) fn lex_heading<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>,
     if line.contains("{#") &&
        line.contains('}') {
         let (heading, _title) = line.split_once("{").unwrap_or(("",""));
-        let line = line.strip_prefix(&heading).unwrap_or("");
-        let line = line.strip_prefix("{#").unwrap_or("");
-        let line = line.strip_suffix("}").unwrap_or("");
+        let line = line.strip_prefix(&heading).unwrap()
+            .strip_prefix("{#").unwrap()
+            .strip_suffix("}").unwrap();
         return Ok(Token::Header(level, heading.trim(), Some(line)));
     }
     return Ok(Token::Header(level, line, None));
@@ -181,7 +178,7 @@ pub(crate) fn lex_spaces<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>,
         return Err(ParseError{content: spaces})
     }
     // Case 2: two or more spaces followed by \n => line break
-    if char_iter.peek() == Some(&"\n"){
+    if char_iter.next_if_eq("\n").is_some() {
         return Ok(Token::LineBreak);
     }
     // Case 3: Tokenize for parser
@@ -210,11 +207,9 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a
         }
     }
     // leading_ticks.len() == 3. Check for lang
-    if char_iter.peek() != Some(&"\n") {
+    if char_iter.next_if_eq("\n") != Some(&"\n") {
         lang = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
         char_iter.next();
-    } else {
-        char_iter.next();
     }
     let s = char_iter.consume_while_case_holds(&|c| c != "`").unwrap_or("");
     let trailing_ticks = char_iter.consume_while_case_holds(&|c| c == "`").unwrap_or("");
@@ -226,17 +221,17 @@ pub(crate) fn lex_backticks<'a>(char_iter: &mut MiniIter<'a
 }
 
 pub(crate) fn lex_newlines<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
-    let newlines = char_iter.consume_while_case_holds(&|c| c == "\n").unwrap_or("");
-    match newlines.len() {
-        0..=1 => return Err(ParseError{content: newlines}),
-        _ => return Ok(Token::Newline)
+    match char_iter.consume_while_case_holds(&|c| c == "\n") {
+        Some(s) if s.len() > 1 => return Ok(Token::Newline),
+        Some(s) if s.len() <= 1 => return Err(ParseError{content: s}),
+        _ => return Err(ParseError{content: ""}),
     }
 }
 
 pub(crate) fn lex_blockquotes<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
     let right_arrows = char_iter.consume_while_case_holds(&|c| c == ">").unwrap_or("");
-    match char_iter.peek() {
-        Some(" ") => {char_iter.next();},
+    match char_iter.next_if_eq(" ") {
+        Some(" ") => {},
         _ => {return Err(ParseError{content: right_arrows})}
     }
     let s = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
@@ -246,10 +241,9 @@ pub(crate) fn lex_blockquotes<'a>(char_iter: &mut MiniIter<

 pub(crate) fn lex_images<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
     let start_index = char_iter.get_index();
-    if char_iter.peek() != Some(&"!"){
+    if char_iter.next_if_eq("!") != Some(&"!"){
         return Err(ParseError{content: ""})
     }
-    char_iter.next();
     let link_result = lex_links(char_iter);
     match link_result {
         Err(_e) => return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")}),
@@ -260,36 +254,30 @@ pub(crate) fn lex_images<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>,

 pub(crate) fn lex_links<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
     let start_index = char_iter.get_index();
-    if char_iter.peek() != Some(&"[") {
+    if char_iter.next_if_eq("[") != Some(&"[") {
         return Err(ParseError{content: ""})
     }
-    char_iter.next();
     let title = char_iter.consume_while_case_holds(&|c| c != "]").unwrap_or("");
-    if char_iter.peek() != Some(&"]") {
+    if char_iter.next_if_eq("]") != Some(&"]") {
         return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
     }
-    char_iter.next();
     // Parse footnotes big and small
-    if title.starts_with("^") && char_iter.peek() == Some(&":") {
-        char_iter.next();
+    if title.starts_with("^") && char_iter.next_if_eq(":") == Some(&":") {
         let ref_id = title.strip_prefix("^").unwrap_or("");
-        let mut note_text = String::new();
         let note_index = char_iter.get_index();
         loop {
-            note_text.push_str(char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or(""));
+            char_iter.consume_while_case_holds(&|c| c != "\n");
             char_iter.next();
             if char_iter.peek() != Some(&" ") && char_iter.peek() != Some(&"\t") {
                 break;
             }
-            if char_iter.peek() == Some(&"\t") {
-                char_iter.next();
-                note_text.push('\n');
+            if char_iter.next_if_eq("\t") == Some(&"\t") {
                 continue;
             }
             if char_iter.peek() == Some(&" ") {
                 let spaces = char_iter.consume_while_case_holds(&|c| c == " ").unwrap_or("");
                 match spaces.len() {
-                    2 | 4 => {note_text.push('\n');},
+                    2 | 4 => {},
                     _ => {return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})},
                 }
                 continue
@@ -301,16 +289,14 @@ pub(crate) fn lex_links<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
         }
         return Ok(Token::Footnote(ref_id, char_iter.get_substring_from(note_index).unwrap_or("").trim()));
     }
-    if char_iter.peek() != Some(&"(") {
+    if char_iter.next_if_eq("(") != Some(&"(") {
         return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
     }
-    char_iter.next();
     let link = char_iter.consume_while_case_holds(&|c| c != ")" && c != " ").unwrap_or("");
     if char_iter.peek() != Some(&")") && char_iter.peek() != Some(&" ") {
         return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
     }
-    if char_iter.peek() == Some(&")") {
-        char_iter.next();
+    if char_iter.next_if_eq(")") == Some(&")") {
         return Ok(Token::Link(link, Some(title), None));
     }
     if char_iter.peek() == Some(&" ") {
@@ -322,26 +308,19 @@ pub(crate) fn lex_links<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
 }
 
 pub(crate) fn lex_side_carrot<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
-    match char_iter.peek() {
+    match char_iter.next_if_eq("<") {
         Some("<") => {
-            char_iter.next();
             let s = char_iter.consume_while_case_holds(&|c| c != ">").unwrap_or("");
-            match char_iter.peek(){
-                Some(">") if s != "details" => {
-                    char_iter.next();
-                    return Ok(Token::Link(s, None, None))
-                },
-                Some(">") if s == "details" => {
-                    char_iter.next();
+            match (s, char_iter.next_if_eq(">")) {
+                ("details", Some(">")) => {
                     char_iter.next_if_eq(&"\r");
                     if !char_iter.next_if_eq(&"\n").is_some(){
                         return Err(ParseError{content: s});
                     }
                     return parse_details(char_iter)
                 },
-                _ => {
-                    return Err(ParseError{content: s});
-                }
+                (_, Some(">")) => return Ok(Token::Link(s, None, None)),
+                (_, _) => return Err(ParseError{content: s}),
             }
         }
         _ => return Err(ParseError{content: ""})
@@ -358,29 +337,20 @@ pub(crate) fn lex_plus_minus<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'
         _ => {return Err(ParseError{content: "string length error"})},
     }
     let line = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
-    if line.starts_with(" [ ] ") {
-        return Ok(Token::TaskListItem(TaskBox::Unchecked,line.strip_prefix(" [ ] ").unwrap_or("")))
-    } else if line.starts_with(" [x] ") {
-        return Ok(Token::TaskListItem(TaskBox::Checked,line.strip_prefix(" [x] ").unwrap_or("")))
-    } else if line.starts_with(" [X] ") {
-        return Ok(Token::TaskListItem(TaskBox::Checked,line.strip_prefix(" [X] ").unwrap_or("")))
-    } else if line.starts_with(" "){
-        return Ok(Token::UnorderedListEntry(line.strip_prefix(" ").unwrap_or("")))
-    } else {
-        return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
-    }
+    if line.starts_with(" [ ] "){return Ok(Token::TaskListItem(TaskBox::Unchecked, &line[5..]))}
+    else if line.starts_with(" [x] ") || line.starts_with(" [X] "){return Ok(Token::TaskListItem(TaskBox::Checked, &line[5..]))}
+    else if line.starts_with(" "){return Ok(Token::UnorderedListEntry(&line[1..]))}
+    else {return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})}
 }
 
 pub(crate) fn lex_numbers<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
     let start_index = char_iter.get_index();
     let c = char_iter.next().unwrap();
-    match char_iter.peek() {
+    match char_iter.next_if_eq(".") {
         Some(".") => {
-            char_iter.next().unwrap();
-            if char_iter.peek() != Some(&" "){
+            if char_iter.next_if_eq(" ") != Some(&" "){
                 return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
             }
-            char_iter.next();
             let s = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
             return Ok(Token::OrderedListEntry(s))
         },
@@ -390,7 +360,7 @@ pub(crate) fn lex_numbers<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>,

 pub(crate) fn lex_tilde<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>> {
     let start_index = char_iter.get_index();
-    let lead_tildes = match char_iter.consume_while_case_holds(&|s| s == "~"){
+    let lead_tildes = match char_iter.consume_while_case_holds(&|s| s == "~") {
         Some(s) => s,
         None => return Err(ParseError{content: "Failure to parse ~"}),
     };
@@ -401,7 +371,6 @@ pub(crate) fn lex_tilde<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
             let tail_tildes = char_iter.consume_while_case_holds(&|s| s == "~").unwrap_or("");
             if lead_tildes.len() != tail_tildes.len() {
                 return Err(ParseError{content: char_iter.get_substring_from(start_index).unwrap_or("")})
-                // return Err(ParseError{content: format!("{}{}{}", lead_tildes, line, tail_tildes)})
             }
             return Ok(Token::Strikethrough(line));
         }
@@ -410,19 +379,13 @@ pub(crate) fn lex_tilde<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, P
 }
 
 fn parse_details<'a>(char_iter: &mut MiniIter<'a>) -> Result<Token<'a>, ParseError<'a>>{
-    let mut summary_line = char_iter.consume_while_case_holds(&|c| c != "\n").unwrap_or("");
-    if summary_line.ends_with("\r") {
-        summary_line = summary_line.strip_suffix("\r").unwrap_or("");
-    }
-    if !summary_line.starts_with("<summary") || !summary_line.ends_with("</summary>") {
+    let mut summary_line = char_iter.consume_while_case_holds(&|c| c != "\n" && c != "\r").unwrap_or("");
+    char_iter.next_if_eq("\r");
+    if (!summary_line.starts_with("<summary>") || !summary_line.ends_with("</summary>")) && !summary_line.len() >= 20 {
         return Err(ParseError{content: summary_line});
     }
-    summary_line = summary_line.strip_prefix("<summary>").unwrap_or("");
-    summary_line = summary_line.strip_suffix("</summary>").unwrap_or("");
-    match summary_line.len() {
-        0 => {return Err(ParseError{content: "<summary></summary>"})},
-        _ => {},
-    }
+    summary_line = summary_line.strip_prefix("<summary>").unwrap()
+        .strip_suffix("</summary>").unwrap_or("");
     let remaining_text_index = char_iter.get_index();
     let mut remaining_text = char_iter.consume_until_tail_is("</details>").unwrap_or("");
     if remaining_text.contains("<details>") {
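On the push_str hunk above: a self-contained sketch of the simplified shape, using a cut-down stand-in for the crate's Token enum (only the variants the sketch needs, so an illustration rather than the real type). The nested-pattern arm collapses the old two-level match into one:

enum Token {
    Plaintext(String),
    LineBreak,
}

fn push_str(t: &mut Vec<Token>, s: &str) {
    match t.last_mut() {
        // last_mut() yields &mut Token, so this arm can extend in place.
        Some(Token::Plaintext(token)) => token.push_str(s),
        // A single catch-all now covers both an empty vec and a
        // non-Plaintext tail, replacing the old Some/None split.
        _ => t.push(Token::Plaintext(s.to_string())),
    }
}

fn main() {
    let mut tokens = vec![Token::Plaintext("ab".to_string())];
    push_str(&mut tokens, "cd"); // extends in place: "abcd"
    tokens.push(Token::LineBreak);
    push_str(&mut tokens, "ef"); // tail is not Plaintext: new token
    assert_eq!(tokens.len(), 3);
}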
src/lib.rs (47 changes: 16 additions & 31 deletions)
@@ -15,94 +15,81 @@ pub fn lex(source: &str) -> Vec<Token>{
     while char_iter.peek().is_some(){
         match char_iter.peek().unwrap(){
             "#" => {
-                let token = lex_heading(&mut char_iter);
-                match token {
+                match lex_heading(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "*" | "_" => {
-                let token = lex_asterisk_underscore(&mut char_iter);
-                match token {
+                match lex_asterisk_underscore(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "~" => {
-                let token = lex_tilde(&mut char_iter);
-                match token {
+                match lex_tilde(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "-" | "+" => {
-                let token = lex_plus_minus(&mut char_iter);
-                match token {
+                match lex_plus_minus(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             " " => {
-                let token = lex_spaces(&mut char_iter);
-                match token {
+                match lex_spaces(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "`" => {
-                let token = lex_backticks(&mut char_iter);
-                match token {
+                match lex_backticks(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "\n" => {
-                let token = lex_newlines(&mut char_iter);
-                match token {
+                match lex_newlines(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             ">" => {
-                let token = lex_blockquotes(&mut char_iter);
-                match token {
+                match lex_blockquotes(&mut char_iter) {
                     Ok(t) => {
                         tokens.push(t);
                     },
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "!" => {
-                let token = lex_images(&mut char_iter);
-                match token {
+                match lex_images(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "[" => {
-                let token = lex_links(&mut char_iter);
-                match token {
+                match lex_links(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "<" => {
-                let token = lex_side_carrot(&mut char_iter);
-                match token {
+                match lex_side_carrot(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "|" => {
-                let token = lex_pipes(&mut char_iter);
-                match token {
+                match lex_pipes(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
             },
             "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "0" => {
-                let token = lex_numbers(&mut char_iter);
-                match token {
+                match lex_numbers(&mut char_iter) {
                     Ok(t) => tokens.push(t),
                     Err(e) => push_str(&mut tokens, e.content),
                 }
@@ -111,13 +98,11 @@ pub fn lex(source: &str) -> Vec<Token>{
             "\\" => {
                 char_iter.next();
                 if char_iter.peek().is_some(){
-                    let c = char_iter.next().unwrap();
-                    push_str(&mut tokens, c);
+                    push_str(&mut tokens, char_iter.next().unwrap());
                 }
             }
             _ => {
-                let c = char_iter.next().unwrap();
-                push_str(&mut tokens, c);
+                push_str(&mut tokens, char_iter.next().unwrap());
             },
         }
     }
@@ -126,7 +111,7 @@ pub fn lex(source: &str) -> Vec<Token>{

 /// Parse tokens to produce safe html output
 pub fn parse(tokens: &[Token]) -> String {
-    let mut html = String::new();
+    let mut html = String::with_capacity(tokens.len()*100);
     let mut in_task_list = false;
     let mut in_ordered_list = false;
     let mut in_unordered_list = false;
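On the with_capacity change just above: the lexer emits roughly one token per markdown construct, and the commit guesses about 100 bytes of HTML output per token. The capacity is only a sizing hint; the String still grows correctly past it if the guess is low, so the heuristic trades a little up-front memory for fewer reallocations during push_str. A standalone sketch, with a hypothetical two-variant Token and an assumed <br> rendering for line breaks (neither is the crate's real type or output):

enum Token {
    Plaintext(String),
    LineBreak,
}

fn parse(tokens: &[Token]) -> String {
    // Reserve ~100 bytes of output per token so appends mostly happen
    // in place. The multiplier is the commit's stated guess, not a
    // measured constant.
    let mut html = String::with_capacity(tokens.len() * 100);
    for token in tokens {
        match token {
            Token::Plaintext(s) => html.push_str(s),
            Token::LineBreak => html.push_str("<br>"),
        }
    }
    html
}

fn main() {
    let tokens = vec![Token::Plaintext("hi".to_string()), Token::LineBreak];
    assert_eq!(parse(&tokens), "hi<br>");
}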