
Commit 501f558

Change: add quoteable string to Tokenizer
Add the possibility to tokenize quoteable strings:

```rust
let code = "'Hello \\'you\\'!'";
let mut tokenizer = Tokenizer::new(code);
tokenizer.next();
```
1 parent: 4caee91 · commit: 501f558

rust/nasl-syntax/src/token.rs

Lines changed: 50 additions & 23 deletions
```diff
@@ -228,6 +228,32 @@ impl<'a> Tokenizer<'a> {
             _ => single_token!(Less, start, self.cursor.len_consumed()),
         }
     }
+
+    fn tokenize_string(
+        &mut self,
+        string_category: StringCategory,
+        predicate: impl FnMut(char) -> bool,
+    ) -> Option<Token> {
+        // we don't want the lookup to contain "
+        let start = self.cursor.len_consumed();
+        self.cursor.skip_while(predicate);
+        if self.cursor.is_eof() {
+            single_token!(
+                Category::UnclosedString(string_category),
+                start,
+                self.cursor.len_consumed()
+            )
+        } else {
+            let result = single_token!(
+                Category::String(string_category),
+                start,
+                self.cursor.len_consumed()
+            );
+            // skip "
+            self.cursor.advance();
+            result
+        }
+    }
 }
 
 // Is used to simplify cases for double_tokens, instead of having to rewrite each match case for each double_token
```
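The new `tokenize_string` helper leaves the actual scan to a caller-supplied predicate and only wraps the consumed range in a `String` or `UnclosedString` token. For readers unfamiliar with the pattern, here is a standalone sketch of that predicate-driven skip (illustrative only; `skip_while` below is a hypothetical free function, not the crate's `Cursor` API):

```rust
// Standalone sketch of the skip-while idea behind tokenize_string: consume
// characters as long as the predicate holds and report the consumed byte length,
// which the tokenizer would turn into a (start, end) token range.
fn skip_while(input: &str, mut predicate: impl FnMut(char) -> bool) -> usize {
    let mut consumed = 0;
    for c in input.chars() {
        if !predicate(c) {
            break;
        }
        consumed += c.len_utf8();
    }
    consumed
}

fn main() {
    // Unquoteable strings simply stop at the next double quote.
    assert_eq!(skip_while("hello\" rest", |c| c != '"'), 5);
}
```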
```diff
@@ -281,27 +307,17 @@ impl<'a> Iterator for Tokenizer<'a> {
             '=' => double_token!(self.cursor, start, Equal, '=', EqualEqual, '~', EqualTilde),
             '>' => self.tokenize_greater(),
             '<' => self.tokenize_less(),
-            '"' => {
-                // we don't want the lookup to contain "
-                let start = self.cursor.len_consumed();
-                // we neither care about newlines nor escape character
-                self.cursor.skip_while(|c| c != '"');
-                if self.cursor.is_eof() {
-                    single_token!(
-                        UnclosedString(StringCategory::Unquoteable),
-                        start,
-                        self.cursor.len_consumed()
-                    )
-                } else {
-                    let result = single_token!(
-                        String(StringCategory::Unquoteable),
-                        start,
-                        self.cursor.len_consumed()
-                    );
-                    // skip "
-                    self.cursor.advance();
-                    result
-                }
+            '"' => self.tokenize_string(StringCategory::Unquoteable, |c| c != '"'),
+            '\'' => {
+                let mut back_slash = false;
+                self.tokenize_string(StringCategory::Quoteable, |c| {
+                    if !back_slash && c == '\'' {
+                        false
+                    } else {
+                        back_slash = c == '\\';
+                        true
+                    }
+                })
             }
             _ => single_token!(UnknownSymbol, start, self.cursor.len_consumed()),
         }
```
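The single-quote arm is where the new `StringCategory::Quoteable` behaviour lives: the closure keeps a mutable `back_slash` flag so that an escaped quote (`\'`) does not end the string, while every other character updates the flag for the next iteration. A self-contained sketch of the same escape-aware scan (hypothetical helper, independent of the tokenizer):

```rust
// Sketch of the escape-aware predicate from the '\'' branch: find the first
// single quote that is not preceded by a backslash. Hypothetical helper for
// illustration only.
fn find_unescaped_quote(input: &str) -> Option<usize> {
    let mut back_slash = false;
    input.char_indices().find_map(|(i, c)| {
        if !back_slash && c == '\'' {
            Some(i)
        } else {
            // Remember whether this character escapes the next one.
            back_slash = c == '\\';
            None
        }
    })
}

fn main() {
    // The escaped quotes inside are skipped; only the final quote terminates.
    assert_eq!(find_unescaped_quote("Hello \\'you\\'!' rest"), Some(14));
    // With no unescaped closing quote the scan runs to the end of the input,
    // which is the UnclosedString case in the tokenizer.
    assert_eq!(find_unescaped_quote("Hello \\'you\\'!\\'"), None);
}
```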
```diff
@@ -415,7 +431,18 @@ mod tests {
             tokenizer.lookup(result[0].range()),
             "hello I am a closed string\\"
         );
-        let code = "\"hello I am a closed string\\";
-        verify_tokens!(code, vec![(Category::UnclosedString(Unquoteable), 1, 28)]);
+        let code = "\"hello I am a unclosed string\\";
+        verify_tokens!(code, vec![(Category::UnclosedString(Unquoteable), 1, 30)]);
+    }
+
+    #[test]
+    fn quoteable_string() {
+        use StringCategory::*;
+        let code = "'Hello \\'you\\'!'";
+        let (tokenizer, result) = verify_tokens!(code, vec![(Category::String(Quoteable), 1, 15)]);
+        assert_eq!(tokenizer.lookup(result[0].range()), "Hello \\'you\\'!");
+
+        let code = "'Hello \\'you\\'!\\'";
+        verify_tokens!(code, vec![(Category::UnclosedString(Quoteable), 1, 17)]);
     }
 }
```
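The `quoteable_string` test also shows what the expected `(1, 15)` range means: the token covers the bytes between the surrounding quotes, and looking that range up in the source returns the still-escaped content. A plain-Rust illustration of the same range, using byte slicing instead of the crate's `lookup`:

```rust
fn main() {
    // The test expects Category::String(Quoteable) with range (1, 15): the bytes
    // between the single quotes, with the escape sequences left as written.
    let code = "'Hello \\'you\\'!'";
    assert_eq!(&code[1..15], "Hello \\'you\\'!");
}
```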
