@@ -228,6 +228,32 @@ impl<'a> Tokenizer<'a> {
            _ => single_token!(Less, start, self.cursor.len_consumed()),
        }
    }
+
+    fn tokenize_string(
+        &mut self,
+        string_category: StringCategory,
+        predicate: impl FnMut(char) -> bool,
+    ) -> Option<Token> {
+        // the lookup should not contain the quote characters
+        let start = self.cursor.len_consumed();
+        self.cursor.skip_while(predicate);
+        if self.cursor.is_eof() {
+            single_token!(
+                Category::UnclosedString(string_category),
+                start,
+                self.cursor.len_consumed()
+            )
+        } else {
+            let result = single_token!(
+                Category::String(string_category),
+                start,
+                self.cursor.len_consumed()
+            );
+            // skip the closing quote
+            self.cursor.advance();
+            result
+        }
+    }
 }

// Used to simplify the double_token cases instead of rewriting each match arm for every double_token
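For context, a minimal standalone sketch of the contract `tokenize_string` builds on (the `scan_string` helper and its `Result` shape are illustrative, not the crate's API): the predicate consumes the string body, and reaching EOF before the predicate rejects a character yields the unclosed case.

```rust
// Illustrative sketch only: `input` stands for everything after the opening
// quote. Consume characters while the predicate holds; a rejected character
// marks the closing quote, EOF marks an unclosed string.
fn scan_string(input: &str, mut predicate: impl FnMut(char) -> bool) -> Result<&str, &str> {
    for (idx, c) in input.char_indices() {
        if !predicate(c) {
            // the closing quote is excluded from the body and skipped by the caller
            return Ok(&input[..idx]);
        }
    }
    // the predicate never rejected a character: EOF -> unclosed string
    Err(input)
}

fn main() {
    assert_eq!(scan_string("closed\" rest", |c| c != '"'), Ok("closed"));
    assert_eq!(scan_string("unclosed", |c| c != '"'), Err("unclosed"));
}
```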
@@ -281,27 +307,17 @@ impl<'a> Iterator for Tokenizer<'a> {
            '=' => double_token!(self.cursor, start, Equal, '=', EqualEqual, '~', EqualTilde),
            '>' => self.tokenize_greater(),
            '<' => self.tokenize_less(),
-            '"' => {
-                // we don't want the lookup to contain "
-                let start = self.cursor.len_consumed();
-                // we neither care about newlines nor escape character
-                self.cursor.skip_while(|c| c != '"');
-                if self.cursor.is_eof() {
-                    single_token!(
-                        UnclosedString(StringCategory::Unquoteable),
-                        start,
-                        self.cursor.len_consumed()
-                    )
-                } else {
-                    let result = single_token!(
-                        String(StringCategory::Unquoteable),
-                        start,
-                        self.cursor.len_consumed()
-                    );
-                    // skip "
-                    self.cursor.advance();
-                    result
-                }
+            '"' => self.tokenize_string(StringCategory::Unquoteable, |c| c != '"'),
+            '\'' => {
+                let mut back_slash = false;
+                self.tokenize_string(StringCategory::Quoteable, |c| {
+                    if !back_slash && c == '\'' {
+                        false
+                    } else {
+                        back_slash = c == '\\';
+                        true
+                    }
+                })
            }
            _ => single_token!(UnknownSymbol, start, self.cursor.len_consumed()),
        }
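To make the escape handling concrete, here is the `'\''` arm's stateful predicate exercised in isolation (a hedged sketch using plain iterators instead of the tokenizer's cursor): it rejects only an unescaped single quote, while a quote directly preceded by a backslash stays part of the string body.

```rust
fn main() {
    // same closure as in the diff above: `back_slash` remembers whether the
    // previous character was a backslash
    let mut back_slash = false;
    let mut predicate = |c: char| {
        if !back_slash && c == '\'' {
            false
        } else {
            back_slash = c == '\\';
            true
        }
    };
    // body of `'It\'s'` after the opening quote: the escaped quote is kept,
    // the final unescaped quote stops the scan
    let body: String = "It\\'s'".chars().take_while(|&c| predicate(c)).collect();
    assert_eq!(body, "It\\'s");
}
```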
@@ -415,7 +431,18 @@ mod tests {
            tokenizer.lookup(result[0].range()),
            "hello I am a closed string\\"
        );
-        let code = "\"hello I am a closed string\\";
-        verify_tokens!(code, vec![(Category::UnclosedString(Unquoteable), 1, 28)]);
+        let code = "\"hello I am a unclosed string\\";
+        verify_tokens!(code, vec![(Category::UnclosedString(Unquoteable), 1, 30)]);
+    }
+
+    #[test]
+    fn quoteable_string() {
+        use StringCategory::*;
+        let code = "'Hello \\'you\\'!'";
+        let (tokenizer, result) = verify_tokens!(code, vec![(Category::String(Quoteable), 1, 15)]);
+        assert_eq!(tokenizer.lookup(result[0].range()), "Hello \\'you\\'!");
+
+        let code = "'Hello \\'you\\'!\\'";
+        verify_tokens!(code, vec![(Category::UnclosedString(Quoteable), 1, 17)]);
    }
}
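As a sanity check on the ranges asserted above, a hedged sketch of how `(1, 15)` maps onto byte offsets, assuming `range()` spans the string body between the quotes and `lookup` slices the source verbatim, escape sequences included:

```rust
fn main() {
    let code = "'Hello \\'you\\'!'";
    // start after the opening quote, end before the closing one
    let (start, end) = (1, 15);
    assert_eq!(&code[start..end], "Hello \\'you\\'!");
}
```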