@@ -354,11 +354,15 @@ impl<'a> Tokenizer<'a> {
354354 }
355355
356356 Token :: Whitespace ( Whitespace :: Tab ) => self . col += 4 ,
357- Token :: Word ( w) if w. quote_style == None => self . col += w. value . len ( ) as u64 ,
358- Token :: Word ( w) if w. quote_style != None => self . col += w. value . len ( ) as u64 + 2 ,
359- Token :: Number ( s, _) => self . col += s. len ( ) as u64 ,
360- Token :: SingleQuotedString ( s) => self . col += s. len ( ) as u64 ,
361- Token :: Placeholder ( s) => self . col += s. len ( ) as u64 ,
357+ Token :: Word ( w) if w. quote_style == None => {
358+ self . col += w. value . chars ( ) . count ( ) as u64
359+ }
360+ Token :: Word ( w) if w. quote_style != None => {
361+ self . col += w. value . chars ( ) . count ( ) as u64 + 2
362+ }
363+ Token :: Number ( s, _) => self . col += s. chars ( ) . count ( ) as u64 ,
364+ Token :: SingleQuotedString ( s) => self . col += s. chars ( ) . count ( ) as u64 ,
365+ Token :: Placeholder ( s) => self . col += s. chars ( ) . count ( ) as u64 ,
362366 _ => self . col += 1 ,
363367 }
364368
@@ -1220,6 +1224,22 @@ mod tests {
12201224 ) ;
12211225 }
12221226
/// Regression test: an unterminated string literal that follows multi-byte
/// UTF-8 identifiers must be reported at its character column.
///
/// 34 characters precede the opening `'` of `'test;`, so the expected column
/// is 35. Counting UTF-8 bytes instead would overshoot, because each quoted
/// `なにか` identifier is 3 characters but 9 bytes long.
#[test]
fn tokenize_unterminated_string_literal_utf8() {
    // No padding inside the quoted identifiers: any extra character in front
    // of the literal would move the error column away from 35.
    let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;");

    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, &sql);
    assert_eq!(
        tokenizer.tokenize(),
        Err(TokenizerError {
            message: "Unterminated string literal".to_string(),
            line: 1,
            col: 35,
        })
    );
}
1242+
12231243 #[ test]
12241244 fn tokenize_invalid_string_cols ( ) {
12251245 let sql = String :: from ( "\n \n SELECT * FROM table\t مصطفىh" ) ;
0 commit comments