From 7439a34ea01f7d0eacf88ee21682870856009e6c Mon Sep 17 00:00:00 2001 From: coderJeff Date: Sun, 5 Feb 2023 09:41:43 -0500 Subject: [PATCH] fbc: respect whitespace between operators - don't join operators separated by whitespace. - Previously, '<' + '=', '<' + '>', '=' + '>', and '-' + '>' were joined together into one token even if separated by whitespace. - don't skip whitespace between decimal point and digit. Previously, '.' + '0..9' floating point literals were allowed to be separated by whitespace. fbc: refactor lexCurrentChar() - add lexEatWhitespace() specifically for testing for whitespace and skipping over whitespace. It's only used in a couple of places - remove the skipwhitespc parameter and whitespace skipping checks from lexCurrentChar() and lexGetLookAheadChar() procedures. - instead, call lexEatWhitespace() explicitly when needed to skip whitespace. Most lexer/parser loops already handle whitespace characters and tokens. --- changelog.txt | 2 ++ src/compiler/lex.bas | 47 ++++++++++++++++++------------------ src/compiler/lex.bi | 4 +-- src/compiler/pp-define.bas | 28 +++++++++++++-------- tests/pp/quote-operators.bas | 45 ++++++++++++++++++++++++++++++++++ tests/pp/quote.bas | 2 +- 6 files changed, 91 insertions(+), 37 deletions(-) create mode 100644 tests/pp/quote-operators.bas diff --git a/changelog.txt b/changelog.txt index 335d550a63..4ae7d8fd62 100644 --- a/changelog.txt +++ b/changelog.txt @@ -30,6 +30,8 @@ Version 1.10.0 - fbc: remove #pragma push( lookup108 ) to use symbol lookup method from fbc-1.08.x and earlier on unqualified symbol names - it was crutch for fb 1.09.0 to help with testing and transition to better lookups - fbc: passing byval constructor preference: prefer A.constructor( B ) instead of of B.operator CAST() as A + A.constructor() when passing UDT by value - this is to fix some inconsistencies in the rules when converting by construction one type to another - fbc: only up-cast initializers when needed. 
New behaviour is to try matching initializers without up-casting before matching with up-casting. +- fbc: don't join operators separated by white space. Previously, '<' + '=', '<' + '>', '=' + '>', and '-' + '>' were joined together even if separated by whitespace. +- fbc: don't skip whitespace between decimal point and digit. Previously, '.' + '0..9' floating point literals were allowed to be separated by whitespace. [added] - gas/gas64: '.cif_sections' and '.cif_' directives for stack unwinding (adeyblue) diff --git a/src/compiler/lex.bas b/src/compiler/lex.bas index c624f395a2..316221ea68 100644 --- a/src/compiler/lex.bas +++ b/src/compiler/lex.bas @@ -300,6 +300,23 @@ sub lexEatChar( ) end if end sub +function lexEatWhitespace( ) as integer + + function = FALSE + + if( lex.ctx->currchar = UINVALID ) then + lex.ctx->currchar = hReadChar( ) + end if + + do while( (lex.ctx->currchar = CHAR_TAB) or (lex.ctx->currchar = CHAR_SPACE) ) + lex.ctx->after_space = TRUE + lexEatChar( ) + lex.ctx->currchar = hReadChar( ) + function = TRUE + loop + +end function + ''::::: private sub hSkipChar @@ -335,21 +352,12 @@ end sub ''::::: function lexCurrentChar _ ( _ - byval skipwhitespc as integer = FALSE _ ) as uinteger if( lex.ctx->currchar = UINVALID ) then lex.ctx->currchar = hReadChar( ) end if - if( skipwhitespc ) then - do while( (lex.ctx->currchar = CHAR_TAB) or (lex.ctx->currchar = CHAR_SPACE) ) - lex.ctx->after_space = TRUE - lexEatChar( ) - lex.ctx->currchar = hReadChar( ) - loop - end if - function = lex.ctx->currchar end function @@ -357,7 +365,6 @@ end function ''::::: function lexGetLookAheadChar _ ( _ - byval skipwhitespc as integer = FALSE _ ) as uinteger if( lex.ctx->lahdchar1 = UINVALID ) then @@ -365,14 +372,6 @@ function lexGetLookAheadChar _ lex.ctx->lahdchar1 = hReadChar( ) end if - if( skipwhitespc ) then - do while( (lex.ctx->lahdchar1 = CHAR_TAB) or (lex.ctx->lahdchar1 = CHAR_SPACE) ) - lex.ctx->after_space = TRUE - hSkipChar( ) - lex.ctx->lahdchar1 = 
hReadChar( ) - loop - end if - function = lex.ctx->lahdchar1 end function @@ -1777,7 +1776,7 @@ re_read: case CHAR_DOT '' only check for fpoint literals if not inside a comment or parsing an $include if( (flags and (LEXCHECK_NOLINECONT or LEXCHECK_NOSUFFIX)) = 0 ) then - var lachar = lexGetLookAheadChar( TRUE ) + var lachar = lexGetLookAheadChar( ) '' '0' .. '9'? if( (lachar >= CHAR_0) and (lachar <= CHAR_9) ) then hReadNumber( *t, flags ) @@ -1882,7 +1881,7 @@ read_char: select case char case CHAR_LT - select case lexCurrentChar( TRUE ) + select case lexCurrentChar( ) '' '<='? case CHAR_EQ t->text[t->len+0] = CHAR_EQ @@ -1905,7 +1904,7 @@ read_char: case CHAR_GT '' '>='? - if( (fbGetGtInParensOnly( ) = FALSE) andalso (lexCurrentChar( TRUE ) = CHAR_EQ) ) then + if( (fbGetGtInParensOnly( ) = FALSE) andalso (lexCurrentChar( ) = CHAR_EQ) ) then t->text[t->len+0] = CHAR_EQ t->text[t->len+1] = 0 t->len += 1 @@ -1917,7 +1916,7 @@ read_char: case CHAR_EQ '' '=>'? - if( lexCurrentChar( TRUE ) = CHAR_GT ) then + if( lexCurrentChar( ) = CHAR_GT ) then t->text[t->len+0] = CHAR_GT t->text[t->len+1] = 0 t->len += 1 @@ -1937,7 +1936,7 @@ read_char: t->class = FB_TKCLASS_OPERATOR '' check for type-field dereference - if( lexCurrentChar( TRUE ) = CHAR_GT ) then + if( lexCurrentChar( ) = CHAR_GT ) then t->text[t->len+0] = CHAR_GT t->text[t->len+1] = 0 t->len += 1 @@ -2013,7 +2012,7 @@ private sub hMultiLineComment( ) static cnt = 0 do - select case as const lexCurrentChar( TRUE ) + select case as const lexCurrentChar( ) '' EOF? 
case 0 errReportEx( FB_ERRMSG_EXPECTEDENDCOMMENT, NULL ) diff --git a/src/compiler/lex.bi b/src/compiler/lex.bi index c29a16b52f..00f8de5eae 100644 --- a/src/compiler/lex.bi +++ b/src/compiler/lex.bi @@ -229,12 +229,10 @@ declare sub lexNextToken _ declare function lexCurrentChar _ ( _ - byval skipwhitespc as integer = FALSE _ ) as uinteger declare function lexGetLookAheadChar _ ( _ - byval skipwhitespc as integer = FALSE _ ) as uinteger declare function lexGetLookAheadChar2 _ @@ -243,6 +241,8 @@ declare function lexGetLookAheadChar2 _ declare sub lexEatChar( ) +declare function lexEatWhitespace( ) as integer + declare function lexPeekCurrentLine _ ( _ byref token_pos as string, _ diff --git a/src/compiler/pp-define.bas b/src/compiler/pp-define.bas index f02e08b2df..287a70ae18 100644 --- a/src/compiler/pp-define.bas +++ b/src/compiler/pp-define.bas @@ -65,13 +65,14 @@ private function hLoadMacro _ function = -1 - var hasParens = false + var hasParens = FALSE '' TODO: we don't know if this paren is the start of the argument list '' or is part of the expression for the first argument. + var hasWhitespace = lexEatWhitespace( ) '' '('? - if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then + if( lexCurrentChar( ) = CHAR_LPRNT ) then hasParens = true else if( (pp.invoking > 0) or ((symbGetDefineFlags( s ) and FB_DEFINE_FLAGS_NEEDPARENS) <> 0 ) ) then @@ -380,9 +381,11 @@ private function hLoadDefine _ '' arg-less macro? if( symbGetDefineIsArgless( s ) ) then - var hasParens = false + var hasParens = FALSE + var hasWhitespace = lexEatWhitespace( ) + '' '('? 
- if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then + if( lexCurrentChar( ) = CHAR_LPRNT ) then hasParens = true else '' not an error, macro can be passed as param to other macros @@ -393,9 +396,10 @@ private function hLoadDefine _ if( hasParens ) then lexEatChar( ) + lexEatWhitespace( ) '' ')' - if( lexCurrentChar( TRUE ) <> CHAR_RPRNT ) then + if( lexCurrentChar( ) <> CHAR_RPRNT ) then errReport( FB_ERRMSG_EXPECTEDRPRNT ) else lexEatChar( ) @@ -451,13 +455,14 @@ private function hLoadMacroW _ function = -1 - var hasParens = false + var hasParens = FALSE '' TODO: we don't know if this paren is the start of the argument list '' or is part of the expression for the first argument. + var hasWhitespace = lexEatWhitespace( ) '' '('? - if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then + if( lexCurrentChar( ) = CHAR_LPRNT ) then hasParens = true else if( (pp.invoking > 0) or ((symbGetDefineFlags( s ) and FB_DEFINE_FLAGS_NEEDPARENS) <> 0 ) ) then @@ -775,9 +780,11 @@ private function hLoadDefineW _ else '' arg-less macro? if( symbGetDefineIsArgless( s ) ) then - var hasParens = false + var hasParens = FALSE + var hasWhitespace = lexEatWhitespace( ) + '' '('? 
- if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then + if( lexCurrentChar( ) = CHAR_LPRNT ) then hasParens = true else '' not an error, macro can be passed as param to other macros @@ -788,9 +795,10 @@ private function hLoadDefineW _ if( hasParens ) then lexEatChar( ) + lexEatWhitespace( ) '' ')' - if( lexCurrentChar( TRUE ) <> CHAR_RPRNT ) then + if( lexCurrentChar( ) <> CHAR_RPRNT ) then errReport( FB_ERRMSG_EXPECTEDRPRNT ) else lexEatChar( ) diff --git a/tests/pp/quote-operators.bas b/tests/pp/quote-operators.bas new file mode 100644 index 0000000000..e691044f1e --- /dev/null +++ b/tests/pp/quote-operators.bas @@ -0,0 +1,45 @@ +#include "fbcunit.bi" + +SUITE( fbc_tests.pp.quote_operators ) + + TEST( direct ) + + '' previously, these operators were joined together + '' because the lexer was skipping the whitespace in + '' between the operators. + + '' do a test for common operators and check + '' that whitespace is preserved for certain cases + + #macro check( arg, compare ) + scope + dim s as string + s = __fb_quote__( arg ) + CU_ASSERT_EQUAL( s, compare ) + end scope + #endmacro + + check(1 = 2, "1 = 2" ) + check(1 + 2, "1 + 2" ) + check(1 - 2, "1 - 2" ) + check(1 / 2, "1 / 2" ) + check(1 \ 2, "1 \ 2" ) + check(1 * 2, "1 * 2" ) + check(1 ^ 2, "1 ^ 2" ) + + check(1 < 2, "1 < 2" ) + check(1 > 2, "1 > 2" ) + + check(1 <> 2, "1 <> 2" ) + check(1 <= 2, "1 <= 2" ) + check(1 >= 2, "1 >= 2" ) + check(1 -> 2, "1 -> 2" ) + + check(1 < > 2, "1 < > 2" ) + check(1 < = 2, "1 < = 2" ) + check(1 > = 2, "1 > = 2" ) + check(1 - > 2, "1 - > 2" ) + + END_TEST + +END_SUITE diff --git a/tests/pp/quote.bas b/tests/pp/quote.bas index d5b940ac02..516e465659 100644 --- a/tests/pp/quote.bas +++ b/tests/pp/quote.bas @@ -14,7 +14,7 @@ SUITE( fbc_tests.pp.quote ) res += """" function = res end function - + TEST( direct ) CU_ASSERT_EQUAL( __FB_QUOTE__( freebasic ), "freebasic" )