Skip to content

Commit

Permalink
fbc: respect whitespace between operators
Browse files Browse the repository at this point in the history
- don't join operators separated by whitespace.
- Previously, '<' + '=', '<' + '>', '=' + '>', and '-' + '>' were
  joined together into one token even if separated by whitespace.
- don't skip whitespace between decimal point and digit. Previously,
  '.' + '0..9' floating point literals were allowed to be separated
  by whitespace.

fbc: refactor lexCurrentChar()

- add lexEatWhitespace() specifically for testing for whitespace
  and skipping over whitespace.  It's only used in a couple of
  places.
- remove the skipwhitespc parameter and whitespace skipping checks
  from lexCurrentChar() and lexGetLookAheadChar() procedures.
- instead, call lexEatWhitespace() explicitly when needed to skip
  whitespace.  Most lexer/parser loops already handle whitespace
  characters and tokens.
  • Loading branch information
jayrm committed Feb 5, 2023
1 parent 38bfcc6 commit 7439a34
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 37 deletions.
2 changes: 2 additions & 0 deletions changelog.txt
Expand Up @@ -30,6 +30,8 @@ Version 1.10.0
- fbc: remove #pragma push( lookup108 ) to use symbol lookup method from fbc-1.08.x and earlier on unqualified symbol names - it was a crutch for fb 1.09.0 to help with testing and transition to better lookups
- fbc: passing byval constructor preference: prefer A.constructor( B ) instead of B.operator CAST() as A + A.constructor() when passing UDT by value - this is to fix some inconsistencies in the rules when converting by construction one type to another
- fbc: only up-cast initializers when needed. New behaviour is to try matching initializers without up-casting before matching with up-casting.
- fbc: don't join operators separated by whitespace. Previously, '<' + '=', '<' + '>', '=' + '>', and '-' + '>' were joined together even if separated by whitespace.
- fbc: don't skip whitespace between decimal point and digit. Previously, '.' + '0..9' floating point literals were allowed to be separated by whitespace.

[added]
- gas/gas64: '.cif_sections' and '.cif_' directives for stack unwinding (adeyblue)
Expand Down
47 changes: 23 additions & 24 deletions src/compiler/lex.bas
Expand Up @@ -300,6 +300,23 @@ sub lexEatChar( )
end if
end sub

'':::::
'' Consume any run of tab and space characters starting at the lexer's
'' current character.
''
'' Loads the current character first if it has not been read yet
'' (currchar = UINVALID). For every tab/space found, lex.ctx->after_space
'' is set to TRUE, the char is eaten with lexEatChar( ) and the next one
'' is fetched with hReadChar( ).
''
'' Returns TRUE if at least one whitespace character was consumed,
'' FALSE otherwise.
function lexEatWhitespace( ) as integer

	dim as integer skipped = FALSE

	'' make sure there is a current char to look at
	if( lex.ctx->currchar = UINVALID ) then
		lex.ctx->currchar = hReadChar( )
	end if

	do
		select case lex.ctx->currchar
		case CHAR_TAB, CHAR_SPACE
			'' flag first, then eat and re-read (same order as always)
			lex.ctx->after_space = TRUE
			lexEatChar( )
			lex.ctx->currchar = hReadChar( )
			skipped = TRUE

		case else
			'' first non-whitespace char: leave it as the current char
			exit do
		end select
	loop

	function = skipped

end function

'':::::
private sub hSkipChar

Expand Down Expand Up @@ -335,44 +352,26 @@ end sub
'':::::
function lexCurrentChar _
( _
byval skipwhitespc as integer = FALSE _
) as uinteger

if( lex.ctx->currchar = UINVALID ) then
lex.ctx->currchar = hReadChar( )
end if

if( skipwhitespc ) then
do while( (lex.ctx->currchar = CHAR_TAB) or (lex.ctx->currchar = CHAR_SPACE) )
lex.ctx->after_space = TRUE
lexEatChar( )
lex.ctx->currchar = hReadChar( )
loop
end if

function = lex.ctx->currchar

end function

'':::::
function lexGetLookAheadChar _
( _
byval skipwhitespc as integer = FALSE _
) as uinteger

if( lex.ctx->lahdchar1 = UINVALID ) then
hSkipChar( )
lex.ctx->lahdchar1 = hReadChar( )
end if

if( skipwhitespc ) then
do while( (lex.ctx->lahdchar1 = CHAR_TAB) or (lex.ctx->lahdchar1 = CHAR_SPACE) )
lex.ctx->after_space = TRUE
hSkipChar( )
lex.ctx->lahdchar1 = hReadChar( )
loop
end if

function = lex.ctx->lahdchar1

end function
Expand Down Expand Up @@ -1777,7 +1776,7 @@ re_read:
case CHAR_DOT
'' only check for fpoint literals if not inside a comment or parsing an $include
if( (flags and (LEXCHECK_NOLINECONT or LEXCHECK_NOSUFFIX)) = 0 ) then
var lachar = lexGetLookAheadChar( TRUE )
var lachar = lexGetLookAheadChar( )
'' '0' .. '9'?
if( (lachar >= CHAR_0) and (lachar <= CHAR_9) ) then
hReadNumber( *t, flags )
Expand Down Expand Up @@ -1882,7 +1881,7 @@ read_char:

select case char
case CHAR_LT
select case lexCurrentChar( TRUE )
select case lexCurrentChar( )
'' '<='?
case CHAR_EQ
t->text[t->len+0] = CHAR_EQ
Expand All @@ -1905,7 +1904,7 @@ read_char:

case CHAR_GT
'' '>='?
if( (fbGetGtInParensOnly( ) = FALSE) andalso (lexCurrentChar( TRUE ) = CHAR_EQ) ) then
if( (fbGetGtInParensOnly( ) = FALSE) andalso (lexCurrentChar( ) = CHAR_EQ) ) then
t->text[t->len+0] = CHAR_EQ
t->text[t->len+1] = 0
t->len += 1
Expand All @@ -1917,7 +1916,7 @@ read_char:

case CHAR_EQ
'' '=>'?
if( lexCurrentChar( TRUE ) = CHAR_GT ) then
if( lexCurrentChar( ) = CHAR_GT ) then
t->text[t->len+0] = CHAR_GT
t->text[t->len+1] = 0
t->len += 1
Expand All @@ -1937,7 +1936,7 @@ read_char:
t->class = FB_TKCLASS_OPERATOR

'' check for type-field dereference
if( lexCurrentChar( TRUE ) = CHAR_GT ) then
if( lexCurrentChar( ) = CHAR_GT ) then
t->text[t->len+0] = CHAR_GT
t->text[t->len+1] = 0
t->len += 1
Expand Down Expand Up @@ -2013,7 +2012,7 @@ private sub hMultiLineComment( ) static

cnt = 0
do
select case as const lexCurrentChar( TRUE )
select case as const lexCurrentChar( )
'' EOF?
case 0
errReportEx( FB_ERRMSG_EXPECTEDENDCOMMENT, NULL )
Expand Down
4 changes: 2 additions & 2 deletions src/compiler/lex.bi
Expand Up @@ -229,12 +229,10 @@ declare sub lexNextToken _

declare function lexCurrentChar _
( _
byval skipwhitespc as integer = FALSE _
) as uinteger

declare function lexGetLookAheadChar _
( _
byval skipwhitespc as integer = FALSE _
) as uinteger

declare function lexGetLookAheadChar2 _
Expand All @@ -243,6 +241,8 @@ declare function lexGetLookAheadChar2 _

declare sub lexEatChar( )

declare function lexEatWhitespace( ) as integer

declare function lexPeekCurrentLine _
( _
byref token_pos as string, _
Expand Down
28 changes: 18 additions & 10 deletions src/compiler/pp-define.bas
Expand Up @@ -65,13 +65,14 @@ private function hLoadMacro _

function = -1

var hasParens = false
var hasParens = FALSE

'' TODO: we don't know if this paren is the start of the argument list
'' or is part of the expression for the first argument.
var hasWhitespace = lexEatWhitespace( )

'' '('?
if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
if( lexCurrentChar( ) = CHAR_LPRNT ) then
hasParens = true
else
if( (pp.invoking > 0) or ((symbGetDefineFlags( s ) and FB_DEFINE_FLAGS_NEEDPARENS) <> 0 ) ) then
Expand Down Expand Up @@ -380,9 +381,11 @@ private function hLoadDefine _

'' arg-less macro?
if( symbGetDefineIsArgless( s ) ) then
var hasParens = false
var hasParens = FALSE
var hasWhitespace = lexEatWhitespace( )

'' '('?
if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
if( lexCurrentChar( ) = CHAR_LPRNT ) then
hasParens = true
else
'' not an error, macro can be passed as param to other macros
Expand All @@ -393,9 +396,10 @@ private function hLoadDefine _

if( hasParens ) then
lexEatChar( )
lexEatWhitespace( )

'' ')'
if( lexCurrentChar( TRUE ) <> CHAR_RPRNT ) then
if( lexCurrentChar( ) <> CHAR_RPRNT ) then
errReport( FB_ERRMSG_EXPECTEDRPRNT )
else
lexEatChar( )
Expand Down Expand Up @@ -451,13 +455,14 @@ private function hLoadMacroW _

function = -1

var hasParens = false
var hasParens = FALSE

'' TODO: we don't know if this paren is the start of the argument list
'' or is part of the expression for the first argument.
var hasWhitespace = lexEatWhitespace( )

'' '('?
if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
if( lexCurrentChar( ) = CHAR_LPRNT ) then
hasParens = true
else
if( (pp.invoking > 0) or ((symbGetDefineFlags( s ) and FB_DEFINE_FLAGS_NEEDPARENS) <> 0 ) ) then
Expand Down Expand Up @@ -775,9 +780,11 @@ private function hLoadDefineW _
else
'' arg-less macro?
if( symbGetDefineIsArgless( s ) ) then
var hasParens = false
var hasParens = FALSE
var hasWhitespace = lexEatWhitespace( )

'' '('?
if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
if( lexCurrentChar( ) = CHAR_LPRNT ) then
hasParens = true
else
'' not an error, macro can be passed as param to other macros
Expand All @@ -788,9 +795,10 @@ private function hLoadDefineW _

if( hasParens ) then
lexEatChar( )
lexEatWhitespace( )

'' ')'
if( lexCurrentChar( TRUE ) <> CHAR_RPRNT ) then
if( lexCurrentChar( ) <> CHAR_RPRNT ) then
errReport( FB_ERRMSG_EXPECTEDRPRNT )
else
lexEatChar( )
Expand Down
45 changes: 45 additions & 0 deletions tests/pp/quote-operators.bas
@@ -0,0 +1,45 @@
#include "fbcunit.bi"

'' Checks that __fb_quote__ keeps the whitespace written between
'' operator characters instead of joining them into one operator.
SUITE( fbc_tests.pp.quote_operators )

TEST( direct )

'' previously, these operators were joined together
'' because the lexer was skipping the whitespace in
'' between the operators.

'' do a test for common operators and check
'' that whitespace is preserved for certain cases

'' check(): quote 'arg' with __fb_quote__ and assert that the
'' resulting string equals the 'compare' string literal
#macro check( arg, compare )
scope
dim s as string
s = __fb_quote__( arg )
CU_ASSERT_EQUAL( s, compare )
end scope
#endmacro

'' single-character operators
check(1 = 2, "1 = 2" )
check(1 + 2, "1 + 2" )
check(1 - 2, "1 - 2" )
check(1 / 2, "1 / 2" )
check(1 \ 2, "1 \ 2" )
check(1 * 2, "1 * 2" )
check(1 ^ 2, "1 ^ 2" )

check(1 < 2, "1 < 2" )
check(1 > 2, "1 > 2" )

'' two-character operators written without a gap
check(1 <> 2, "1 <> 2" )
check(1 <= 2, "1 <= 2" )
check(1 >= 2, "1 >= 2" )
check(1 -> 2, "1 -> 2" )

'' the same character pairs separated by a space: the space
'' must still be present in the quoted text
check(1 < > 2, "1 < > 2" )
check(1 < = 2, "1 < = 2" )
check(1 > = 2, "1 > = 2" )
check(1 - > 2, "1 - > 2" )

END_TEST

END_SUITE
2 changes: 1 addition & 1 deletion tests/pp/quote.bas
Expand Up @@ -14,7 +14,7 @@ SUITE( fbc_tests.pp.quote )
res += """"
function = res
end function

TEST( direct )

CU_ASSERT_EQUAL( __FB_QUOTE__( freebasic ), "freebasic" )
Expand Down

0 comments on commit 7439a34

Please sign in to comment.