fbc: respect whitespace between operators

- don't join operators separated by whitespace. - Previously, '<' + '=', '<' + '>', '=' + '>', and '-' + '>' were joined together into one token even if separated by whitespace. - don't skip whitespace between decimal point and digit. Previously, '.' + '0..9' floating point literals were allowed to be separated by whitespace. fbc: refactor lexCurrentChar() - add lexEatWhitespace() specifically for testing for whitespace and skipping over whitespace. It's only used in a couple of places - remove the skipwhitespc parameter and whitespace skipping checks from lexCurrentChar() and lexGetLookAheadChar() procedures. - instead, call lexEatWhitespace() explicitly when needed to skip whitespace. Most lexer/parser loops already handle whitespace characters and tokens.
freebasic · Feb 5, 2023 · 7439a34 · 7439a34
1 parent 38bfcc6
commit 7439a34
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 37 deletions.
diff --git a/changelog.txt b/changelog.txt
@@ -30,6 +30,8 @@ Version 1.10.0
 - fbc: remove #pragma push( lookup108 ) to use symbol lookup method from fbc-1.08.x and earlier on unqualified symbol names - it was crutch for fb 1.09.0 to help with testing and transition to better lookups
 - fbc: passing byval constructor preference: prefer A.constructor( B ) instead of of B.operator CAST() as A + A.constructor() when passing UDT by value - this is to fix some inconsistencies in the rules when converting by construction one type to another
 - fbc: only up-cast initializers when needed.  New behaviour is to try matching initializers without up-casting before matching with up-casting.
+- fbc: don't join operators separated by white space.  Previously, '<' + '=', '<' + '>', '=' + '>', and '-' + '>' were joined together even if separated by whitespace.
+- fbc: don't skip whitespace between decimal point and digit. Previously, '.' + '0..9' floating point literals were allowed to be separated by whitespace.
 
 [added]
 - gas/gas64: '.cif_sections' and '.cif_' directives for stack unwinding (adeyblue)

diff --git a/src/compiler/lex.bas b/src/compiler/lex.bas
@@ -300,6 +300,23 @@ sub lexEatChar( )
 	end if
 end sub
 
+function lexEatWhitespace( ) as integer
+
+	function = FALSE
+
+	if( lex.ctx->currchar = UINVALID ) then
+		lex.ctx->currchar = hReadChar( )
+	end if
+
+	do while( (lex.ctx->currchar = CHAR_TAB) or (lex.ctx->currchar = CHAR_SPACE) )
+		lex.ctx->after_space = TRUE
+		lexEatChar( )
+		lex.ctx->currchar = hReadChar( )
+		function = TRUE
+	loop
+
+end function
+
 '':::::
 private sub hSkipChar
 
@@ -335,44 +352,26 @@ end sub
 '':::::
 function lexCurrentChar _
 	( _
-		byval skipwhitespc as integer = FALSE _
 	) as uinteger
 
 	if( lex.ctx->currchar = UINVALID ) then
 		lex.ctx->currchar = hReadChar( )
 	end if
 
-	if( skipwhitespc ) then
-		do while( (lex.ctx->currchar = CHAR_TAB) or (lex.ctx->currchar = CHAR_SPACE) )
-			lex.ctx->after_space = TRUE
-			lexEatChar( )
-			lex.ctx->currchar = hReadChar( )
-		loop
-	end if
-
 	function = lex.ctx->currchar
 
 end function
 
 '':::::
 function lexGetLookAheadChar _
 	( _
-		byval skipwhitespc as integer = FALSE _
 	) as uinteger
 
 	if( lex.ctx->lahdchar1 = UINVALID ) then
 		hSkipChar( )
 		lex.ctx->lahdchar1 = hReadChar( )
 	end if
 
-	if( skipwhitespc ) then
-		do while( (lex.ctx->lahdchar1 = CHAR_TAB) or (lex.ctx->lahdchar1 = CHAR_SPACE) )
-			lex.ctx->after_space = TRUE
-			hSkipChar( )
-			lex.ctx->lahdchar1 = hReadChar( )
-		loop
-	end if
-
 	function = lex.ctx->lahdchar1
 
 end function
@@ -1777,7 +1776,7 @@ re_read:
 	case CHAR_DOT
 		'' only check for fpoint literals if not inside a comment or parsing an $include
 		if( (flags and (LEXCHECK_NOLINECONT or LEXCHECK_NOSUFFIX)) = 0 ) then
-			var lachar = lexGetLookAheadChar( TRUE )
+			var lachar = lexGetLookAheadChar( )
 			'' '0' .. '9'?
 			if( (lachar >= CHAR_0) and (lachar <= CHAR_9) ) then
 				hReadNumber( *t, flags )
@@ -1882,7 +1881,7 @@ read_char:
 
 			select case char
 			case CHAR_LT
-				select case lexCurrentChar( TRUE )
+				select case lexCurrentChar( )
 				'' '<='?
 				case CHAR_EQ
 					t->text[t->len+0] = CHAR_EQ
@@ -1905,7 +1904,7 @@ read_char:
 
 			case CHAR_GT
 				'' '>='?
-				if( (fbGetGtInParensOnly( ) = FALSE) andalso (lexCurrentChar( TRUE ) = CHAR_EQ) ) then
+				if( (fbGetGtInParensOnly( ) = FALSE) andalso (lexCurrentChar( ) = CHAR_EQ) ) then
 					t->text[t->len+0] = CHAR_EQ
 					t->text[t->len+1] = 0
 					t->len += 1
@@ -1917,7 +1916,7 @@ read_char:
 
 			case CHAR_EQ
 				'' '=>'?
-				if( lexCurrentChar( TRUE ) = CHAR_GT ) then
+				if( lexCurrentChar( ) = CHAR_GT ) then
 					t->text[t->len+0] = CHAR_GT
 					t->text[t->len+1] = 0
 					t->len += 1
@@ -1937,7 +1936,7 @@ read_char:
 			t->class = FB_TKCLASS_OPERATOR
 
 			'' check for type-field dereference
-			if( lexCurrentChar( TRUE ) = CHAR_GT ) then
+			if( lexCurrentChar( ) = CHAR_GT ) then
 				t->text[t->len+0] = CHAR_GT
 				t->text[t->len+1] = 0
 				t->len += 1
@@ -2013,7 +2012,7 @@ private sub hMultiLineComment( ) static
 
 	cnt = 0
 	do
-		select case as const lexCurrentChar( TRUE )
+		select case as const lexCurrentChar( )
 		'' EOF?
 		case 0
 			errReportEx( FB_ERRMSG_EXPECTEDENDCOMMENT, NULL )

diff --git a/src/compiler/lex.bi b/src/compiler/lex.bi
@@ -229,12 +229,10 @@ declare sub lexNextToken _
 
 declare function lexCurrentChar _
 	( _
-		byval skipwhitespc as integer = FALSE _
 	) as uinteger
 
 declare function lexGetLookAheadChar _
 	( _
-		byval skipwhitespc as integer = FALSE _
 	) as uinteger
 
 declare function lexGetLookAheadChar2 _
@@ -243,6 +241,8 @@ declare function lexGetLookAheadChar2 _
 
 declare sub lexEatChar( )
 
+declare function lexEatWhitespace( ) as integer
+
 declare function lexPeekCurrentLine _
 	( _
 		byref token_pos as string, _

diff --git a/src/compiler/pp-define.bas b/src/compiler/pp-define.bas
@@ -65,13 +65,14 @@ private function hLoadMacro _
 
 	function = -1
 
-	var hasParens = false
+	var hasParens = FALSE
 
 	'' TODO: we don't know if this paren is the start of the argument list
 	'' or is part of the expression for the first argument.
+	var hasWhitespace = lexEatWhitespace( )
 
 	'' '('?
-	if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
+	if( lexCurrentChar( ) = CHAR_LPRNT ) then
 		hasParens = true
 	else
 		if( (pp.invoking > 0) or ((symbGetDefineFlags( s ) and FB_DEFINE_FLAGS_NEEDPARENS) <> 0 ) ) then
@@ -380,9 +381,11 @@ private function hLoadDefine _
 
 			'' arg-less macro?
 			if( symbGetDefineIsArgless( s ) ) then
-				var hasParens = false
+				var hasParens = FALSE
+				var hasWhitespace = lexEatWhitespace( )
+
 				'' '('?
-				if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
+				if( lexCurrentChar( ) = CHAR_LPRNT ) then
 					hasParens = true
 				else
 					'' not an error, macro can be passed as param to other macros
@@ -393,9 +396,10 @@ private function hLoadDefine _
 
 				if( hasParens ) then
 					lexEatChar( )
+					lexEatWhitespace( )
 
 					'' ')'
-					if( lexCurrentChar( TRUE ) <> CHAR_RPRNT ) then
+					if( lexCurrentChar( ) <> CHAR_RPRNT ) then
 						errReport( FB_ERRMSG_EXPECTEDRPRNT )
 					else
 						lexEatChar( )
@@ -451,13 +455,14 @@ private function hLoadMacroW _
 
 	function = -1
 
-	var hasParens = false
+	var hasParens = FALSE
 
 	'' TODO: we don't know if this paren is the start of the argument list
 	'' or is part of the expression for the first argument.
+	var hasWhitespace = lexEatWhitespace( )
 
 	'' '('?
-	if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
+	if( lexCurrentChar( ) = CHAR_LPRNT ) then
 		hasParens = true
 	else
 		if( (pp.invoking > 0) or ((symbGetDefineFlags( s ) and FB_DEFINE_FLAGS_NEEDPARENS) <> 0 ) ) then
@@ -775,9 +780,11 @@ private function hLoadDefineW _
 		else
 			'' arg-less macro?
 			if( symbGetDefineIsArgless( s ) ) then
-				var hasParens = false
+				var hasParens = FALSE
+				var hasWhitespace = lexEatWhitespace( )
+
 				'' '('?
-				if( lexCurrentChar( TRUE ) = CHAR_LPRNT ) then
+				if( lexCurrentChar( ) = CHAR_LPRNT ) then
 					hasParens = true
 				else
 					'' not an error, macro can be passed as param to other macros
@@ -788,9 +795,10 @@ private function hLoadDefineW _
 
 				if( hasParens ) then
 					lexEatChar( )
+					lexEatWhitespace( )
 
 					'' ')'
-					if( lexCurrentChar( TRUE ) <> CHAR_RPRNT ) then
+					if( lexCurrentChar( ) <> CHAR_RPRNT ) then
 						errReport( FB_ERRMSG_EXPECTEDRPRNT )
 					else
 						lexEatChar( )

diff --git a/tests/pp/quote-operators.bas b/tests/pp/quote-operators.bas
@@ -0,0 +1,45 @@
+#include "fbcunit.bi"
+
+SUITE( fbc_tests.pp.quote_operators )
+
+	TEST( direct )
+
+		'' previously, these operators were joined together
+		'' because the lexer was skipping the whitespace in
+		'' between the operators.
+
+		'' do a test for common operators and check
+		'' that whitespace is preserved for certain cases
+
+		#macro check( arg, compare )
+			scope
+				dim s as string
+				s = __fb_quote__( arg )
+				CU_ASSERT_EQUAL( s, compare )
+			end scope
+		#endmacro
+
+		check(1 = 2, "1 = 2" )
+		check(1 + 2, "1 + 2" )
+		check(1 - 2, "1 - 2" )
+		check(1 / 2, "1 / 2" )
+		check(1 \ 2, "1 \ 2" )
+		check(1 * 2, "1 * 2" )
+		check(1 ^ 2, "1 ^ 2" )
+
+		check(1 < 2, "1 < 2" )
+		check(1 > 2, "1 > 2" )
+
+		check(1 <> 2, "1 <> 2" )
+		check(1 <= 2, "1 <= 2" )
+		check(1 >= 2, "1 >= 2" )
+		check(1 -> 2, "1 -> 2" )
+
+		check(1 < > 2, "1 < > 2" )
+		check(1 < = 2, "1 < = 2" )
+		check(1 > = 2, "1 > = 2" )
+		check(1 - > 2, "1 - > 2" )
+
+	END_TEST
+
+END_SUITE
diff --git a/tests/pp/quote.bas b/tests/pp/quote.bas
@@ -14,7 +14,7 @@ SUITE( fbc_tests.pp.quote )
 		res += """"
 		function = res
 	end function
-	
+
 	TEST( direct )
 
 		CU_ASSERT_EQUAL( __FB_QUOTE__( freebasic ), "freebasic" )