From a6ca2785f7122ec6977084a99918d15e435e95fd Mon Sep 17 00:00:00 2001 From: coderJeff Date: Sat, 20 Apr 2019 02:19:46 -0400 Subject: [PATCH 1/2] bugfix: sf.net #794 literal tabs in string literals miscompiled if followed by 0-9 numeric chars - when emitting octal escapes, always write out 3 digit octal escape codes "\nnn". --- changelog.txt | 3 ++- src/compiler/hlp-str.bas | 8 ++++++-- tests/string/literal-tab.bas | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 tests/string/literal-tab.bas diff --git a/changelog.txt b/changelog.txt index b6003d5572..7100e9729c 100644 --- a/changelog.txt +++ b/changelog.txt @@ -6,9 +6,10 @@ Version 1.07.0 - CVA_LIST type, CVA_START(), CVA_COPY() CVA_END(), CVA_ARG() macros will map to gcc's __builtin_va_list and __builtin_va_* macros in gcc backend [fixed] -- #881: C backend: support for varadic function parameters in gcc using __builtin_va_list type and related macros +- sf.net #881: C backend: support for varadic function parameters in gcc using __builtin_va_list type and related macros - push/pop correct GL_PROJECTION matrix and GL_MODELVIEW matrix when setting up graphics screen (gothon) - fix char & wchar concatentation. 
For char double byte characters (such as Chinese characters), the "wstr = char & wchar" would become "wstr = char & chrw(0) & wchar" (SkyFish) +- sf.net #794: literal tabs in string literals miscompiled if followed by 0-9 numeric chars Version 1.06.0 diff --git a/src/compiler/hlp-str.bas b/src/compiler/hlp-str.bas index 8b3c6b2d2f..2f718e9293 100644 --- a/src/compiler/hlp-str.bas +++ b/src/compiler/hlp-str.bas @@ -812,11 +812,15 @@ function hEscape _ dst += 1 if( c < 8 ) then + dst[0] = CHAR_0 + dst[1] = CHAR_0 + dst += 2 c += CHAR_0 elseif( c < 64 ) then - *dst = CHAR_0 + (c shr 3) - dst += 1 + dst[0] = CHAR_0 + dst[1] = CHAR_0 + (c shr 3) + dst += 2 c = CHAR_0 + (c and 7) else diff --git a/tests/string/literal-tab.bas b/tests/string/literal-tab.bas new file mode 100644 index 0000000000..520eb3db87 --- /dev/null +++ b/tests/string/literal-tab.bas @@ -0,0 +1,34 @@ +#include "fbcunit.bi" + +SUITE( fbc_tests.string_.literal_tab ) + + TEST( default) + + '' test that literal tabs in the source file + '' are correctly escaped and emitted. + + '' careful - string is 'x' chr(9) '0' 'x' chr(9) '1' 'x' + dim as string a = "x 0x 1x" + dim as string b = "x" & chr(9) & "0x" & chr(9) & "1x" + dim as string c = !"x\t0x\t1x" + dim as ubyte d(0 to 6) = { asc("x"), 9, asc("0"), asc("x"), _ + 9, asc("1"), asc("x") } + + CU_ASSERT( len("x 0x 1x") = 7 ) + CU_ASSERT( len( a ) = 7 ) + CU_ASSERT( len( b ) = 7 ) + CU_ASSERT( len( c ) = 7 ) + + CU_ASSERT( a = "x 0x 1x" ) + CU_ASSERT( b = a ) + CU_ASSERT( c = a ) + + for i as integer = 0 to 6 + CU_ASSERT( a[i] = d(i) ) + CU_ASSERT( b[i] = d(i) ) + CU_ASSERT( c[i] = d(i) ) + next + + END_TEST + +END_SUITE From 20ef868dd14ef42bd077333906ccf797a8fca711 Mon Sep 17 00:00:00 2001 From: coderJeff Date: Sat, 20 Apr 2019 02:23:09 -0400 Subject: [PATCH 2/2] bugfix: sf.net #897 length of literal wstring is miscalculated at compile time.
Compile time evaluation of len(!"\u1234") was using the internal escaped string length instead of the actual codepoint length --- changelog.txt | 1 + src/compiler/parser-quirk-math.bas | 5 +- tests/wstring/len.bas | 79 +++++++++++++++++++++++------- 3 files changed, 66 insertions(+), 19 deletions(-) diff --git a/changelog.txt b/changelog.txt index 7100e9729c..52fe2eda6a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -10,6 +10,7 @@ Version 1.07.0 - push/pop correct GL_PROJECTION matrix and GL_MODELVIEW matrix when setting up graphics screen (gothon) - fix char & wchar concatentation. For char double byte characters (such as Chinese characters), the "wstr = char & wchar" would become "wstr = char & chrw(0) & wchar" (SkyFish) - sf.net #794: literal tabs in string literals miscompiled if followed by 0-9 numeric chars +- sf.net #897: length of literal wstring is miscalculated at compile time. Compile time evaluation of len(!"\u1234") was using the internal escaped string length instead of the actual codepoint length Version 1.06.0 diff --git a/src/compiler/parser-quirk-math.bas b/src/compiler/parser-quirk-math.bas index a25e31c6a3..96f3f194ef 100644 --- a/src/compiler/parser-quirk-math.bas +++ b/src/compiler/parser-quirk-math.bas @@ -74,7 +74,10 @@ private function hLen _ end if '' String literal, evaluate at compile-time - lgt = symbGetWstrLen( litsym ) - 1 + '' symbGetWstrLen( litsym ) will return the number of codepoints + '' that are used to store the escaped WSTRING literal, when what + '' we really want is the number of codepoints unescaped. + lgt = len( *hUnescapeW( symbGetVarLitTextW( litsym ) ) ) case FB_DATATYPE_FIXSTR '' len( fixstr ) returns the N from STRING * N, i.e.
it works diff --git a/tests/wstring/len.bas b/tests/wstring/len.bas index 87c11e039c..dbbf39d005 100644 --- a/tests/wstring/len.bas +++ b/tests/wstring/len.bas @@ -2,27 +2,70 @@ SUITE( fbc_tests.wstring_.len_ ) - const TEST_LIT = "abcdef" + '' len( [w]str( literal ) ) is evaluated at compile time + '' check that the compile time calculation matches the + '' run time library calculation + + const LIT_A1 = "abcdef" + const LIT_W1 = wstr( "abcdef" ) + const LIT_W2 = !"bcd\u0065\u0066\u0067" + const LIT_W3 = !"\u0063\u0064\u0065\u0065\u0067\u0068" + + const CODE_LEN = 6 + const TEST_LEN = 32 const TEST_SIZ = TEST_LEN * len( wstring ) - TEST( default ) - - dim s as wstring * TEST_LEN - dim ps as wstring ptr - - s = TEST_LIT - ps = @s - - CU_ASSERT( len( s ) = len( TEST_LIT ) ) - - CU_ASSERT( sizeof( s ) = TEST_SIZ ) - - CU_ASSERT( len( *ps ) = len( TEST_LIT ) ) - - CU_ASSERT( len( ps ) = len( any ptr ) ) - - CU_ASSERT( sizeof( ps ) = sizeof( any ptr ) ) + #macro do_test( init_string ) + + scope + dim s as wstring * TEST_LEN + dim ps as wstring ptr + + s = init_string + ps = @s + + CU_ASSERT( len( s ) = CODE_LEN ) + CU_ASSERT( len( *ps ) = CODE_LEN ) + + CU_ASSERT( len( s ) = len( init_string ) ) + + CU_ASSERT( sizeof( s ) = TEST_SIZ ) + + CU_ASSERT( len( *ps ) = len( init_string ) ) + + CU_ASSERT( len( ps ) = len( any ptr ) ) + + CU_ASSERT( sizeof( ps ) = sizeof( any ptr ) ) + end scope + + #endmacro + + TEST( literal ) + + do_test( "abcdef" ) + do_test( wstr( "abcdef" ) ) + do_test( !"bcd\u0065\u0066\u0067" ) + do_test( !"\u0063\u0064\u0065\u0065\u0067\u0068" ) + + do_test( LIT_A1 ) + do_test( LIT_W1 ) + do_test( LIT_W2 ) + do_test( LIT_W3 ) + + END_TEST + + TEST( literal_const ) + + do_test( "abcdef" ) + do_test( wstr( "abcdef" ) ) + do_test( !"bcd\u0065\u0066\u0067" ) + do_test( !"\u0063\u0064\u0065\u0065\u0067\u0068" ) + + do_test( LIT_A1 ) + do_test( LIT_W1 ) + do_test( LIT_W2 ) + do_test( LIT_W3 ) END_TEST