From a6ca2785f7122ec6977084a99918d15e435e95fd Mon Sep 17 00:00:00 2001
From: coderJeff <coder@execulink.com>
Date: Sat, 20 Apr 2019 02:19:46 -0400
Subject: [PATCH 1/2] bugfix: sf.net #794 literal tabs in string literals
 miscompiled if followed by 0-9 numeric chars

- when emitting octal escapes, always write out 3 digit octal escape codes "\nnn".
---
 changelog.txt                |  3 ++-
 src/compiler/hlp-str.bas     |  8 ++++++--
 tests/string/literal-tab.bas | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 3 deletions(-)
 create mode 100644 tests/string/literal-tab.bas

diff --git a/changelog.txt b/changelog.txt
index b6003d5572..7100e9729c 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -6,9 +6,10 @@ Version 1.07.0
 - CVA_LIST type, CVA_START(), CVA_COPY() CVA_END(), CVA_ARG() macros will map to gcc's __builtin_va_list and __builtin_va_* macros in gcc backend
 
 [fixed]
-- #881: C backend: support for varadic function parameters in gcc using __builtin_va_list type and related macros
+- sf.net #881: C backend: support for variadic function parameters in gcc using __builtin_va_list type and related macros
 - push/pop correct GL_PROJECTION matrix and GL_MODELVIEW matrix when setting up graphics screen (gothon)
 - fix char & wchar concatentation.  For char double byte characters (such as Chinese characters), the "wstr = char & wchar" would become "wstr = char & chrw(0) & wchar" (SkyFish)
+- sf.net #794: literal tabs in string literals miscompiled if followed by 0-9 numeric chars
 
 
 Version 1.06.0
diff --git a/src/compiler/hlp-str.bas b/src/compiler/hlp-str.bas
index 8b3c6b2d2f..2f718e9293 100644
--- a/src/compiler/hlp-str.bas
+++ b/src/compiler/hlp-str.bas
@@ -812,11 +812,15 @@ function hEscape _
 			dst += 1
 
 			if( c < 8 ) then
+				dst[0] = CHAR_0
+				dst[1] = CHAR_0
+				dst += 2
 				c += CHAR_0
 
 			elseif( c < 64 ) then
-				*dst = CHAR_0 + (c shr 3)
-				dst += 1
+				dst[0] = CHAR_0
+				dst[1] = CHAR_0 + (c shr 3)
+				dst += 2
 				c = CHAR_0 + (c and 7)
 
 			else
diff --git a/tests/string/literal-tab.bas b/tests/string/literal-tab.bas
new file mode 100644
index 0000000000..520eb3db87
--- /dev/null
+++ b/tests/string/literal-tab.bas
@@ -0,0 +1,34 @@
+#include "fbcunit.bi"
+
+SUITE( fbc_tests.string_.literal_tab )
+
+	TEST( default)
+
+		'' test that literal tabs in the source file
+		'' are correctly escaped and emitted.
+
+		'' careful - string is 'x' chr(9) '0' 'x' chr(9) '1' 'x'
+		dim as string a = "x	0x	1x"
+		dim as string b = "x" & chr(9) & "0x" & chr(9) & "1x"
+		dim as string c = !"x\t0x\t1x"
+		dim as ubyte d(0 to 6) = { asc("x"), 9, asc("0"), asc("x"), _
+			9, asc("1"), asc("x") }
+
+		CU_ASSERT( len("x	0x	1x") = 7 )
+		CU_ASSERT( len( a ) = 7 )
+		CU_ASSERT( len( b ) = 7 )
+		CU_ASSERT( len( c ) = 7 )
+
+		CU_ASSERT( a = "x	0x	1x" )
+		CU_ASSERT( b = a )
+		CU_ASSERT( c = a )
+
+		for i as integer = 0 to 6
+			CU_ASSERT( a[i] = d(i) )
+			CU_ASSERT( b[i] = d(i) )
+			CU_ASSERT( c[i] = d(i) )
+		next
+
+	END_TEST
+
+END_SUITE

From 20ef868dd14ef42bd077333906ccf797a8fca711 Mon Sep 17 00:00:00 2001
From: coderJeff <coder@execulink.com>
Date: Sat, 20 Apr 2019 02:23:09 -0400
Subject: [PATCH 2/2] bugfix: sf.net #897 length of literal wstring is
 miscalculated at compile time.  Compile time evaluation of len(!"\u1234") was
 using the internal escaped string length instead of the actual codepoint
 length

---
 changelog.txt                      |  1 +
 src/compiler/parser-quirk-math.bas |  5 +-
 tests/wstring/len.bas              | 79 +++++++++++++++++++++++-------
 3 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/changelog.txt b/changelog.txt
index 7100e9729c..52fe2eda6a 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -10,6 +10,7 @@ Version 1.07.0
 - push/pop correct GL_PROJECTION matrix and GL_MODELVIEW matrix when setting up graphics screen (gothon)
 - fix char & wchar concatentation.  For char double byte characters (such as Chinese characters), the "wstr = char & wchar" would become "wstr = char & chrw(0) & wchar" (SkyFish)
 - sf.net #794: literal tabs in string literals miscompiled if followed by 0-9 numeric chars
+- sf.net #897: length of literal wstring is miscalculated at compile time.  Compile time evaluation of len(!"\u1234") was using the internal escaped string length instead of the actual codepoint length
 
 
 Version 1.06.0
diff --git a/src/compiler/parser-quirk-math.bas b/src/compiler/parser-quirk-math.bas
index a25e31c6a3..96f3f194ef 100644
--- a/src/compiler/parser-quirk-math.bas
+++ b/src/compiler/parser-quirk-math.bas
@@ -74,7 +74,10 @@ private function hLen _
 		end if
 
 		'' String literal, evaluate at compile-time
-		lgt = symbGetWstrLen( litsym ) - 1
+		'' symbGetWstrLen( litsym ) will return the number of codepoints
+		'' that are used to store the escaped WSTRING literal, when what
+		'' we really want is the number of codepoints unescaped.
+		lgt = len( *hUnescapeW( symbGetVarLitTextW( litsym ) ) )
 
 	case FB_DATATYPE_FIXSTR
 		'' len( fixstr ) returns the N from STRING * N, i.e. it works
diff --git a/tests/wstring/len.bas b/tests/wstring/len.bas
index 87c11e039c..dbbf39d005 100644
--- a/tests/wstring/len.bas
+++ b/tests/wstring/len.bas
@@ -2,27 +2,70 @@
 
 SUITE( fbc_tests.wstring_.len_ )
 
-	const TEST_LIT = "abcdef"
+	'' len( [w]str( literal ) ) is evaluated at compile time
+	'' check that the compile time calculation matches the
+	'' run time library calculation
+
+	const LIT_A1 = "abcdef"
+	const LIT_W1 = wstr( "abcdef" )
+	const LIT_W2 = !"bcd\u0065\u0066\u0067"
+	const LIT_W3 = !"\u0063\u0064\u0065\u0065\u0067\u0068"
+
+	const CODE_LEN = 6
+
 	const TEST_LEN = 32
 	const TEST_SIZ = TEST_LEN * len( wstring )
 
-	TEST( default )
-
-		dim s as wstring * TEST_LEN
-		dim ps as wstring ptr
-		
-		s = TEST_LIT
-		ps = @s
-		
-		CU_ASSERT( len( s ) = len( TEST_LIT ) )
-		
-		CU_ASSERT( sizeof( s ) = TEST_SIZ )
-		
-		CU_ASSERT( len( *ps ) = len( TEST_LIT ) )
-		
-		CU_ASSERT( len( ps ) = len( any ptr ) )
-		
-		CU_ASSERT( sizeof( ps ) = sizeof( any ptr ) )
+	#macro do_test( init_string )
+
+		scope
+			dim s as wstring * TEST_LEN
+			dim ps as wstring ptr
+			
+			s = init_string
+			ps = @s
+			
+			CU_ASSERT( len( s ) = CODE_LEN )
+			CU_ASSERT( len( *ps ) = CODE_LEN )
+
+			CU_ASSERT( len( s ) = len( init_string ) )
+			
+			CU_ASSERT( sizeof( s ) = TEST_SIZ )
+			
+			CU_ASSERT( len( *ps ) = len( init_string ) )
+			
+			CU_ASSERT( len( ps ) = len( any ptr ) )
+			
+			CU_ASSERT( sizeof( ps ) = sizeof( any ptr ) )
+		end scope
+
+	#endmacro
+
+	TEST( literal )
+
+		do_test( "abcdef" )
+		do_test( wstr( "abcdef" ) )
+		do_test( !"bcd\u0065\u0066\u0067" )
+		do_test( !"\u0063\u0064\u0065\u0065\u0067\u0068" )
+
+		do_test( LIT_A1 )
+		do_test( LIT_W1 )
+		do_test( LIT_W2 )
+		do_test( LIT_W3 )
+
+	END_TEST
+
+	TEST( literal_const )
+
+		do_test( "abcdef" )
+		do_test( wstr( "abcdef" ) )
+		do_test( !"bcd\u0065\u0066\u0067" )
+		do_test( !"\u0063\u0064\u0065\u0065\u0067\u0068" )
+
+		do_test( LIT_A1 )
+		do_test( LIT_W1 )
+		do_test( LIT_W2 )
+		do_test( LIT_W3 )
 
 	END_TEST