chakra-core · MSLaguana · Jan 29, 2018 · Feb 20, 2018 · MSLaguana · Jul 25, 2018
diff --git a/lib/Jsrt/Jsrt.cpp b/lib/Jsrt/Jsrt.cpp
@@ -3729,26 +3729,40 @@ JsErrorCode GetScriptBufferDetails(
     }
     const bool isUtf8 = !isString && !(parseAttributes & JsParseScriptAttributeArrayBufferIsUtf16Encoded);
 
-    *script = isExternalArray ?
-        ((Js::ExternalArrayBuffer*)(scriptVal))->GetBuffer() :
-        (const byte*)((Js::JavascriptString*)(scriptVal))->GetSz();
-    *cb = isExternalArray ?
-        ((Js::ExternalArrayBuffer*)(scriptVal))->GetByteLength() :
-        ((Js::JavascriptString*)(scriptVal))->GetSizeInBytes();
-
-    if (isExternalArray && isUtf8)
-    {
-        *scriptFlag = (LoadScriptFlag)(LoadScriptFlag_ExternalArrayBuffer | LoadScriptFlag_Utf8Source);
-    }
-    else if (isUtf8)
+    if (isExternalArray)
     {
-        *scriptFlag = (LoadScriptFlag)(LoadScriptFlag_Utf8Source);
+        Js::ExternalArrayBuffer* scriptBuffer = (Js::ExternalArrayBuffer*)(scriptVal);
+        *script = scriptBuffer->GetBuffer();
+        *cb = scriptBuffer->GetByteLength();
+        if (isUtf8)
+        {
+            *scriptFlag = (LoadScriptFlag)(LoadScriptFlag_ExternalArrayBuffer | LoadScriptFlag_Utf8Source);
+        }
+        else
+        {
+            *scriptFlag = (LoadScriptFlag)(LoadScriptFlag_ExternalArrayBuffer);
+        }
     }
     else
     {
-        *scriptFlag = LoadScriptFlag_None;
+        Js::JavascriptString* scriptString = (Js::JavascriptString*)(scriptVal);
+        if (Js::Utf8String::Is(scriptString))
+        {
+            Js::Utf8String * utf8String = Js::Utf8String::From(scriptString);
+            *script = (const byte*)utf8String->Utf8Buffer();
+            *cb = utf8String->Utf8Length();
+            *scriptFlag = (LoadScriptFlag)(LoadScriptFlag_Utf8Source);
+        }
+        else
+        {
+            return ContextAPINoScriptWrapper_NoRecord([&](Js::ScriptContext *scriptContext) -> JsErrorCode {
+                *script = (const byte*)scriptString->GetSz();
+                *cb = scriptString->GetSizeInBytes();
+                *scriptFlag = LoadScriptFlag_None;
+                return JsNoError;
+            });
+        }
     }
-
     return JsNoError;
 }
 
@@ -4781,9 +4795,17 @@ CHAKRA_API JsCreateString(
 
     return ContextAPINoScriptWrapper([&](Js::ScriptContext *scriptContext, TTDRecorder& _actionEntryPopper) -> JsErrorCode {
 
-        Js::JavascriptString *stringValue = Js::LiteralStringWithPropertyStringPtr::
-            NewFromCString(content, (CharCount)length, scriptContext->GetLibrary());
+        char * recyclerBuffer = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char, length);
+        if (recyclerBuffer == nullptr)
+        {
+            Js::JavascriptError::ThrowOutOfMemoryError(scriptContext);
+        }
 
+        memcpy_s(recyclerBuffer, length, content, length);
+
+        Js::JavascriptString *stringValue = RecyclerNew(scriptContext->GetRecycler(), Js::Utf8String, recyclerBuffer, length, scriptContext->GetLibrary()->GetStringTypeStatic());
+
+        // TODO: With TTD enabled we immediately flatten these strings to utf16. Perhaps we should handle this differently.
         PERFORM_JSRT_TTD_RECORD_ACTION(scriptContext, RecordJsRTCreateString, stringValue->GetSz(), stringValue->GetLength());
 
         *value = stringValue;
@@ -4934,44 +4956,20 @@ _ALWAYSINLINE JsErrorCode CompileRun(
     VALIDATE_JSREF(scriptVal);
     PARAM_NOT_NULL(sourceUrl);
 
-    bool isExternalArray = Js::ExternalArrayBuffer::Is(scriptVal),
-         isString = false;
-    bool isUtf8   = !(parseAttributes & JsParseScriptAttributeArrayBufferIsUtf16Encoded);
-
     LoadScriptFlag scriptFlag = LoadScriptFlag_None;
     const byte* script;
     size_t cb;
-    const WCHAR *url;
-
-    if (isExternalArray)
-    {
-        script = ((Js::ExternalArrayBuffer*)(scriptVal))->GetBuffer();
 
-        cb = ((Js::ExternalArrayBuffer*)(scriptVal))->GetByteLength();
+    JsErrorCode error = GetScriptBufferDetails(scriptVal, parseAttributes, &scriptFlag, &cb, &script);
 
-        scriptFlag = (LoadScriptFlag)(isUtf8 ?
-            LoadScriptFlag_ExternalArrayBuffer | LoadScriptFlag_Utf8Source :
-            LoadScriptFlag_ExternalArrayBuffer);
-    }
-    else
+    if (error != JsNoError)
     {
-        isString = Js::JavascriptString::Is(scriptVal);
-        if (!isString)
-        {
-            return JsErrorInvalidArgument;
-        }
+        return error;
     }
 
-    JsErrorCode error = GlobalAPIWrapper_NoRecord([&]() -> JsErrorCode {
-        if (isString)
-        {
-            Js::JavascriptString* jsString = Js::JavascriptString::FromVar(scriptVal);
-            script = (const byte*)jsString->GetSz();
-
-            // JavascriptString is 2 bytes (WCHAR/char16)
-            cb = jsString->GetLength() * sizeof(WCHAR);
-        }
+    const WCHAR *url;
 
+    error = GlobalAPIWrapper_NoRecord([&]() -> JsErrorCode {
         if (!Js::JavascriptString::Is(sourceUrl))
         {
             return JsErrorInvalidArgument;
@@ -5165,14 +5163,23 @@ CHAKRA_API JsRunSerialized(
 {
     PARAM_NOT_NULL(bufferVal);
     const WCHAR *url;
+    JsErrorCode errorCode = ContextAPINoScriptWrapper_NoRecord([&](Js::ScriptContext *scriptContext) -> JsErrorCode {
 
-    if (sourceUrl && Js::JavascriptString::Is(sourceUrl))
-    {
-        url = ((Js::JavascriptString*)(sourceUrl))->GetSz();
-    }
-    else
+        if (sourceUrl && Js::JavascriptString::Is(sourceUrl))
+        {
+            url = ((Js::JavascriptString*)(sourceUrl))->GetSz();
+        }
+        else
+        {
+            return JsErrorInvalidArgument;
+        }
+
+        return JsNoError;
+    });
+
+    if (errorCode != JsNoError)
     {
-        return JsErrorInvalidArgument;
+        return errorCode;
     }
 
     // JsParseSerialized only accepts ArrayBuffer (incl. ExternalArrayBuffer)
@@ -5689,12 +5696,23 @@ CHAKRA_API JsRunScriptWithParserState(
     const WCHAR *url = nullptr;
     uint sourceIndex = 0;
 
-    JsErrorCode errorCode = ContextAPINoScriptWrapper_NoRecord([&](Js::ScriptContext *scriptContext) -> JsErrorCode {
-        const byte* bytes;
-        size_t cb;
-        LoadScriptFlag loadScriptFlag;
+    const byte* bytes;
+    size_t cb;
+    LoadScriptFlag loadScriptFlag;
 
-        JsErrorCode errorCode = GetScriptBufferDetails(script, parseAttributes, &loadScriptFlag, &cb, &bytes);
+    JsErrorCode errorCode = GetScriptBufferDetails(script, parseAttributes, &loadScriptFlag, &cb, &bytes);
+
+    if (errorCode != JsNoError)
+    {
+        return errorCode;
+    }
+
+    if (!Js::ExternalArrayBuffer::Is(parserState))
+    {
+        return JsErrorInvalidArgument;
+    }
+
+    errorCode = ContextAPINoScriptWrapper_NoRecord([&](Js::ScriptContext *scriptContext) -> JsErrorCode {
 
         if (sourceUrl && Js::JavascriptString::Is(sourceUrl))
         {
@@ -5705,11 +5723,6 @@ CHAKRA_API JsRunScriptWithParserState(
             return JsErrorInvalidArgument;
         }
 
-        if (errorCode != JsNoError)
-        {
-            return errorCode;
-        }
-
         SourceContextInfo* sourceContextInfo = scriptContext->GetSourceContextInfo(sourceContext, nullptr);
 
         if (sourceContextInfo == nullptr)
@@ -5761,11 +5774,6 @@ CHAKRA_API JsRunScriptWithParserState(
         return errorCode;
     }
 
-    if (!Js::ExternalArrayBuffer::Is(parserState))
-    {
-        return JsErrorInvalidArgument;
-    }
-
     Js::ArrayBuffer* arrayBuffer = Js::ArrayBuffer::FromVar(parserState);
     byte* buffer = arrayBuffer->GetBuffer();
     JsSerializedLoadScriptCallback dummy = DummyScriptLoadSourceCallbackForRunScriptWithParserState;

diff --git a/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj b/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj
@@ -266,6 +266,7 @@
     <ClInclude Include="SparseArraySegment.h" />
     <ClInclude Include="SubString.h" />
     <ClInclude Include="UriHelper.h" />
+    <ClInclude Include="Utf8String.h" />
     <ClInclude Include="WabtInterface.h" />
     <ClInclude Include="WasmLibrary.h" />
     <ClInclude Include="WebAssembly.h" />

diff --git a/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters b/lib/Runtime/Library/Chakra.Runtime.Library.vcxproj.filters
@@ -225,6 +225,7 @@
     <ClInclude Include="JsBuiltIn\JsBuiltIn.js.nojit.bc.32b.h" />
     <ClInclude Include="JsBuiltIn\JsBuiltIn.js.nojit.bc.64b.h" />
     <ClInclude Include="PropertyRecordUsageCache.h" />
+    <ClInclude Include="Utf8String.h" />
     <ClInclude Include="..\LibraryFunction.h" />
   </ItemGroup>
   <ItemGroup>

diff --git a/lib/Runtime/Library/JavascriptString.h b/lib/Runtime/Library/JavascriptString.h
@@ -106,7 +106,7 @@ namespace Js
         void FinishCopy(__inout_xcount(m_charLength) char16 *const buffer, StringCopyInfoStack &nestedStringTreeCopyInfos);
 
     public:
-        virtual int GetRandomAccessItemsFromConcatString(Js::JavascriptString * const *& items) const { return -1; }
+        virtual int GetRandomAccessItemsFromConcatString(_Out_ Js::JavascriptString * const *& items) const { items = nullptr; return -1; } // This implementation nulls the out-param and returns -1.
         virtual bool IsTree() const { return false; }
 
         virtual BOOL SetItem(uint32 index, Var value, PropertyOperationFlags propertyOperationFlags) override;

diff --git a/lib/Runtime/Library/RuntimeLibraryPch.h b/lib/Runtime/Library/RuntimeLibraryPch.h
@@ -27,6 +27,7 @@
 #include "Library/SingleCharString.h"
 #include "Library/SubString.h"
 #include "Library/BufferStringBuilder.h"
+#include "Library/Utf8String.h"
 
 #include "Library/BoundFunction.h"
 #include "Library/JavascriptGeneratorFunction.h"

diff --git a/lib/Runtime/Library/Utf8String.h b/lib/Runtime/Library/Utf8String.h
@@ -0,0 +1,142 @@
+//-------------------------------------------------------------------------------------------------------
+// Copyright (C) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
+//-------------------------------------------------------------------------------------------------------
+#pragma once
+
+#include "Codex/Utf8Codex.h"
+
+namespace Js
+{
+    class Utf8String : public JavascriptString // String that keeps a UTF-8 byte buffer alongside the UTF-16 buffer, which may be built lazily by GetSz().
+    {
+    private:
+        typedef struct {
+            FieldNoBarrier(size_t) length; // number of bytes in `buffer`
+            Field(char*) buffer; // UTF-8 bytes; the pointer is stored as given, not copied
+        } PrefixedUtf8String;
+        Field(PrefixedUtf8String*) utf8String; // assigned only by SetUtf8Buffer()
+
+    protected:
+        DEFINE_VTABLE_CTOR(Utf8String, JavascriptString);
+
+    private:
+
+        void SetUtf8Buffer(_In_reads_(utf8Length) char* buffer, size_t utf8Length) // Records buffer/utf8Length in a newly recycler-allocated PrefixedUtf8String.
+        {
+            this->utf8String = RecyclerNew(this->GetRecycler(), PrefixedUtf8String);
+            this->utf8String->length = utf8Length;
+            this->utf8String->buffer = buffer;
+        }
+
+    public:
+
+        Utf8String(_In_ JavascriptString* originalString, _In_reads_(utf8Length) char* buffer, size_t utf8Length, _In_ StaticType* type) :
+            JavascriptString(type),
+            utf8String(nullptr)
+        {
+            SetUtf8Buffer(buffer, utf8Length);
+
+            this->SetLength(originalString->GetLength()); // UTF-16 length is taken from originalString, not recomputed from the UTF-8 bytes
+            this->SetBuffer(originalString->UnsafeGetBuffer()); // reuses originalString's UTF-16 buffer pointer (no copy)
+        }
+
+        Utf8String(_In_reads_(utf8Length) char* buffer, size_t utf8Length, _In_ StaticType* type) :
+            JavascriptString(type),
+            utf8String(nullptr)
+        {
+            SetUtf8Buffer(buffer, utf8Length);
+
+            charcount_t utf16Length = 0; // computed by walking the UTF-8 bytes below; no UTF-16 data is produced here
+            utf8::DecodeOptions opts = utf8::DecodeOptions::doDefault; // NOTE(review): presumably updated by DecodeTail to carry state between calls - confirm in Utf8Codex.h
+            LPCUTF8 buf = reinterpret_cast<LPCUTF8>(buffer);
+            LPCUTF8 end = buf + utf8Length;
+            while (buf < end)
+            {
+                if ((*buf & 0x80) == 0)
+                {
+                    // Single-byte (ASCII) character: high bit clear
+                    utf16Length++;
+                    buf++;
+                }
+                else
+                {
+                    // Multi-byte sequence: count one UTF-16 unit per DecodeTail call; the decoded
+                    // value is discarded and DecodeTail is relied on to reposition buf.
+                    char16 c1 = *buf;
+                    ++buf;
+                    utf8::DecodeTail(c1, buf, end, opts);
+                    utf16Length++;
+                }
+            }
+
+            this->SetLength(utf16Length);
+            this->SetBuffer(nullptr); // no UTF-16 buffer yet; GetSz() allocates and fills it on demand
+        }
+
+
+        size_t Utf8Length() const // Byte length recorded by SetUtf8Buffer().
+        {
+            return this->utf8String->length;
+        }
+
+        const char* Utf8Buffer() const // UTF-8 bytes recorded by SetUtf8Buffer(); this class does not append a null terminator.
+        {
+            return this->utf8String->buffer;
+        }
+
+        const char16* GetSz() override // Returns the UTF-16 buffer, decoding it from the UTF-8 bytes if it does not exist yet.
+        {
+            if (this->IsFinalized()) // NOTE(review): presumably true once a UTF-16 buffer has been set - confirm in JavascriptString
+            {
+                return this->UnsafeGetBuffer();
+            }
+
+            // TODO: This is currently wrong in the presence of unmatched surrogates.
+            // An unmatched surrogate is converted to the 3-byte replacement character in UTF-8,
+            // which then decodes back to the single UTF-16 code unit 0xFFFD rather than to the
+            // original surrogate code unit.
+            char16* buffer = RecyclerNewArrayLeaf(this->GetRecycler(), char16, this->GetLength() + 1); // +1 for the null terminator
+
+            // Fetching the char* from the Field(char*) first so we can then cast to LPCUTF8
+            const char* bufferStart = this->utf8String->buffer;
+            LPCUTF8 start = reinterpret_cast<LPCUTF8>(bufferStart);
+            size_t decodeLength = utf8::DecodeUnitsIntoAndNullTerminateNoAdvance(buffer, start, start + this->utf8String->length);
+
+            Assert(decodeLength == this->GetLength());
+
+            buffer[this->GetLength()] = 0;
+
+            this->SetBuffer(buffer);
+            return this->UnsafeGetBuffer();
+        }
+
+        static bool Is(RecyclableObject* obj) // Type check based on the object's virtual table.
+        {
+            return VirtualTableInfo<Js::Utf8String>::HasVirtualTable(obj);
+        }
+
+        static Utf8String* From(RecyclableObject* obj) // Checked cast: returns nullptr when Is(obj) is false.
+        {
+            if (Utf8String::Is(obj))
+            {
+                return static_cast<Utf8String*>(obj);
+            }
+
+            return nullptr;
+        }
+
+        template <typename StringType>
+        static Utf8String * ConvertString(StringType * originalString, _In_reads_(utf8Length) char* buffer, size_t utf8Length)
+        {
+            VirtualTableInfo<Utf8String>::SetVirtualTable(originalString); // in-place conversion: the same object is retyped as Utf8String
+            Utf8String * convertedString = (Utf8String *)originalString; // NOTE(review): assumes StringType objects are at least sizeof(Utf8String) - confirm for each caller
+
+            convertedString->SetUtf8Buffer(buffer, utf8Length);
+
+            // length and buffer are preserved from the original string, since that is part of JavascriptString
+
+            return convertedString;
+        }
+    };
+}
diff --git a/lib/Runtime/Runtime.h b/lib/Runtime/Runtime.h
@@ -497,6 +497,8 @@ enum tagDEBUG_EVENT_INFO_TYPE
 #include "Library/PropertyRecordUsageCache.h"
 #include "Library/PropertyString.h"
 #include "Library/SingleCharString.h"
+#include "Library/Utf8String.h"
+#include "Library/LazyJSONString.h"
 
 #include "Library/JavascriptTypedNumber.h"
 #include "Library/SparseArraySegment.h"