New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose a Utf8String type. #17872

Closed
wants to merge 58 commits into
base: master
from
Commits
Jump to file or symbol
Failed to load files and symbols.
+342 −13
Diff settings

Always

Just for now

@@ -1400,7 +1400,7 @@
<value>The object has no underlying COM data associated with it.</value>
</data>
<data name="Argument_NoUninitializedStrings" xml:space="preserve">
<value>Uninitialized Strings cannot be created.</value>
<value>Uninitialized strings cannot be created.</value>
</data>
<data name="Argument_ObjIsWinRTObject" xml:space="preserve">
<value>The object's type must not be a Windows Runtime type.</value>
@@ -288,6 +288,7 @@
<Compile Include="$(BclSourcesRoot)\System\TypeNameParser.cs" />
<Compile Include="$(BclSourcesRoot)\System\TypedReference.cs" />
<Compile Include="$(BclSourcesRoot)\System\TypeLoadException.cs" />
<Compile Include="$(BclSourcesRoot)\System\Utf8String.cs" />
<Compile Include="$(BclSourcesRoot)\System\ValueType.cs" />
<Compile Include="$(BclSourcesRoot)\System\WeakReference.cs" />
<Compile Include="$(BclSourcesRoot)\System\WeakReferenceOfT.cs" />
@@ -0,0 +1,86 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Text;
using System.Runtime.InteropServices;
using System.Runtime.CompilerServices;
namespace System
{
// This is an experimental type and not referenced from CoreFx but needs to exists and be public so we can prototype in CoreFxLab.
public sealed class Utf8String
{
// Do not reorder these fields. Must match layout of Utf8StringObject in object.h.
private readonly int _length;
private readonly byte _firstByte;
public int Length => _length;
public ref readonly byte GetPinnableReference() => ref _firstByte;
public static readonly Utf8String Empty = FastAllocate(0);
// Utf8String constructors
// These are special. The implementation methods for these have a different signature from the
// declared constructors.
[MethodImpl(MethodImplOptions.InternalCall)]
public extern Utf8String(ReadOnlySpan<byte> value);
#if PROJECTN
[DependencyReductionRoot]
#endif
#if !CORECLR
static
#endif
private Utf8String Ctor(ReadOnlySpan<byte> value)
{

This comment has been minimized.

@jkotas

jkotas May 18, 2018

Member

Do we plan to specialize empty strings like they are specialized in regular strings?

if (value.Length == 0)
return Empty;
Utf8String newString = FastAllocate(value.Length);
unsafe
{
fixed (byte* pDst = &newString._firstByte)
fixed (byte* pSrc = &MemoryMarshal.GetNonNullPinnableReference(value))
{
Buffer.Memcpy(dest: pDst, src: pSrc, len: value.Length);
}
}
return newString;
}
[MethodImpl(MethodImplOptions.InternalCall)]
public extern Utf8String(ReadOnlySpan<char> value);
#if PROJECTN
[DependencyReductionRoot]
#endif
#if !CORECLR
static
#endif
private Utf8String Ctor(ReadOnlySpan<char> value)
{
if (value.Length == 0)
return Empty;
Encoding e = Encoding.UTF8;
int length = e.GetByteCount(value);
Utf8String newString = FastAllocate(length);
unsafe
{
fixed (byte* pFirstByte = &newString._firstByte)
fixed (char* pFirstChar = &MemoryMarshal.GetNonNullPinnableReference(value))
{
e.GetBytes(pFirstChar, length, pFirstByte, length);
}
}
return newString;
}
// Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes (the extra
// +1 is for the NUL terminator.)
[MethodImpl(MethodImplOptions.InternalCall)]
public static extern Utf8String FastAllocate(int length); //TODO: Is public for experimentation in CoreFxLab. Will be private in its ultimate form.
}
}
View
@@ -172,6 +172,7 @@ DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pObjectClass, ::g_pObjectClass
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pRuntimeTypeClass, ::g_pRuntimeTypeClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pCanonMethodTableClass, ::g_pCanonMethodTableClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pStringClass, ::g_pStringClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pUtf8StringClass, ::g_pUtf8StringClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pArrayClass, ::g_pArrayClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pSZArrayHelperClass, ::g_pSZArrayHelperClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pNullableClass, ::g_pNullableClass)
View
@@ -2760,6 +2760,9 @@ void SystemDomain::LoadBaseSystemClasses()
// Load String
g_pStringClass = MscorlibBinder::LoadPrimitiveType(ELEMENT_TYPE_STRING);
// Load Utf8String
g_pUtf8StringClass = MscorlibBinder::GetClass(CLASS__UTF8_STRING);
// Used by Buffer::BlockCopy
g_pByteArrayMT = ClassLoader::LoadArrayTypeThrowing(
TypeHandle(MscorlibBinder::GetElementType(ELEMENT_TYPE_U1))).AsArray()->GetMethodTable();
View
@@ -147,6 +147,8 @@
#define g_TransparentProxyName "__TransparentProxy"
#define g_TypeClassName "System.Type"
#define g_Utf8StringName "Utf8String"
#define g_VariantClassName "System.Variant"
#define g_GuidClassName "System.Guid"
View
@@ -55,23 +55,43 @@ static_assert_no_msg(ECallCtor_First + 8 == ECall::CtorSBytePtrStartLengthEncodi
#define NumberOfStringConstructors 9
#define METHOD__UTF8STRING__CTORF_FIRST METHOD__UTF8_STRING__CTORF_READONLYSPANOFBYTE
static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 0 == METHOD__UTF8_STRING__CTORF_READONLYSPANOFBYTE);
static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 1 == METHOD__UTF8_STRING__CTORF_READONLYSPANOFCHAR);
#define ECallUtf8String_Ctor_First ECall::Utf8StringCtorReadOnlySpanOfByteManaged
static_assert_no_msg(ECallUtf8String_Ctor_First + 0 == ECall::Utf8StringCtorReadOnlySpanOfByteManaged);
static_assert_no_msg(ECallUtf8String_Ctor_First + 1 == ECall::Utf8StringCtorReadOnlySpanOfCharManaged);
#define NumberOfUtf8StringConstructors 2
static void PopulateConstructors(DWORD firstECallIndex, BinderMethodID firstMethod, int numConstructors)
{
STANDARD_VM_CONTRACT;
for (int i = 0; i < numConstructors; i++)
{
MethodDesc* pMD = MscorlibBinder::GetMethod((BinderMethodID)(firstMethod + i));
_ASSERTE(pMD != NULL);
PCODE pDest = pMD->GetMultiCallableAddrOfCode();
ECall::DynamicallyAssignFCallImpl(pDest, firstECallIndex + i);
}
}
void ECall::PopulateManagedStringConstructors()
{
STANDARD_VM_CONTRACT;
INDEBUG(static bool fInitialized = false);
_ASSERTE(!fInitialized); // assume this method is only called once
_ASSERTE(g_pStringClass != NULL);
_ASSERTE(g_pUtf8StringClass != NULL);
for (int i = 0; i < NumberOfStringConstructors; i++)
{
MethodDesc* pMD = MscorlibBinder::GetMethod((BinderMethodID)(METHOD__STRING__CTORF_FIRST + i));
_ASSERTE(pMD != NULL);
PCODE pDest = pMD->GetMultiCallableAddrOfCode();
PopulateConstructors(ECallCtor_First, METHOD__STRING__CTORF_FIRST, NumberOfStringConstructors);
PopulateConstructors(ECallUtf8String_Ctor_First, METHOD__UTF8STRING__CTORF_FIRST, NumberOfUtf8StringConstructors);
ECall::DynamicallyAssignFCallImpl(pDest, ECallCtor_First + i);
}
INDEBUG(fInitialized = true);
}
View
@@ -114,7 +114,10 @@ class ECall
DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrManaged, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrStartLengthManaged, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrStartLengthEncodingManaged, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(FastAllocateUtf8String, FramedAllocateUtf8String) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(InternalGetCurrentThread, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorReadOnlySpanOfByteManaged, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorReadOnlySpanOfCharManaged, NULL) \
enum
{
View
@@ -116,6 +116,12 @@ FCFuncStart(gStringFuncs)
#endif // FEATURE_COMINTEROP
FCFuncEnd()
FCFuncStart(gUtf8StringFuncs)
FCDynamic("FastAllocate", CORINFO_INTRINSIC_Illegal, ECall::FastAllocateUtf8String)
FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ReadOnlySpanOfByte_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorReadOnlySpanOfByteManaged)
FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ReadOnlySpanOfChar_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorReadOnlySpanOfCharManaged)
FCFuncEnd()
FCFuncStart(gValueTypeFuncs)
FCFuncElement("CanCompareBits", ValueTypeHelper::CanCompareBits)
FCFuncElement("FastEqualsCheck", ValueTypeHelper::FastEqualsCheck)
@@ -1377,6 +1383,7 @@ FCClassElement("TypedReference", "System", gTypedReferenceFuncs)
#ifdef FEATURE_COMINTEROP
FCClassElement("UriMarshaler", "System.StubHelpers", gUriMarshalerFuncs)
#endif
FCClassElement("Utf8String", "System", gUtf8StringFuncs)
FCClassElement("ValueClassMarshaler", "System.StubHelpers", gValueClassMarshalerFuncs)
FCClassElement("ValueType", "System", gValueTypeFuncs)
#ifdef FEATURE_COMINTEROP
View
@@ -1059,6 +1059,73 @@ STRINGREF SlowAllocateString( DWORD cchStringLength )
return( ObjectToSTRINGREF(orObject) );
}
Utf8StringObject *SlowAllocateUtf8String(DWORD cchStringLength)
{
CONTRACTL{
THROWS;
GC_TRIGGERS;
MODE_COOPERATIVE; // returns an objref without pinning it => cooperative
} CONTRACTL_END;
Utf8StringObject *orObject = NULL;
#ifdef _DEBUG
if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP))
{
char *a = new char;
delete a;
}
#endif
// Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer
// overflows in buffer size calculations.
if (cchStringLength > 0x7FFFFFDF)
ThrowOutOfMemory();
SIZE_T ObjectSize = PtrAlign(Utf8StringObject::GetSize(cchStringLength));

This comment has been minimized.

@jkotas

jkotas May 3, 2018

Member

I think this allocates more than required.

I have noticed that we may have the same problem in regular string. For example:
new string('a', 5) allocates 0x28 bytes objects on x64 today. But it should be enough for it to allocate just 0x20 bytes:

8 syncblock
8 vtable
4 size
5*2 content
2 zero terminator

Do you know why it is the case?

This comment has been minimized.

@jkotas

jkotas May 3, 2018

Member

I have checked CoreRT. CoreRT allocates 0x20 bytes in this case, so there is definitely something pretty broken.

This comment has been minimized.

@mikedn

mikedn May 3, 2018

Contributor

Isn't there some padding between size and content?

This comment has been minimized.

@jkotas

jkotas May 3, 2018

Member

There is padding for regular arrays (so that all arrays have same layout even when elements are 8-byte aligned). There is no padding for strings. The content starts right after length. The extra unnecessary bytes are at the end.

I remember folks went to a great length to ensure that strings do not pay the extra 4 bytes during the initial 64-bit ports. It looks like it has regressed.

This comment has been minimized.

@AtsushiKan

AtsushiKan May 3, 2018

Contributor

It's coming from sizeof(Utf8StringObject) - there should be a pragma pack 4 around that declaration.

Presumably the same for StringObject, though I want to keep anything like that separate from this PR. We haven't yet agreed to merge this into master.

This comment has been minimized.

@jkotas

jkotas May 3, 2018

Member

#17876 has the fix for the String overallocation problem.

_ASSERTE(ObjectSize > cchStringLength);
SetTypeHandleOnThreadForAlloc(TypeHandle(g_pUtf8StringClass));
orObject = (Utf8StringObject *)Alloc(ObjectSize, FALSE, FALSE);
// Object is zero-init already

This comment has been minimized.

@GrabYourPitchforks

GrabYourPitchforks May 3, 2018

Member

This includes the null terminator being zero-inited, I suppose?

This comment has been minimized.

@AtsushiKan

AtsushiKan May 3, 2018

Contributor

Yep.

_ASSERTE(orObject->HasEmptySyncBlockInfo());
// Initialize Object
orObject->SetMethodTable(g_pUtf8StringClass);
orObject->SetLength(cchStringLength);
if (ObjectSize >= LARGE_OBJECT_SIZE)
{
GCHeapUtilities::GetGCHeap()->PublishObject((BYTE*)orObject);
}
// Notify the profiler of the allocation
if (TrackAllocations())
{
OBJECTREF objref = ObjectToOBJECTREF((Object*)orObject);
GCPROTECT_BEGIN(objref);
ProfilerObjectAllocatedCallback(objref, (ClassID)orObject->GetTypeHandle().AsPtr());
GCPROTECT_END();
orObject = (Utf8StringObject *)OBJECTREFToObject(objref);
}
#ifdef FEATURE_EVENT_TRACE
// Send ETW event for allocation
if (ETW::TypeSystemLog::IsHeapAllocEventEnabled())
{
ETW::TypeSystemLog::SendObjectAllocatedEvent(orObject);
}
#endif // FEATURE_EVENT_TRACE
LogAlloc(ObjectSize, g_pUtf8StringClass, orObject);
return orObject;
}
#ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION
// OBJECTREF AllocateComClassObject(ComClassFactory* pComClsFac)
void AllocateComClassObject(ComClassFactory* pComClsFac, OBJECTREF* ppRefClass)
View
@@ -71,6 +71,8 @@ STRINGREF AllocateString( DWORD cchStringLength );
// The slow version, implemented in gcscan.cpp
STRINGREF SlowAllocateString( DWORD cchStringLength );
Utf8StringObject *SlowAllocateUtf8String( DWORD cchStringLength );
#else
// On other platforms, go to the (somewhat less efficient) implementations in gcscan.cpp
@@ -83,6 +85,8 @@ OBJECTREF AllocateObjectArray(DWORD cElements, TypeHandle ElementType, BOOL bAll
STRINGREF SlowAllocateString( DWORD cchStringLength );
Utf8StringObject *SlowAllocateUtf8String( DWORD cchStringLength );
inline STRINGREF AllocateString( DWORD cchStringLength )
{
WRAPPER_NO_CONTRACT;
View
@@ -2881,6 +2881,60 @@ HCIMPL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength)
}
HCIMPLEND
HCIMPL1(Utf8StringObject*, AllocateUtf8String_MP_FastPortable, DWORD stringLength)
{
FCALL_CONTRACT;
do
{
_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
// Instead of doing elaborate overflow checks, we just limit the number of elements. This will avoid all overflow
// problems, as well as making sure big string objects are correctly allocated in the big object heap.
if (stringLength >= LARGE_OBJECT_SIZE - 256)
{
break;
}
// This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler
// to use volatile registers for intermediate values. This reduces the number of push/pop instructions and eliminates
// some reshuffling of intermediate values into nonvolatile registers around the call.
Thread *thread = GetThread();
SIZE_T totalSize = Utf8StringObject::GetSize(stringLength);
// The method table's base size includes space for a terminating null character
_ASSERTE(totalSize >= g_pUtf8StringClass->GetBaseSize());
_ASSERTE(totalSize - g_pUtf8StringClass->GetBaseSize() == stringLength);
SIZE_T alignedTotalSize = ALIGN_UP(totalSize, DATA_ALIGNMENT);
_ASSERTE(alignedTotalSize >= totalSize);
totalSize = alignedTotalSize;
gc_alloc_context *allocContext = thread->GetAllocContext();
BYTE *allocPtr = allocContext->alloc_ptr;
_ASSERTE(allocPtr <= allocContext->alloc_limit);
if (totalSize > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr))
{
break;
}
allocContext->alloc_ptr = allocPtr + totalSize;
_ASSERTE(allocPtr != nullptr);
Utf8StringObject *stringObject = reinterpret_cast<Utf8StringObject *>(allocPtr);
stringObject->SetMethodTable(g_pUtf8StringClass);
stringObject->SetLength(stringLength);
return stringObject;
} while (false);
// Tail call to the slow helper
ENDFORBIDGC();
return HCCALL1(FramedAllocateUtf8String, stringLength);
}
HCIMPLEND
#include <optdefault.h>
/*********************************************************************/
@@ -2920,6 +2974,20 @@ HCIMPL1(StringObject*, FramedAllocateString, DWORD stringLength)
}
HCIMPLEND
HCIMPL1(Utf8StringObject*, FramedAllocateUtf8String, DWORD stringLength)
{
FCALL_CONTRACT;
Utf8StringObject* result = NULL;
HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame
result = SlowAllocateUtf8String(stringLength);
HELPER_METHOD_FRAME_END();
return result;
}
HCIMPLEND
/*********************************************************************/
OBJECTHANDLE ConstructStringLiteral(CORINFO_MODULE_HANDLE scopeHnd, mdToken metaTok)
{
View
@@ -223,6 +223,9 @@ extern FCDECL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength
extern FCDECL1(StringObject*, UnframedAllocateString, DWORD stringLength);
extern FCDECL1(StringObject*, FramedAllocateString, DWORD stringLength);
extern FCDECL1(Utf8StringObject*, AllocateUtf8String_MP_FastPortable, DWORD stringLength);
extern FCDECL1(Utf8StringObject*, FramedAllocateUtf8String, DWORD stringLength);
extern FCDECL2(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size);
extern FCDECL2(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size);
extern FCDECL2(Object*, JIT_NewArr1_R2R, CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size);
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.