Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Fix System.String over-allocation (#17876)
Browse files Browse the repository at this point in the history
BaseSize for System.String was not set correctly. It caused unnecessary extra 8 bytes to be allocated at the end of strings that had `Length % 4 < 2` on 64-bit platforms.

This change makes affected strings proportionally cheaper. For example, `new string('a', 1)` in a long-running loop is 7% faster.
  • Loading branch information
jkotas committed May 4, 2018
1 parent fac17ce commit 37e6436
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 77 deletions.
2 changes: 1 addition & 1 deletion src/debug/daccess/request.cpp
Expand Up @@ -1475,7 +1475,7 @@ ClrDataAccess::GetObjectStringData(CLRDATA_ADDRESS obj, unsigned int count, __ou
if (count > needed)
count = needed;

TADDR pszStr = TO_TADDR(obj)+offsetof(StringObject, m_Characters);
TADDR pszStr = TO_TADDR(obj)+offsetof(StringObject, m_FirstChar);
hr = m_pTarget->ReadVirtual(pszStr, (PBYTE)stringData, count * sizeof(wchar_t), &needed);

if (SUCCEEDED(hr))
Expand Down
4 changes: 1 addition & 3 deletions src/vm/amd64/JitHelpers_InlineGetThread.asm
Expand Up @@ -134,12 +134,10 @@ LEAF_ENTRY AllocateStringFastMP_InlineGetThread, _TEXT
cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2
jae OversizedString

mov edx, [r9 + OFFSET__MethodTable__m_BaseSize]

; Calculate the final size to allocate.
; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7.

lea edx, [edx + ecx*2 + 7]
lea edx, [STRING_BASE_SIZE + ecx*2 + 7]
and edx, -8

INLINE_GETTHREAD r11
Expand Down
4 changes: 1 addition & 3 deletions src/vm/amd64/JitHelpers_Slow.asm
Expand Up @@ -276,12 +276,10 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT
cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2
jae FramedAllocateString

mov r8d, [r11 + OFFSET__MethodTable__m_BaseSize]

; Calculate the final size to allocate.
; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7.

lea r8d, [r8d + ecx*2 + 7]
lea r8d, [STRING_BASE_SIZE + ecx*2 + 7]
and r8d, -8

inc [g_global_alloc_lock]
Expand Down
3 changes: 3 additions & 0 deletions src/vm/amd64/asmconstants.h
Expand Up @@ -545,6 +545,9 @@ ASMCONSTANTS_C_ASSERT(ASM_LARGE_OBJECT_SIZE == LARGE_OBJECT_SIZE);
ASMCONSTANTS_C_ASSERT(OFFSETOF__ArrayBase__m_NumComponents
== offsetof(ArrayBase, m_NumComponents));

#define STRING_BASE_SIZE 0x16
ASMCONSTANTS_RUNTIME_ASSERT(STRING_BASE_SIZE == StringObject::GetBaseSize());

#define OFFSETOF__StringObject__m_StringLength 0x8
ASMCONSTANTS_C_ASSERT(OFFSETOF__StringObject__m_StringLength
== offsetof(StringObject, m_StringLength));
Expand Down
2 changes: 0 additions & 2 deletions src/vm/appdomain.cpp
Expand Up @@ -2759,8 +2759,6 @@ void SystemDomain::LoadBaseSystemClasses()

// Load String
g_pStringClass = MscorlibBinder::LoadPrimitiveType(ELEMENT_TYPE_STRING);
_ASSERTE(g_pStringClass->GetBaseSize() == ObjSizeOf(StringObject)+sizeof(WCHAR));
_ASSERTE(g_pStringClass->GetComponentSize() == 2);

// Used by Buffer::BlockCopy
g_pByteArrayMT = ClassLoader::LoadArrayTypeThrowing(
Expand Down
45 changes: 0 additions & 45 deletions src/vm/arm/asmconstants.h
Expand Up @@ -95,18 +95,6 @@ ASMCONSTANTS_C_ASSERT(MethodTableWriteableData__m_dwFlags == offsetof(MethodTabl
#define MethodTableWriteableData__enum_flag_Unrestored 0x04
ASMCONSTANTS_C_ASSERT(MethodTableWriteableData__enum_flag_Unrestored == MethodTableWriteableData::enum_flag_Unrestored);

#define StringObject__m_StringLength 0x04
ASMCONSTANTS_C_ASSERT(StringObject__m_StringLength == offsetof(StringObject, m_StringLength));

#define SIZEOF__BaseStringObject 0xe
ASMCONSTANTS_C_ASSERT(SIZEOF__BaseStringObject == (ObjSizeOf(StringObject) + sizeof(WCHAR)));

#define SIZEOF__ArrayOfObjectRef 0xc
ASMCONSTANTS_C_ASSERT(SIZEOF__ArrayOfObjectRef == ObjSizeOf(ArrayBase));

#define SIZEOF__ArrayOfValueType 0xc
ASMCONSTANTS_C_ASSERT(SIZEOF__ArrayOfValueType == ObjSizeOf(ArrayBase));

#define ArrayBase__m_NumComponents 0x4
ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents));

Expand All @@ -116,33 +104,8 @@ ASMCONSTANTS_C_ASSERT(ArrayTypeDesc__m_Arg == offsetof(ArrayTypeDesc, m_Arg));
#define PtrArray__m_Array 0x8
ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array));

#define SYSTEM_INFO__dwNumberOfProcessors 0x14
ASMCONSTANTS_C_ASSERT(SYSTEM_INFO__dwNumberOfProcessors == offsetof(SYSTEM_INFO, dwNumberOfProcessors));

#define TypeHandle_CanCast 0x1 // TypeHandle::CanCast

// Maximum number of characters to be allocated for a string in AllocateStringFast*. Chosen so that we'll
// never have to check for overflow and will never try to allocate a string on regular heap that should have
// gone on the large object heap. Additionally the constant has been chosen such that it can be encoded in a
// single Thumb2 CMP instruction.
#define MAX_FAST_ALLOCATE_STRING_SIZE 42240
ASMCONSTANTS_C_ASSERT(MAX_FAST_ALLOCATE_STRING_SIZE < ((LARGE_OBJECT_SIZE - SIZEOF__BaseStringObject) / 2));


// Array of objectRef of this Maximum number of elements can be allocated in JIT_NewArr1OBJ_MP*. Chosen so that we'll
// never have to check for overflow and will never try to allocate the array on regular heap that should have
// gone on the large object heap. Additionally the constant has been chosen such that it can be encoded in a
// single Thumb2 CMP instruction.
#define MAX_FAST_ALLOCATE_ARRAY_OBJECTREF_SIZE 21120
ASMCONSTANTS_C_ASSERT(MAX_FAST_ALLOCATE_ARRAY_OBJECTREF_SIZE < ((LARGE_OBJECT_SIZE - SIZEOF__ArrayOfObjectRef) / sizeof(void*)));

// Array of valueClass of this Maximum number of characters can be allocated JIT_NewArr1VC_MP*. Chosen so that we'll
// never have to check for overflow and will never try to allocate the array on regular heap that should have
// gone on the large object heap. Additionally the constant has been chosen such that it can be encoded in a
// single Thumb2 CMP instruction.
#define MAX_FAST_ALLOCATE_ARRAY_VC_SIZE 65280
ASMCONSTANTS_C_ASSERT(MAX_FAST_ALLOCATE_ARRAY_VC_SIZE < ((4294967296 - 1 - SIZEOF__ArrayOfValueType) / 65536));

#define SIZEOF__GSCookie 0x4
ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie));

Expand Down Expand Up @@ -203,14 +166,6 @@ ASMCONSTANTS_C_ASSERT(UnmanagedToManagedFrame__m_pvDatum == offsetof(UnmanagedTo

#endif // FEATURE_COMINTEROP

#ifndef CROSSGEN_COMPILE
#define Thread__m_alloc_context__alloc_limit 0x44
ASMCONSTANTS_C_ASSERT(Thread__m_alloc_context__alloc_limit == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_limit));

#define Thread__m_alloc_context__alloc_ptr 0x40
ASMCONSTANTS_C_ASSERT(Thread__m_alloc_context__alloc_ptr == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_ptr));
#endif // CROSSGEN_COMPILE

#define Thread__m_fPreemptiveGCDisabled 0x08
#ifndef CROSSGEN_COMPILE
ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled));
Expand Down
9 changes: 4 additions & 5 deletions src/vm/i386/jitinterfacex86.cpp
Expand Up @@ -1063,14 +1063,13 @@ void *JIT_TrialAlloc::GenAllocString(Flags flags)
// jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet
sl.X86EmitCondJump(noLock, X86CondCode::kJAE);

// mov edx, [ecx]MethodTable.m_BaseSize
sl.X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable,m_BaseSize));

// Calculate the final size to allocate.
// We need to calculate baseSize + cnt*2, then round that up by adding 3 and anding ~3.

// lea eax, [edx+eax*2+5]
sl.X86EmitOp(0x8d, kEAX, kEDX, (DATA_ALIGNMENT-1), kEAX, 2);
// lea eax, [basesize+(alignment-1)+eax*2]
sl.Emit16(0x048d);
sl.Emit8(0x45);
sl.Emit32(StringObject::GetBaseSize() + (DATA_ALIGNMENT-1));

// and eax, ~3
sl.Emit16(0xe083);
Expand Down
4 changes: 2 additions & 2 deletions src/vm/methodtablebuilder.cpp
Expand Up @@ -9737,8 +9737,8 @@ void MethodTableBuilder::CheckForSystemTypes()
{
// Strings are not "normal" objects, so we need to mess with their method table a bit
// so that the GC can figure out how big each string is...
DWORD baseSize = ObjSizeOf(StringObject) + sizeof(WCHAR);
pMT->SetBaseSize(baseSize); // NULL character included
DWORD baseSize = StringObject::GetBaseSize();
pMT->SetBaseSize(baseSize);

GetHalfBakedClass()->SetBaseSizePadding(baseSize - bmtFP->NumInstanceFieldBytes);

Expand Down
20 changes: 6 additions & 14 deletions src/vm/object.h
Expand Up @@ -914,7 +914,7 @@ typedef PTR_StringObject STRINGREF;
* Special String implementation for performance.
*
* m_StringLength - Length of string in number of WCHARs
* m_Characters - The string buffer
* m_FirstChar - The string buffer
*
*/

Expand All @@ -932,9 +932,6 @@ typedef PTR_StringObject STRINGREF;
#define STRING_STATE_FAST_OPS 0x80000000
#define STRING_STATE_SPECIAL_SORT 0xC0000000

#ifdef _MSC_VER
#pragma warning(disable : 4200) // disable zero-sized array warning
#endif
class StringObject : public Object
{
#ifdef DACCESS_COMPILE
Expand All @@ -947,11 +944,7 @@ class StringObject : public Object

private:
DWORD m_StringLength;
WCHAR m_Characters[0];
// GC will see a StringObject like this:
// DWORD m_StringLength
// WCHAR m_Characters[0]
// DWORD m_OptionalPadding (this is an optional field and will appear based on need)
WCHAR m_FirstChar;

public:
VOID SetStringLength(DWORD len) { LIMITED_METHOD_CONTRACT; _ASSERTE(len >= 0); m_StringLength = len; }
Expand All @@ -961,12 +954,11 @@ class StringObject : public Object
~StringObject() {LIMITED_METHOD_CONTRACT; }

public:
static DWORD GetBaseSize();
static SIZE_T GetSize(DWORD stringLength);

DWORD GetStringLength() { LIMITED_METHOD_DAC_CONTRACT; return( m_StringLength );}
WCHAR* GetBuffer() { LIMITED_METHOD_CONTRACT; _ASSERTE(this != nullptr); return (WCHAR*)( dac_cast<TADDR>(this) + offsetof(StringObject, m_Characters) ); }
WCHAR* GetBuffer(DWORD *pdwSize) { LIMITED_METHOD_CONTRACT; _ASSERTE((this != nullptr) && pdwSize); *pdwSize = GetStringLength(); return GetBuffer(); }
WCHAR* GetBufferNullable() { LIMITED_METHOD_CONTRACT; return( (this == nullptr) ? nullptr : (WCHAR*)( dac_cast<TADDR>(this) + offsetof(StringObject, m_Characters) ) ); }
WCHAR* GetBuffer() { LIMITED_METHOD_CONTRACT; _ASSERTE(this != nullptr); return (WCHAR*)( dac_cast<TADDR>(this) + offsetof(StringObject, m_FirstChar) ); }

DWORD GetHighCharState() {
WRAPPER_NO_CONTRACT;
Expand All @@ -992,7 +984,7 @@ class StringObject : public Object
static UINT GetBufferOffset()
{
LIMITED_METHOD_DAC_CONTRACT;
return (UINT)(offsetof(StringObject, m_Characters));
return (UINT)(offsetof(StringObject, m_FirstChar));
}
static UINT GetStringLengthOffset()
{
Expand Down Expand Up @@ -3329,7 +3321,7 @@ class Nullable {
static inline void *Value(void *src, MethodTable *nullableMT)
{
Nullable *nullable = (Nullable *)src;
return nullable->ValueAddr(nullableMT);
return nullable->ValueAddr(nullableMT);
}

private:
Expand Down
10 changes: 8 additions & 2 deletions src/vm/object.inl
Expand Up @@ -58,12 +58,18 @@ inline SIZE_T Object::GetSize()
return s;
}

__forceinline /*static*/ DWORD StringObject::GetBaseSize()
{
LIMITED_METHOD_DAC_CONTRACT;

return ObjSizeOf(Object) + sizeof(DWORD) /* length */ + sizeof(WCHAR) /* null terminator */;
}

__forceinline /*static*/ SIZE_T StringObject::GetSize(DWORD strLen)
{
LIMITED_METHOD_DAC_CONTRACT;

// Extra WCHAR for null terminator
return ObjSizeOf(StringObject) + sizeof(WCHAR) + strLen * sizeof(WCHAR);
return GetBaseSize() + strLen * sizeof(WCHAR);
}

#ifdef DACCESS_COMPILE
Expand Down

0 comments on commit 37e6436

Please sign in to comment.