From fe917d01ad8e45c949398d5298c43829495d1d3a Mon Sep 17 00:00:00 2001 From: Oguz Bastemur Date: Wed, 19 Oct 2016 12:12:54 +0200 Subject: [PATCH] Disable RecyclerWatsonTelemetry + Use RDTSC for GetTickCount Brings ~4% perf improvement [ http-load test, measured on xplat ] - Disables RecyclerWatsonTelemetry for ChakraCore [ reduces the number of calls to system clock api <=~1.5% ] - Use RDTSC for GetTickCount. [ this affects only xplat <=~3% ] --- Build/Chakra.Build.props | 2 +- lib/Common/Memory/Recycler.cpp | 11 +- lib/Common/Memory/Recycler.h | 5 +- lib/Common/Memory/RecyclerWatsonTelemetry.h | 8 ++ lib/Runtime/Base/ThreadContext.cpp | 4 + lib/Runtime/Base/ThreadContext.h | 8 +- pal/src/misc/time.cpp | 123 ++++++++++++++------ 7 files changed, 120 insertions(+), 41 deletions(-) diff --git a/Build/Chakra.Build.props b/Build/Chakra.Build.props index 10ee9a3f26d..8552aa9fefb 100644 --- a/Build/Chakra.Build.props +++ b/Build/Chakra.Build.props @@ -1,6 +1,6 @@ - + 0x0601 0x0602 diff --git a/lib/Common/Memory/Recycler.cpp b/lib/Common/Memory/Recycler.cpp index 7ff804e798b..3c2ac3a5a57 100644 --- a/lib/Common/Memory/Recycler.cpp +++ b/lib/Common/Memory/Recycler.cpp @@ -242,7 +242,9 @@ Recycler::Recycler(AllocationPolicyManager * policyManager, IdleDecommitPageAllo #ifdef HEAP_ENUMERATION_VALIDATION ,pfPostHeapEnumScanCallback(nullptr) #endif +#ifdef NTBUILD , telemetryBlock(&localTelemetryBlock) +#endif #ifdef ENABLE_JS_ETW ,bulkFreeMemoryWrittenCount(0) #endif @@ -321,7 +323,9 @@ Recycler::Recycler(AllocationPolicyManager * policyManager, IdleDecommitPageAllo this->inDetachProcess = false; #endif +#ifdef NTBUILD memset(&localTelemetryBlock, 0, sizeof(localTelemetryBlock)); +#endif #ifdef ENABLE_DEBUG_CONFIG_OPTIONS // recycler requires at least Recycler::PrimaryMarkStackReservedPageCount to function properly for the main mark context @@ -3426,8 +3430,10 @@ Recycler::Collect() { RECORD_TIMESTAMP(initialCollectionStartTime); +#ifdef NTBUILD this->telemetryBlock->initialCollectionStartProcessUsedBytes = PageAllocator::GetProcessUsedBytes(); this->telemetryBlock->exhaustiveRepeatedCount = 0; +#endif return DoCollectWrapped(finalFlags); } @@ -3554,7 +3560,9 @@ Recycler::DoCollect(CollectionFlags flags) { INC_TIMESTAMP_FIELD(exhaustiveRepeatedCount); RECORD_TIMESTAMP(currentCollectionStartTime); +#ifdef NTBUILD this->telemetryBlock->currentCollectionStartProcessUsedBytes = PageAllocator::GetProcessUsedBytes(); +#endif #if ENABLE_CONCURRENT_GC // DisposeObject may call script again and start another GC, so we may still be in concurrent GC state @@ -6910,7 +6918,7 @@ Recycler::FillCheckPad(void * address, size_t size, size_t alignedAllocSize, boo } } -void +void Recycler::FillPadNoCheck(void * address, size_t size, size_t alignedAllocSize, bool objectAlreadyInitialized) { // Ignore the first word @@ -8187,4 +8195,3 @@ RecyclerHeapObjectInfo::GetSize() const } template char* Recycler::AllocWithAttributesInlined<(Memory::ObjectInfoBits)32, false>(size_t); - diff --git a/lib/Common/Memory/Recycler.h b/lib/Common/Memory/Recycler.h index 21a04e8a7fb..d4ef7920271 100644 --- a/lib/Common/Memory/Recycler.h +++ b/lib/Common/Memory/Recycler.h @@ -1009,9 +1009,10 @@ class Recycler #if DBG || defined(RECYCLER_STATS) bool isForceSweeping; #endif +#ifdef NTBUILD RecyclerWatsonTelemetryBlock localTelemetryBlock; RecyclerWatsonTelemetryBlock * telemetryBlock; - +#endif #ifdef RECYCLER_STATS RecyclerCollectionStats collectionStats; void PrintHeapBlockStats(char16 const * name, HeapBlock::HeapBlockType type); @@ -1072,7 +1073,9 @@ class Recycler void LogMemProtectHeapSize(bool fromGC); char* Realloc(void* buffer, DECLSPEC_GUARD_OVERFLOW size_t existingBytes, DECLSPEC_GUARD_OVERFLOW size_t requestedBytes, bool truncate = true); +#ifdef NTBUILD void SetTelemetryBlock(RecyclerWatsonTelemetryBlock * telemetryBlock) { this->telemetryBlock = telemetryBlock; } +#endif void Prime(); diff --git a/lib/Common/Memory/RecyclerWatsonTelemetry.h b/lib/Common/Memory/RecyclerWatsonTelemetry.h index a866584cc63..0fadfa0e0b9 100644 --- a/lib/Common/Memory/RecyclerWatsonTelemetry.h +++ b/lib/Common/Memory/RecyclerWatsonTelemetry.h @@ -2,6 +2,13 @@ // Copyright (C) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. //------------------------------------------------------------------------------------------------------- +#ifndef NTBUILD + +#define RECORD_TIMESTAMP(Field) +#define INC_TIMESTAMP_FIELD(Field) +#define AUTO_TIMESTAMP(Field) + +#else // CHAKRA_FULL namespace Memory { /* @@ -46,3 +53,4 @@ namespace Memory DWORD exhaustiveRepeatedCount; }; }; +#endif diff --git a/lib/Runtime/Base/ThreadContext.cpp b/lib/Runtime/Base/ThreadContext.cpp index 4b9825922ad..1ed53a27aa5 100644 --- a/lib/Runtime/Base/ThreadContext.cpp +++ b/lib/Runtime/Base/ThreadContext.cpp @@ -178,7 +178,9 @@ ThreadContext::ThreadContext(AllocationPolicyManager * allocationPolicyManager, #endif dynamicObjectEnumeratorCacheMap(&HeapAllocator::Instance, 16), //threadContextFlags(ThreadContextFlagNoFlag), +#ifdef NTBUILD telemetryBlock(&localTelemetryBlock), +#endif configuration(enableExperimentalFeatures), jsrtRuntime(nullptr), propertyMap(nullptr), @@ -248,7 +250,9 @@ ThreadContext::ThreadContext(AllocationPolicyManager * allocationPolicyManager, this->threadId = ::GetCurrentThreadId(); #endif +#ifdef NTBUILD memset(&localTelemetryBlock, 0, sizeof(localTelemetryBlock)); +#endif AutoCriticalSection autocs(ThreadContext::GetCriticalSection()); ThreadContext::LinkToBeginning(this, &ThreadContext::globalListFirst, &ThreadContext::globalListLast); diff --git a/lib/Runtime/Base/ThreadContext.h b/lib/Runtime/Base/ThreadContext.h index 7fca7804c9f..e86faf07ea0 100644 --- a/lib/Runtime/Base/ThreadContext.h +++ b/lib/Runtime/Base/ThreadContext.h @@ -204,11 +204,13 @@ class IProjectionContextMemoryInfo abstract #endif #endif +#ifdef NTBUILD struct ThreadContextWatsonTelemetryBlock { FILETIME lastScriptStartTime; FILETIME lastScriptEndTime; }; +#endif class NativeLibraryEntryRecord { @@ -781,8 +783,10 @@ class ThreadContext sealed : typedef JsUtil::BaseDictionary DynamicObjectEnumeratorCacheMap; DynamicObjectEnumeratorCacheMap dynamicObjectEnumeratorCacheMap; +#ifdef NTBUILD ThreadContextWatsonTelemetryBlock localTelemetryBlock; ThreadContextWatsonTelemetryBlock * telemetryBlock; +#endif NativeLibraryEntryRecord nativeLibraryEntry; @@ -1002,8 +1006,6 @@ class ThreadContext sealed : #endif } - - DateTime::HiResTimer * GetHiResTimer() { return &hTimer; } ArenaAllocator* GetThreadAlloc() { return &threadAlloc; } static CriticalSection * GetCriticalSection() { return &s_csThreadContext; } @@ -1014,7 +1016,9 @@ class ThreadContext sealed : ThreadConfiguration const * GetConfig() const { return &configuration; } public: +#ifdef NTBUILD void SetTelemetryBlock(ThreadContextWatsonTelemetryBlock * telemetryBlock) { this->telemetryBlock = telemetryBlock; } +#endif static ThreadContext* GetContextForCurrentThread(); diff --git a/pal/src/misc/time.cpp b/pal/src/misc/time.cpp index 17fe0373727..7be0fe47e4e 100644 --- a/pal/src/misc/time.cpp +++ b/pal/src/misc/time.cpp @@ -1,6 +1,6 @@ // // Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. // /*++ @@ -75,8 +75,8 @@ time. The system time is expressed in Coordinated Universal Time Parameters -lpSystemTime - [out] Pointer to a SYSTEMTIME structure to receive the current system date and time. +lpSystemTime + [out] Pointer to a SYSTEMTIME structure to receive the current system date and time. Return Values @@ -101,10 +101,10 @@ GetSystemTime( tt = time(NULL); - /* We can't get millisecond resolution from time(), so we get it from + /* We can't get millisecond resolution from time(), so we get it from gettimeofday() */ timeofday_retval = gettimeofday(&timeval,NULL); - + #if HAVE_GMTIME_R utPtr = &ut; if (gmtime_r(&tt, utPtr) == NULL) @@ -134,20 +134,20 @@ GetSystemTime( { int old_seconds; int new_seconds; - + lpSystemTime->wMilliseconds = timeval.tv_usec/tccMillieSecondsToMicroSeconds; - + old_seconds = utPtr->tm_sec; new_seconds = timeval.tv_sec%60; - - /* just in case we reached the next second in the interval between + + /* just in case we reached the next second in the interval between time() and gettimeofday() */ if( old_seconds!=new_seconds ) { TRACE("crossed seconds boundary; setting milliseconds to 999\n"); lpSystemTime->wMilliseconds = 999; - } - } + } + } EXIT: LOGEXIT("GetSystemTime returns void\n"); PERF_EXIT(GetSystemTime); @@ -164,7 +164,7 @@ use the GetSystemTimeAdjustment function. Parameters -This function has no parameters. +This function has no parameters. Return Values @@ -212,7 +212,7 @@ QueryPerformanceCounter( retval = FALSE; break; } - lpPerformanceCount->QuadPart = + lpPerformanceCount->QuadPart = (LONGLONG)ts.tv_sec * (LONGLONG)tccSecondsToNanoSeconds + (LONGLONG)ts.tv_nsec; } #elif HAVE_MACH_ABSOLUTE_TIME @@ -233,22 +233,22 @@ QueryPerformanceCounter( retval = FALSE; break; } - lpPerformanceCount->QuadPart = + lpPerformanceCount->QuadPart = (LONGLONG)tb.tb_high * (LONGLONG)tccSecondsToNanoSeconds + (LONGLONG)tb.tb_low; } #else { - struct timeval tv; + struct timeval tv; if (gettimeofday(&tv, NULL) == -1) { ASSERT("gettimeofday() failed; errno is %d (%s)\n", errno, strerror(errno)); retval = FALSE; break; } - lpPerformanceCount->QuadPart = - (LONGLONG)tv.tv_sec * (LONGLONG)tccSecondsToMicroSeconds + (LONGLONG)tv.tv_usec; + lpPerformanceCount->QuadPart = + (LONGLONG)tv.tv_sec * (LONGLONG)tccSecondsToMicroSeconds + (LONGLONG)tv.tv_usec; } -#endif // HAVE_CLOCK_MONOTONIC +#endif // HAVE_CLOCK_MONOTONIC while (false); LOGEXIT("QueryPerformanceCounter\n"); @@ -280,7 +280,7 @@ QueryPerformanceFrequency( } #else lpFrequency->QuadPart = (LONGLONG)tccSecondsToMicroSeconds; -#endif // HAVE_GETHRTIME || HAVE_READ_REAL_TIME || HAVE_CLOCK_MONOTONIC +#endif // HAVE_GETHRTIME || HAVE_READ_REAL_TIME || HAVE_CLOCK_MONOTONIC LOGEXIT("QueryPerformanceFrequency\n"); PERF_EXIT(QueryPerformanceFrequency); return retval; @@ -324,24 +324,14 @@ QueryThreadCycleTime( return retval; } -/*++ -Function: - GetTickCount64 - -Returns a 64-bit tick count with a millisecond resolution. It tries its best -to return monotonically increasing counts and avoid being affected by changes -to the system clock (either due to drift or due to explicit changes to system -time). ---*/ -PALAPI -ULONGLONG -GetTickCount64() +static ULONGLONG +GetTickCount64Fallback() { ULONGLONG retval = 0; #if HAVE_CLOCK_MONOTONIC_COARSE || HAVE_CLOCK_MONOTONIC { - clockid_t clockType = + clockid_t clockType = #if HAVE_CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC_COARSE; // good enough resolution, fastest speed #else @@ -382,7 +372,7 @@ GetTickCount64() } #else { - struct timeval tv; + struct timeval tv; if (gettimeofday(&tv, NULL) == -1) { ASSERT("gettimeofday() failed; errno is %d (%s)\n", errno, strerror(errno)); @@ -390,8 +380,71 @@ GetTickCount64() } retval = (tv.tv_sec * tccSecondsToMillieSeconds) + (tv.tv_usec / tccMillieSecondsToMicroSeconds); } -#endif // HAVE_CLOCK_MONOTONIC -EXIT: +#endif // HAVE_CLOCK_MONOTONIC +EXIT: return retval; } +#if defined(_X86_) || defined(__AMD64__) || defined(__x86_64__) +inline ULONGLONG rdtsc() +{ + ULONGLONG H, L; + __asm volatile ("rdtsc":"=a"(L), "=d"(H)); +#ifdef _X86_ + return L; +#else + return (H << 32) | L; +#endif +} + +static double CPUFreq() +{ + struct timeval tstart, tend; + ULONGLONG start, end; + + struct timezone tzone; + memset(&tzone, 0, sizeof(tzone)); + + start = rdtsc(); + gettimeofday(&tstart, &tzone); + + usleep(1000); // 1ms + + end = rdtsc(); + gettimeofday(&tend, &tzone); + + ULONGLONG usec = ((tend.tv_sec - tstart.tv_sec)*1e6) + + (tend.tv_usec - tstart.tv_usec); + + if (!usec) return 0; + return (end - start) / usec; +} + +static ULONGLONG cpu_speed = CPUFreq() * 1e3; // 1000 * 1e6 => ns to ms +typedef ULONGLONG (*GetTickCount64FallbackCB)(void); +inline ULONGLONG FastTickCount() +{ + return rdtsc() / cpu_speed; +} +static GetTickCount64FallbackCB getTickCount64FallbackCB = cpu_speed ? FastTickCount : GetTickCount64Fallback; +#endif + +/*++ +Function: + GetTickCount64 + +Returns a 64-bit tick count with a millisecond resolution. It tries its best +to return monotonically increasing counts and avoid being affected by changes +to the system clock (either due to drift or due to explicit changes to system +time). +--*/ +PALAPI +ULONGLONG +GetTickCount64() +{ +#if defined(_X86_) || defined(__AMD64__) || defined(__x86_64__) + return getTickCount64FallbackCB(); +#else + return GetTickCount64Fallback(); +#endif +}