From 641e820257d19a341e6fe0b5623d70a7893b07e5 Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Sat, 18 Oct 2014 02:50:02 +0200 Subject: [PATCH] Profiling: measure time on POSIX systems using clock_gettime --- Source/Core/Common/CMakeLists.txt | 1 + Source/Core/Common/PerformanceCounter.cpp | 48 +++++++++++++++++++ Source/Core/Common/PerformanceCounter.h | 17 +++++++ Source/Core/Core/PowerPC/Jit64/Jit.cpp | 6 +-- Source/Core/Core/PowerPC/JitArm32/Jit.cpp | 3 +- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 2 - Source/Core/Core/PowerPC/JitInterface.cpp | 17 ++----- Source/Core/Core/PowerPC/Profiler.h | 27 ++++++----- 8 files changed, 86 insertions(+), 35 deletions(-) create mode 100644 Source/Core/Common/PerformanceCounter.cpp create mode 100644 Source/Core/Common/PerformanceCounter.h diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index 1a93dfd0f6ec..9a0d508f4c8e 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRCS BreakPoints.cpp NandPaths.cpp Network.cpp PcapFile.cpp + PerformanceCounter.cpp SettingsHandler.cpp SDCardUtil.cpp StringUtil.cpp diff --git a/Source/Core/Common/PerformanceCounter.cpp b/Source/Core/Common/PerformanceCounter.cpp new file mode 100644 index 000000000000..64a9a0b20a98 --- /dev/null +++ b/Source/Core/Common/PerformanceCounter.cpp @@ -0,0 +1,48 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#if !defined(_WIN32) + +#include <cstdint> +#include <ctime> + +#include <unistd.h> + +#include "Common/CommonTypes.h" +#include "Common/PerformanceCounter.h" + +#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 +#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK>0 +#define DOLPHIN_CLOCK CLOCK_MONOTONIC +#else +#define DOLPHIN_CLOCK CLOCK_REALTIME +#endif +#endif + +bool QueryPerformanceCounter(u64* out) +{ +#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 + timespec tp; + if (clock_gettime(DOLPHIN_CLOCK, &tp)) + return false; + *out = (u64) tp.tv_nsec + (u64) 1000000000 * (u64) tp.tv_sec; + return true; +#else + *out = 0; + return false; +#endif +} + +bool QueryPerformanceFrequency(u64* out) +{ +#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0 + *out = 1000000000; + return true; +#else + *out = 1; + return false; +#endif +} + +#endif diff --git a/Source/Core/Common/PerformanceCounter.h b/Source/Core/Common/PerformanceCounter.h new file mode 100644 index 000000000000..561cb95b48dd --- /dev/null +++ b/Source/Core/Common/PerformanceCounter.h @@ -0,0 +1,17 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#pragma once + +#if !defined(_WIN32) + +#include <cstdint> + +#include "Common/CommonTypes.h" + +typedef u64 LARGE_INTEGER; +bool QueryPerformanceCounter(u64* out); +bool QueryPerformanceFrequency(u64* lpFrequency); + +#endif diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index c9e805784758..2d13a55137c1 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -575,13 +575,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { MOV(64, R(RSCRATCH), Imm64((u64)&b->runCount)); ADD(32, MatR(RSCRATCH), Imm8(1)); -#ifdef _WIN32 b->ticCounter = 0; b->ticStart = 0; b->ticStop = 0; -#else -//TODO -#endif // get start tic PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart); } @@ -625,7 +621,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // get end tic PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); // tic counter += (end tic - start tic) - PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); + PROFILER_UPDATE_TIME(b); PROFILER_VPOP; } } diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp index 95f8cf2fd506..de9abfa98878 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp @@ -397,7 +397,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo // get end tic PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); // tic counter += (end tic - start tic) - PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); + PROFILER_UPDATE_TIME(b); PROFILER_VPOP; } } @@ -467,4 +467,3 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo FlushIcache(); return start; } - diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index c8049295e32a..0a793443098c 100644 --- 
a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -51,13 +51,11 @@ struct JitBlock }; std::vector linkData; -#ifdef _WIN32 // we don't really need to save start and stop // TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;) u64 ticStart; // for profiling - time. u64 ticStop; // for profiling - time. u64 ticCounter; // for profiling - time. -#endif #ifdef USE_VTUNE char blockName[32]; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index fd7392f6b3ed..ca4e3b781798 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -8,6 +8,8 @@ #ifdef _WIN32 #include <windows.h> +#else +#include "Common/PerformanceCounter.h" #endif #include "Core/ConfigManager.h" @@ -140,26 +142,20 @@ namespace JitInterface std::vector<BlockStat> stats; stats.reserve(jit->GetBlockCache()->GetNumBlocks()); u64 cost_sum = 0; - #ifdef _WIN32 u64 timecost_sum = 0; u64 countsPerSec; - QueryPerformanceFrequency((LARGE_INTEGER *)&countsPerSec); - #endif + QueryPerformanceFrequency((LARGE_INTEGER*)&countsPerSec); for (int i = 0; i < jit->GetBlockCache()->GetNumBlocks(); i++) { const JitBlock *block = jit->GetBlockCache()->GetBlock(i); // Rough heuristic. Mem instructions should cost more. u64 cost = block->originalSize * (block->runCount / 4); - #ifdef _WIN32 u64 timecost = block->ticCounter; - #endif // Todo: tweak. 
if (block->runCount >= 1) stats.push_back(BlockStat(i, cost)); cost_sum += cost; - #ifdef _WIN32 timecost_sum += timecost; - #endif } sort(stats.begin(), stats.end()); @@ -177,16 +173,11 @@ namespace JitInterface { std::string name = g_symbolDB.GetDescription(block->originalAddress); double percent = 100.0 * (double)stat.cost / (double)cost_sum; - #ifdef _WIN32 - double timePercent = 100.0 * (double)block->ticCounter / (double)timecost_sum; + double timePercent = timecost_sum != 0 ? 100.0 * (double)block->ticCounter / (double)timecost_sum : 0.0; // avoid 0/0 (NaN) when no block recorded any time - fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t%" PRIu64 "\t%.2lf\t%llf\t%lf\t%i\n", + fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t%" PRIu64 "\t%.2f\t%.2f\t%.2f\t%i\n", block->originalAddress, name.c_str(), stat.cost, block->ticCounter, percent, timePercent, (double)block->ticCounter*1000.0/(double)countsPerSec, block->codeSize); - #else - fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t???\t%.2lf\t???\t???\t%i\n", - block->originalAddress, name.c_str(), stat.cost, percent, block->codeSize); - #endif } } } diff --git a/Source/Core/Core/PowerPC/Profiler.h b/Source/Core/Core/PowerPC/Profiler.h index 7b515892252b..c7c3fe9c03e0 100644 --- a/Source/Core/Core/PowerPC/Profiler.h +++ b/Source/Core/Core/PowerPC/Profiler.h @@ -5,22 +5,26 @@ #pragma once +#include <cstddef> #include #include "Common/CommonTypes.h" -#ifdef _WIN32 +#include "Common/PerformanceCounter.h" + +#if defined(_M_X86_64) #define PROFILER_QUERY_PERFORMANCE_COUNTER(pt) \ - LEA(64, ABI_PARAM1, M(pt)); \ - CALL(QueryPerformanceCounter) + MOV(64, R(ABI_PARAM1), Imm64((u64) (pt))); \ + CALL((const void*) QueryPerformanceCounter) -// asm write : (u64) dt += t1-t0 -#define PROFILER_ADD_DIFF_LARGE_INTEGER(pdt, pt1, pt0) \ - MOV(64, R(RSCRATCH), M(pt1)); \ - SUB(64, R(RSCRATCH), M(pt0)); \ - ADD(64, R(RSCRATCH), M(pdt)); \ - MOV(64, M(pdt), R(RSCRATCH)); +// block->ticCounter += block->ticStop - block->ticStart +#define PROFILER_UPDATE_TIME(block) \ + MOV(64, R(RSCRATCH2), Imm64((u64) (block))); \ + MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStop))); \ + SUB(64, 
R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStart))); \ + ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter))); \ + MOV(64, MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter)), R(RSCRATCH)); #define PROFILER_VPUSH \ BitSet32 registersInUse = CallerSavedRegistersInUse(); \ @@ -32,10 +36,7 @@ #else #define PROFILER_QUERY_PERFORMANCE_COUNTER(pt) - -// TODO: Implement generic ways to do this cleanly with all supported architectures -// asm write : (u64) dt += t1-t0 -#define PROFILER_ADD_DIFF_LARGE_INTEGER(pdt, pt1, pt0) +#define PROFILER_UPDATE_TIME(b) #define PROFILER_VPUSH #define PROFILER_VPOP