Permalink
Browse files

Merge pull request #10475 from unknownbrackets/profiler

Make the profiler thread safe
  • Loading branch information...
hrydgard committed Dec 30, 2017
2 parents eb35e88 + 52c4293 commit 5531afbaf4c683ebbf70d785c2fa4ed75f80ae86
Showing with 150 additions and 50 deletions.
  1. +7 −1 GPU/Common/TextureCacheCommon.cpp
  2. +10 −2 UI/ProfilerDraw.cpp
  3. +124 −40 ext/native/profiler/profiler.cpp
  4. +9 −7 ext/native/profiler/profiler.h
@@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <algorithm>
#include "profiler/profiler.h"
#include "Common/ColorConv.h"
#include "Common/MemoryUtil.h"
#include "Core/Config.h"
@@ -1472,6 +1473,7 @@ void TextureCacheCommon::ApplyTexture() {
}
if (nextNeedsRehash_) {
PROFILE_THIS_SCOPE("texhash");
// Update the hash on the texture.
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
@@ -1547,7 +1549,11 @@ void TextureCacheCommon::DeleteTexture(TexCache::iterator it) {
bool TextureCacheCommon::CheckFullHash(TexCacheEntry *entry, bool &doDelete) {
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
u32 fullhash = QuickTexHash(replacer_, entry->addr, entry->bufw, w, h, GETextureFormat(entry->format), entry);
u32 fullhash;
{
PROFILE_THIS_SCOPE("texhash");
fullhash = QuickTexHash(replacer_, entry->addr, entry->bufw, w, h, GETextureFormat(entry->format), entry);
}
if (fullhash == entry->fullhash) {
if (g_Config.bTextureBackoffCache) {
View
@@ -55,6 +55,7 @@ void DrawProfile(UIContext &ui) {
#ifdef USE_PROFILER
PROFILE_THIS_SCOPE("timing");
int numCategories = Profiler_GetNumCategories();
int numThreads = Profiler_GetNumThreads();
int historyLength = Profiler_GetHistoryLength();
ui.SetFontStyle(ui.theme->uiFont);
@@ -63,8 +64,11 @@ void DrawProfile(UIContext &ui) {
float legendMinVal = lastMaxVal * (1.0f / 120.0f);
std::vector<float> history(historyLength);
std::vector<int> slowestThread(historyLength);
std::vector<ProfileCatStatus> catStatus(numCategories);
Profiler_GetSlowestThreads(&slowestThread[0], historyLength);
float rowH = 30.0f;
float legendHeight = 0.0f;
float legendWidth = 80.0f;
@@ -75,7 +79,7 @@ void DrawProfile(UIContext &ui) {
continue;
}
Profiler_GetHistory(i, &history[0], historyLength);
Profiler_GetSlowestHistory(i, &slowestThread[0], &history[0], historyLength);
catStatus[i] = PROFILE_CAT_NOLEGEND;
for (int j = 0; j < historyLength; ++j) {
if (history[j] > legendMinVal) {
@@ -94,6 +98,10 @@ void DrawProfile(UIContext &ui) {
}
legendWidth += 20.0f;
if (legendHeight > ui.GetBounds().h) {
legendHeight = ui.GetBounds().h;
}
float legendStartY = legendHeight > ui.GetBounds().centerY() ? ui.GetBounds().y2() - legendHeight : ui.GetBounds().centerY();
float legendStartX = ui.GetBounds().x2() - std::min(legendWidth, 200.0f);
@@ -149,7 +157,7 @@ void DrawProfile(UIContext &ui) {
if (catStatus[i] == PROFILE_CAT_IGNORE) {
continue;
}
Profiler_GetHistory(i, &history[0], historyLength);
Profiler_GetSlowestHistory(i, &slowestThread[0], &history[0], historyLength);
float x = 10;
uint32_t col = nice_colors[i % ARRAY_SIZE(nice_colors)];
@@ -1,8 +1,10 @@
// Ultra-lightweight category profiler with history.
#include <vector>
#include <string>
#include <algorithm>
#include <mutex>
#include <map>
#include <string>
#include <vector>
#include <string.h>
@@ -11,8 +13,9 @@
#include "gfx_es2/draw_buffer.h"
#include "profiler/profiler.h"
#define MAX_CATEGORIES 32 // Can be any number
#define MAX_DEPTH 16 // Can be any number
#define MAX_CATEGORIES 64 // Can be any number, represents max profiled names.
#define MAX_DEPTH 16 // Can be any number, represents max nesting depth of profiled names.
#define MAX_THREADS 4 // Can be any number, represents concurrent threads calling the profiler.
#define HISTORY_SIZE 128 // Must be power of 2
#ifndef _DEBUG
@@ -27,33 +30,63 @@ struct Category {
struct CategoryFrame {
CategoryFrame() {
memset(time_taken, 0, sizeof(time_taken));
memset(count, 0, sizeof(count));
}
float time_taken[MAX_CATEGORIES];
int count[MAX_CATEGORIES];
};
struct Profiler {
int frameCount;
int historyPos;
int depth;
int parentCategory[MAX_DEPTH];
double eventStart[MAX_CATEGORIES];
int depth[MAX_THREADS];
int parentCategory[MAX_THREADS][MAX_DEPTH];
double eventStart[MAX_THREADS][MAX_CATEGORIES];
double curFrameStart;
};
static Profiler profiler;
static Category categories[MAX_CATEGORIES];
static std::mutex categoriesLock;
static int threadIdAfterLast = 0;
static std::mutex threadsLock;
static CategoryFrame *history;
#if MAX_THREADS > 1
thread_local int profilerThreadId = -1;
#else
static int profilerThreadId = 0;
#endif
void internal_profiler_init() {
memset(&profiler, 0, sizeof(profiler));
for (int i = 0; i < MAX_DEPTH; i++) {
profiler.parentCategory[i] = -1;
#if MAX_THREADS == 1
threadIdAfterLast = 1;
#endif
for (int i = 0; i < MAX_THREADS; i++) {
for (int j = 0; j < MAX_DEPTH; j++) {
profiler.parentCategory[i][j] = -1;
}
}
history = new CategoryFrame[HISTORY_SIZE];
history = new CategoryFrame[HISTORY_SIZE * MAX_THREADS];
}
int internal_profiler_find_cat(const char *category_name) {
static int internal_profiler_find_thread() {
int thread_id = profilerThreadId;
if (thread_id != -1) {
return thread_id;
}
std::lock_guard<std::mutex> guard(threadsLock);
if (threadIdAfterLast < MAX_THREADS) {
thread_id = threadIdAfterLast++;
profilerThreadId = thread_id;
return thread_id;
}
// Just keep reusing the last one.
return threadIdAfterLast - 1;
}
int internal_profiler_find_cat(const char *category_name, bool create_missing) {
int i;
for (i = 0; i < MAX_CATEGORIES; i++) {
const char *catname = categories[i].name;
@@ -68,88 +101,98 @@ int internal_profiler_find_cat(const char *category_name) {
}
}
if (i < MAX_CATEGORIES && category_name) {
categories[i].name = category_name;
if (i < MAX_CATEGORIES && category_name && create_missing) {
std::lock_guard<std::mutex> guard(categoriesLock);
int race_check = internal_profiler_find_cat(category_name, false);
if (race_check == -1)
categories[i].name = category_name;
return i;
}
return -1;
}
// Suspend, also used to prepare for leaving.
static void internal_profiler_suspend(int category, double now) {
double diff = now - profiler.eventStart[category];
history[profiler.historyPos].time_taken[category] += (float)diff;
profiler.eventStart[category] = 0.0;
static void internal_profiler_suspend(int thread_id, int category, double now) {
double diff = now - profiler.eventStart[thread_id][category];
history[MAX_THREADS * profiler.historyPos + thread_id].time_taken[category] += (float)diff;
profiler.eventStart[thread_id][category] = 0.0;
}
// Resume, also used as part of entering.
static void internal_profiler_resume(int category, double now) {
profiler.eventStart[category] = now;
static void internal_profiler_resume(int thread_id, int category, double now) {
profiler.eventStart[thread_id][category] = now;
}
int internal_profiler_enter(const char *category_name) {
int category = internal_profiler_find_cat(category_name);
int internal_profiler_enter(const char *category_name, int *out_thread_id) {
int category = internal_profiler_find_cat(category_name, true);
int thread_id = internal_profiler_find_thread();
if (category == -1 || !history) {
return category;
}
if (profiler.eventStart[category] == 0.0f) {
int &depth = profiler.depth[thread_id];
if (profiler.eventStart[thread_id][category] == 0.0f) {
double now = real_time_now();
int parent = profiler.parentCategory[profiler.depth];
int parent = profiler.parentCategory[thread_id][depth];
// Temporarily suspend the parent on entering a child.
if (parent != -1) {
internal_profiler_suspend(parent, now);
internal_profiler_suspend(thread_id, parent, now);
}
internal_profiler_resume(category, now);
internal_profiler_resume(thread_id, category, now);
} else {
DLOG("profiler: recursive enter (%i - %s)", category, category_name);
}
profiler.depth++;
profiler.parentCategory[profiler.depth] = category;
depth++;
profiler.parentCategory[thread_id][depth] = category;
*out_thread_id = thread_id;
return category;
}
void internal_profiler_leave(int category) {
void internal_profiler_leave(int thread_id, int category) {
if (category == -1 || !history) {
return;
}
int &depth = profiler.depth[thread_id];
if (category < 0 || category >= MAX_CATEGORIES) {
ELOG("Bad category index %d", category);
profiler.depth--;
depth--;
return;
}
double now = real_time_now();
profiler.depth--;
if (profiler.depth < 0) {
depth--;
if (depth < 0) {
FLOG("Profiler enter/leave mismatch!");
}
int parent = profiler.parentCategory[profiler.depth];
int parent = profiler.parentCategory[thread_id][depth];
// When there's recursion, we don't suspend or resume.
if (parent != category) {
internal_profiler_suspend(category, now);
history[profiler.historyPos].count[category]++;
internal_profiler_suspend(thread_id, category, now);
history[MAX_THREADS * profiler.historyPos + thread_id].count[category]++;
if (parent != -1) {
// Resume tracking the parent.
internal_profiler_resume(parent, now);
internal_profiler_resume(thread_id, parent, now);
}
}
}
void internal_profiler_end_frame() {
if (profiler.depth != 0) {
int thread_id = internal_profiler_find_thread();
if (profiler.depth[thread_id] != 0) {
// Threads may be off, but they'll fall into another frame.
FLOG("Can't be inside a profiler scope at end of frame!");
}
profiler.curFrameStart = real_time_now();
profiler.historyPos++;
profiler.historyPos &= (HISTORY_SIZE - 1);
memset(&history[profiler.historyPos], 0, sizeof(history[profiler.historyPos]));
memset(&history[MAX_THREADS * profiler.historyPos], 0, sizeof(CategoryFrame) * MAX_THREADS);
}
const char *Profiler_GetCategoryName(int i) {
@@ -168,13 +211,54 @@ int Profiler_GetNumCategories() {
return 0;
}
void Profiler_GetHistory(int category, float *data, int count) {
int Profiler_GetNumThreads() {
return threadIdAfterLast;
}
void Profiler_GetSlowestThreads(int *data, int count) {
int numCategories = Profiler_GetNumCategories();
for (int i = 0; i < HISTORY_SIZE; i++) {
int x = i - count + profiler.historyPos + 1;
while (x < 0)
x += HISTORY_SIZE;
while (x >= HISTORY_SIZE)
x -= HISTORY_SIZE;
float slowestTime = 0.0f;
data[i] = 0;
for (int thread = 0; thread < threadIdAfterLast; ++thread) {
float sum = 0.0f;
for (int c = 0; c < numCategories; ++c) {
sum += history[MAX_THREADS * x + thread].time_taken[c];
}
if (sum > slowestTime) {
slowestTime = sum;
data[i] = thread;
}
}
}
}
void Profiler_GetSlowestHistory(int category, int *slowestThreads, float *data, int count) {
for (int i = 0; i < HISTORY_SIZE; i++) {
int x = i - count + profiler.historyPos + 1;
while (x < 0)
x += HISTORY_SIZE;
while (x >= HISTORY_SIZE)
x -= HISTORY_SIZE;
int thread = slowestThreads[i];
data[i] = history[MAX_THREADS * x + thread].time_taken[category];
}
}
void Profiler_GetHistory(int category, int thread, float *data, int count) {
for (int i = 0; i < HISTORY_SIZE; i++) {
int x = i - count + profiler.historyPos + 1;
while (x < 0)
x += HISTORY_SIZE;
while (x >= HISTORY_SIZE)
x -= HISTORY_SIZE;
data[i] = history[x].time_taken[category];
data[i] = history[MAX_THREADS * x + thread].time_taken[category];
}
}
@@ -2,8 +2,6 @@
#include <cstdint>
// NOTE : This profiler is very single-threaded. Cannot be used from multiple threads yet.
// #define USE_PROFILER
#ifdef USE_PROFILER
@@ -13,24 +11,28 @@ class DrawBuffer;
void internal_profiler_init();
void internal_profiler_end_frame();
int internal_profiler_enter(const char *category_name); // Returns the category number.
void internal_profiler_leave(int category);
int internal_profiler_enter(const char *category_name, int *thread_id); // Returns the category number.
void internal_profiler_leave(int thread_id, int category);
const char *Profiler_GetCategoryName(int i);
int Profiler_GetNumCategories();
int Profiler_GetHistoryLength();
void Profiler_GetHistory(int i, float *data, int count);
int Profiler_GetNumThreads();
void Profiler_GetSlowestThreads(int *data, int count);
void Profiler_GetSlowestHistory(int category, int *slowestThreads, float *data, int count);
void Profiler_GetHistory(int category, int thread, float *data, int count);
class ProfileThis {
public:
ProfileThis(const char *category) {
cat_ = internal_profiler_enter(category);
cat_ = internal_profiler_enter(category, &thread_);
}
~ProfileThis() {
internal_profiler_leave(cat_);
internal_profiler_leave(thread_, cat_);
}
private:
int cat_;
int thread_;
};
#define PROFILE_INIT() internal_profiler_init();

0 comments on commit 5531afb

Please sign in to comment.