Skip to content

Commit

Permalink
Fix verbose string + refactorings
Browse files Browse the repository at this point in the history
  • Loading branch information
kimkulling committed Oct 7, 2023
1 parent 3ce8af1 commit 2998ad8
Showing 1 changed file with 111 additions and 102 deletions.
213 changes: 111 additions & 102 deletions code/PostProcessing/ImproveCacheLocality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
Open Asset Import Library (assimp)
---------------------------------------------------------------------------
Copyright (c) 2006-2022, assimp team
Copyright (c) 2006-2023, assimp team
All rights reserved.
Expand Down Expand Up @@ -59,31 +57,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <algorithm>
#include <stack>
#include <vector>

using namespace Assimp;
namespace Assimp {

// ------------------------------------------------------------------------------------------------
// Constructor to be privately used by Importer.
// Initializes mConfigCacheDepth with PP_ICL_PTCACHE_SIZE; SetupProperties()
// later overwrites it with the value read from the importer's properties.
ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
: mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() :
mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
// empty
}

// ------------------------------------------------------------------------------------------------
// Returns whether the processing step is present in the given flag field.
// pFlags is treated as a bit mask: the result is true exactly when the
// aiProcess_ImproveCacheLocality bit is set in it.
bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const {
bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const {
return (pFlags & aiProcess_ImproveCacheLocality) != 0;
}

// ------------------------------------------------------------------------------------------------
// Setup configuration.
// Reads the integer property AI_CONFIG_PP_ICL_PTCACHE_SIZE from the importer
// and stores it in mConfigCacheDepth. PP_ICL_PTCACHE_SIZE is passed as the
// second argument (presumably the fallback used when the property is unset).
void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) {
void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) {
// AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE);
}

// ------------------------------------------------------------------------------------------------
// Executes the post processing step on the given imported data.
void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
void ImproveCacheLocalityProcess::Execute(aiScene *pScene) {
if (!pScene->mNumMeshes) {
ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
return;
Expand All @@ -93,11 +91,11 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {

float out = 0.f;
unsigned int numf = 0, numm = 0;
for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){
const float res = ProcessMesh( pScene->mMeshes[a],a);
for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) {
const float res = ProcessMesh(pScene->mMeshes[a], a);
if (res) {
numf += pScene->mMeshes[a]->mNumFaces;
out += res;
out += res;
++numm;
}
}
Expand All @@ -109,9 +107,54 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
}
}

// ------------------------------------------------------------------------------------------------
// Computes the average cache miss ratio (ACMR = cache misses per face) of the
// mesh's current index order by simulating a FIFO vertex cache.
//
// pMesh            Mesh whose faces are inspected; only the first three indices
//                  of each face are read, so faces are expected to be triangles.
// pcEnd            One-past-the-last face to inspect, starting at pMesh->mFaces.
// configCacheDepth Number of entries in the simulated FIFO cache.
// meshNum          Mesh index, only used in the warning message.
//
// Returns the ACMR, or 0 when the mesh has no faces or when every single index
// caused a cache miss (in which case a warning is logged).
static ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd,
        unsigned int configCacheDepth, unsigned int meshNum) {
    // Nothing to measure; also avoids dividing by zero below.
    if (0 == pMesh->mNumFaces) {
        return static_cast<ai_real>(0.f);
    }

    // Simulated FIFO cache. Every slot starts with all bits set, which serves
    // as the "empty" marker (same pattern the former memset(0xff) produced).
    std::vector<unsigned int> fifoCache(configCacheDepth, ~0u);
    size_t nextSlot = 0;

    // count the number of cache misses
    unsigned int iCacheMisses = 0;
    for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
        for (unsigned int qq = 0; qq < 3; ++qq) {
            const unsigned int vertexIndex = pcFace->mIndices[qq];
            if (std::find(fifoCache.begin(), fifoCache.end(), vertexIndex) != fifoCache.end()) {
                // the vertex is in cache
                continue;
            }

            ++iCacheMisses;
            if (fifoCache.empty()) {
                // A zero-sized cache can never hold a vertex; writing a slot
                // here would be out of bounds.
                continue;
            }

            // Overwrite the oldest entry, wrapping around at the end of the buffer.
            if (nextSlot == fifoCache.size()) {
                nextSlot = 0;
            }
            fifoCache[nextSlot++] = vertexIndex;
        }
    }

    // Exactly three misses per face means no vertex was ever re-used. This is
    // checked on the integer counters because the floating-point ratio can
    // round to exactly 3.0 for very large meshes even if a few hits occurred.
    if (iCacheMisses == 3u * pMesh->mNumFaces) {
        char szBuff[128]; // should be sufficiently large in every case

        // the JoinIdenticalVertices process has not been executed on this
        // mesh, otherwise this value would normally be at least minimally
        // smaller than 3.0 ...
        ai_snprintf(szBuff, sizeof(szBuff), "Mesh %u: Not suitable for vcache optimization", meshNum);
        ASSIMP_LOG_WARN(szBuff);
        return static_cast<ai_real>(0.f);
    }

    return static_cast<ai_real>(iCacheMisses) / pMesh->mNumFaces;
}

// ------------------------------------------------------------------------------------------------
// Improves the cache coherency of a specific mesh
ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) {
// TODO: rewrite this to use std::vector or boost::shared_array
ai_assert(nullptr != pMesh);

Expand All @@ -126,91 +169,57 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
return static_cast<ai_real>(0.f);
}

if(pMesh->mNumVertices <= mConfigCacheDepth) {
if (pMesh->mNumVertices <= mConfigCacheDepth) {
return static_cast<ai_real>(0.f);
}

ai_real fACMR = 3.f;
const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces;

// Input ACMR is for logging purposes only
if (!DefaultLogger::isNullLogger()) {

unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
unsigned int* piCur = piFIFOStack;
const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;

// count the number of cache misses
unsigned int iCacheMisses = 0;
for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
for (unsigned int qq = 0; qq < 3;++qq) {
bool bInCache = false;
for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
if (*pp == pcFace->mIndices[qq]) {
// the vertex is in cache
bInCache = true;
break;
}
}
if (!bInCache) {
++iCacheMisses;
if (piCurEnd == piCur) {
piCur = piFIFOStack;
}
*piCur++ = pcFace->mIndices[qq];
}
}
}
delete[] piFIFOStack;
fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces;
if (3.0 == fACMR) {
char szBuff[128]; // should be sufficiently large in every case

// the JoinIdenticalVertices process has not been executed on this
// mesh, otherwise this value would normally be at least minimally
// smaller than 3.0 ...
ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
ASSIMP_LOG_WARN(szBuff);
return static_cast<ai_real>(0.f);
}
if (!DefaultLogger::isNullLogger()) {
fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum);
}

// first we need to build a vertex-triangle adjacency list
VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true);

// build a list to store per-vertex caching time stamps
unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
std::vector<unsigned int> piCachingStamps;
piCachingStamps.resize(pMesh->mNumVertices);
memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int));

// allocate an empty output index buffer. We store the output indices in one large array.
// Since the number of triangles won't change the input faces can be reused. This is how
// we save thousands of redundant mini allocations for aiFace::mIndices
const unsigned int iIdxCnt = pMesh->mNumFaces*3;
unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
unsigned int* piCSIter = piIBOutput;
const unsigned int iIdxCnt = pMesh->mNumFaces * 3;
std::vector<unsigned int> piIBOutput;
piIBOutput.resize(iIdxCnt);
std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin();

// allocate the flag array to hold the information
// whether a face has already been emitted or not
std::vector<bool> abEmitted(pMesh->mNumFaces,false);
std::vector<bool> abEmitted(pMesh->mNumFaces, false);

// dead-end vertex index stack
std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
std::stack<unsigned int, std::vector<unsigned int>> sDeadEndVStack;

// create a copy of the piNumTriPtr buffer
unsigned int* const piNumTriPtr = adj.mLiveTriangles;
unsigned int *const piNumTriPtr = adj.mLiveTriangles;
const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);

// get the largest number of referenced triangles and allocate the "candidate buffer"
unsigned int iMaxRefTris = 0; {
const unsigned int* piCur = adj.mLiveTriangles;
const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
for (;piCur != piCurEnd;++piCur) {
iMaxRefTris = std::max(iMaxRefTris,*piCur);
unsigned int iMaxRefTris = 0;
{
const unsigned int *piCur = adj.mLiveTriangles;
const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices;
for (; piCur != piCurEnd; ++piCur) {
iMaxRefTris = std::max(iMaxRefTris, *piCur);
}
}
ai_assert(iMaxRefTris > 0);
unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
std::vector<unsigned int> piCandidates;
piCandidates.resize(iMaxRefTris * 3);
unsigned int iCacheMisses = 0;

// ...................................................................................
Expand Down Expand Up @@ -245,23 +254,23 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me

int ivdx = 0;
int ics = 1;
int iStampCnt = mConfigCacheDepth+1;
while (ivdx >= 0) {
int iStampCnt = mConfigCacheDepth + 1;
while (ivdx >= 0) {

unsigned int icnt = piNumTriPtrNoModify[ivdx];
unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
unsigned int* piCurCandidate = piCandidates;
unsigned int *piList = adj.GetAdjacentTriangles(ivdx);
std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin();

// get all triangles in the neighborhood
for (unsigned int tri = 0; tri < icnt;++tri) {
for (unsigned int tri = 0; tri < icnt; ++tri) {

// if they have not yet been emitted, add them to the output IB
const unsigned int fidx = *piList++;
if (!abEmitted[fidx]) {
if (!abEmitted[fidx]) {

// so iterate through all vertices of the current triangle
const aiFace* pcFace = &pMesh->mFaces[ fidx ];
unsigned nind = pcFace->mNumIndices;
const aiFace *pcFace = &pMesh->mFaces[fidx];
const unsigned nind = pcFace->mNumIndices;
for (unsigned ind = 0; ind < nind; ind++) {
unsigned dp = pcFace->mIndices[ind];

Expand All @@ -281,7 +290,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
*piCSIter++ = dp;

// if the vertex is not yet in cache, set its cache count
if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) {
if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) {
piCachingStamps[dp] = iStampCnt++;
++iCacheMisses;
}
Expand All @@ -297,16 +306,16 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
// get next fanning vertex
ivdx = -1;
int max_priority = -1;
for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) {
const unsigned int dp = *piCur;

// must have live triangles
if (piNumTriPtr[dp] > 0) {
if (piNumTriPtr[dp] > 0) {
int priority = 0;

// will the vertex be in cache, even after fanning occurs?
unsigned int tmp;
if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) {
if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) {
priority = tmp;
}

Expand All @@ -324,7 +333,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
while (!sDeadEndVStack.empty()) {
unsigned int iCachedIdx = sDeadEndVStack.top();
sDeadEndVStack.pop();
if (piNumTriPtr[ iCachedIdx ] > 0) {
if (piNumTriPtr[iCachedIdx] > 0) {
ivdx = iCachedIdx;
break;
}
Expand All @@ -333,9 +342,9 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
if (-1 == ivdx) {
// well, there isn't such a vertex. Simply get the next vertex in input order and
// hope it is not too bad ...
while (ics < (int)pMesh->mNumVertices) {
while (ics < (int)pMesh->mNumVertices) {
++ics;
if (piNumTriPtr[ics] > 0) {
if (piNumTriPtr[ics] > 0) {
ivdx = ics;
break;
}
Expand All @@ -345,29 +354,29 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
}
ai_real fACMR2 = 0.0f;
if (!DefaultLogger::isNullLogger()) {
fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;

fACMR2 = static_cast<ai_real>(iCacheMisses / pMesh->mNumFaces);
const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f;
// very intense verbose logging ... prepare for much text if there are many meshes
if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR);
}

fACMR2 *= pMesh->mNumFaces;
}

// sort the output index buffer back to the input array
piCSIter = piIBOutput;
for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
piCSIter = piIBOutput.begin();
for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
unsigned nind = pcFace->mNumIndices;
unsigned * ind = pcFace->mIndices;
if (nind > 0) ind[0] = *piCSIter++;
if (nind > 1) ind[1] = *piCSIter++;
if (nind > 2) ind[2] = *piCSIter++;
unsigned *ind = pcFace->mIndices;
if (nind > 0)
ind[0] = *piCSIter++;
if (nind > 1)
ind[1] = *piCSIter++;
if (nind > 2)
ind[2] = *piCSIter++;
}

// delete temporary storage
delete[] piCachingStamps;
delete[] piIBOutput;
delete[] piCandidates;

return fACMR2;
}

} // namespace Assimp

0 comments on commit 2998ad8

Please sign in to comment.