Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make clusterizer kernels independent of grid size #588

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 87 additions & 86 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,105 +19,106 @@ namespace gpuClustering {
uint32_t const* __restrict__ moduleId, // module id of each module
int32_t* __restrict__ clusterId, // modified: cluster id of each pixel
uint32_t numElements) {
if (blockIdx.x >= moduleStart[0])
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very sorry — running code-format has left the few (trivial) functional changes completely swamped by the new indentation.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem — hiding the whitespace changes in git (`git diff -w`) or in GitHub (gear icon → "Hide whitespace changes") alleviates the issue.

return;

auto firstPixel = moduleStart[1 + blockIdx.x];
auto thisModuleId = id[firstPixel];
assert(thisModuleId < MaxNumModules);
assert(thisModuleId == moduleId[blockIdx.x]);
__shared__ int32_t charge[MaxNumClustersPerModules];
__shared__ uint8_t ok[MaxNumClustersPerModules];
__shared__ uint16_t newclusId[MaxNumClustersPerModules];

auto nclus = nClustersInModule[thisModuleId];
if (nclus == 0)
return;
auto firstModule = blockIdx.x;
auto endModule = moduleStart[0];
for (auto module = firstModule; module < endModule; module += gridDim.x) {
auto firstPixel = moduleStart[1 + module];
auto thisModuleId = id[firstPixel];
assert(thisModuleId < MaxNumModules);
assert(thisModuleId == moduleId[module]);

auto nclus = nClustersInModule[thisModuleId];
if (nclus == 0)
continue;

if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules)
printf("Warning too many clusters in module %d in block %d: %d > %d\n",
thisModuleId,
blockIdx.x,
nclus,
MaxNumClustersPerModules);

auto first = firstPixel + threadIdx.x;

if (nclus > MaxNumClustersPerModules) {
// remove excess FIXME find a way to cut charge first....
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
if (clusterId[i] >= MaxNumClustersPerModules) {
id[i] = InvId;
clusterId[i] = InvId;
}
}
nclus = MaxNumClustersPerModules;
}

if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules)
printf("Warning too many clusters in module %d in block %d: %d > %d\n",
thisModuleId,
blockIdx.x,
nclus,
MaxNumClustersPerModules);
#ifdef GPU_DEBUG
if (thisModuleId % 100 == 1)
if (threadIdx.x == 0)
printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif

auto first = firstPixel + threadIdx.x;
assert(nclus <= MaxNumClustersPerModules);
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
charge[i] = 0;
}
__syncthreads();

if (nclus > MaxNumClustersPerModules) {
// remove excess FIXME find a way to cut charge first....
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
if (clusterId[i] >= MaxNumClustersPerModules) {
id[i] = InvId;
clusterId[i] = InvId;
}
atomicAdd(&charge[clusterId[i]], adc[i]);
}
nclus = MaxNumClustersPerModules;
}
__syncthreads();

#ifdef GPU_DEBUG
if (thisModuleId % 100 == 1)
if (threadIdx.x == 0)
printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif
auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?)
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0;
}

__shared__ int32_t charge[MaxNumClustersPerModules];
__shared__ uint8_t ok[MaxNumClustersPerModules];
__shared__ uint16_t newclusId[MaxNumClustersPerModules];
__syncthreads();

// renumber
__shared__ uint16_t ws[32];
cms::cuda::blockPrefixScan(newclusId, nclus, ws);

assert(nclus >= newclusId[nclus - 1]);

if (nclus == newclusId[nclus - 1])
continue;

nClustersInModule[thisModuleId] = newclusId[nclus - 1];
__syncthreads();

// mark bad cluster again
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
if (0 == ok[i])
newclusId[i] = InvId + 1;
}
__syncthreads();

// reassign id
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
clusterId[i] = newclusId[clusterId[i]] - 1;
if (clusterId[i] == InvId)
id[i] = InvId;
}

assert(nclus <= MaxNumClustersPerModules);
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
charge[i] = 0;
}
__syncthreads();

for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
atomicAdd(&charge[clusterId[i]], adc[i]);
}
__syncthreads();

auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?)
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0;
}

__syncthreads();

// renumber
__shared__ uint16_t ws[32];
cms::cuda::blockPrefixScan(newclusId, nclus, ws);

assert(nclus >= newclusId[nclus - 1]);

if (nclus == newclusId[nclus - 1])
return;

nClustersInModule[thisModuleId] = newclusId[nclus - 1];
__syncthreads();

// mark bad cluster again
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
if (0 == ok[i])
newclusId[i] = InvId + 1;
}
__syncthreads();

// reassign id
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
clusterId[i] = newclusId[clusterId[i]] - 1;
if (clusterId[i] == InvId)
id[i] = InvId;
}

//done
//done
} // loop on modules
}

} // namespace gpuClustering
Expand Down
Loading