Commit

Make clusterizer kernels independent of the grid size (#588)
VinInn authored and fwyzard committed Dec 29, 2020
1 parent ddf8b5a commit e149c26
Showing 3 changed files with 304 additions and 322 deletions.
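
The change below replaces the kernel's early exit (one block per module; blocks beyond `moduleStart[0]` returned immediately) with a grid-stride loop over modules, so the same kernel is correct for any grid size. A minimal sketch of the idiom, reduced to a toy per-module reduction; `sumPerModule`, `moduleOf` and the other names are illustrative only, not CMSSW code:

// Toy illustration of the block-level grid-stride idiom adopted by this commit.
// Each block handles module m = blockIdx.x, blockIdx.x + gridDim.x, ..., so a
// launch with 1 block or with nModules blocks produces identical results.
__global__ void sumPerModule(
    int const* values, int const* moduleOf, int nValues, int* sums, int nModules) {
  for (int m = blockIdx.x; m < nModules; m += gridDim.x) {
    for (int i = threadIdx.x; i < nValues; i += blockDim.x)
      if (moduleOf[i] == m)
        atomicAdd(&sums[m], values[i]);  // sums must be zeroed by the caller
    // the module-loop trip count is uniform across the block, so this is safe;
    // it keeps the block in step before per-module shared state is reused
    __syncthreads();
  }
}

// Equivalent launches (same sums; only occupancy differs):
//   sumPerModule<<<nModules, 256>>>(v, mod, n, sums, nModules);
//   sumPerModule<<<64, 256>>>(v, mod, n, sums, nModules);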
173 changes: 87 additions & 86 deletions RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h
@@ -19,105 +19,106 @@ namespace gpuClustering {
       uint32_t const* __restrict__ moduleId,  // module id of each module
       int32_t* __restrict__ clusterId,        // modified: cluster id of each pixel
       uint32_t numElements) {
-    if (blockIdx.x >= moduleStart[0])
-      return;
-
-    auto firstPixel = moduleStart[1 + blockIdx.x];
-    auto thisModuleId = id[firstPixel];
-    assert(thisModuleId < MaxNumModules);
-    assert(thisModuleId == moduleId[blockIdx.x]);
+    __shared__ int32_t charge[MaxNumClustersPerModules];
+    __shared__ uint8_t ok[MaxNumClustersPerModules];
+    __shared__ uint16_t newclusId[MaxNumClustersPerModules];
 
-    auto nclus = nClustersInModule[thisModuleId];
-    if (nclus == 0)
-      return;
+    auto firstModule = blockIdx.x;
+    auto endModule = moduleStart[0];
+    for (auto module = firstModule; module < endModule; module += gridDim.x) {
+      auto firstPixel = moduleStart[1 + module];
+      auto thisModuleId = id[firstPixel];
+      assert(thisModuleId < MaxNumModules);
+      assert(thisModuleId == moduleId[module]);
+
+      auto nclus = nClustersInModule[thisModuleId];
+      if (nclus == 0)
+        continue;
 
-    if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules)
-      printf("Warning too many clusters in module %d in block %d: %d > %d\n",
-             thisModuleId,
-             blockIdx.x,
-             nclus,
-             MaxNumClustersPerModules);
+      if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules)
+        printf("Warning too many clusters in module %d in block %d: %d > %d\n",
+               thisModuleId,
+               blockIdx.x,
+               nclus,
+               MaxNumClustersPerModules);
 
-    auto first = firstPixel + threadIdx.x;
+      auto first = firstPixel + threadIdx.x;
 
-    if (nclus > MaxNumClustersPerModules) {
-      // remove excess FIXME find a way to cut charge first....
-      for (auto i = first; i < numElements; i += blockDim.x) {
-        if (id[i] == InvId)
-          continue;  // not valid
-        if (id[i] != thisModuleId)
-          break;  // end of module
-        if (clusterId[i] >= MaxNumClustersPerModules) {
-          id[i] = InvId;
-          clusterId[i] = InvId;
-        }
-      }
-      nclus = MaxNumClustersPerModules;
-    }
+      if (nclus > MaxNumClustersPerModules) {
+        // remove excess FIXME find a way to cut charge first....
+        for (auto i = first; i < numElements; i += blockDim.x) {
+          if (id[i] == InvId)
+            continue;  // not valid
+          if (id[i] != thisModuleId)
+            break;  // end of module
+          if (clusterId[i] >= MaxNumClustersPerModules) {
+            id[i] = InvId;
+            clusterId[i] = InvId;
+          }
+        }
+        nclus = MaxNumClustersPerModules;
+      }
 
 #ifdef GPU_DEBUG
-    if (thisModuleId % 100 == 1)
-      if (threadIdx.x == 0)
-        printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x);
+      if (thisModuleId % 100 == 1)
+        if (threadIdx.x == 0)
+          printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
 #endif
 
-    __shared__ int32_t charge[MaxNumClustersPerModules];
-    __shared__ uint8_t ok[MaxNumClustersPerModules];
-    __shared__ uint16_t newclusId[MaxNumClustersPerModules];
-
-    assert(nclus <= MaxNumClustersPerModules);
-    for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
-      charge[i] = 0;
-    }
-    __syncthreads();
+      assert(nclus <= MaxNumClustersPerModules);
+      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
+        charge[i] = 0;
+      }
+      __syncthreads();
 
-    for (auto i = first; i < numElements; i += blockDim.x) {
-      if (id[i] == InvId)
-        continue;  // not valid
-      if (id[i] != thisModuleId)
-        break;  // end of module
-      atomicAdd(&charge[clusterId[i]], adc[i]);
-    }
-    __syncthreads();
+      for (auto i = first; i < numElements; i += blockDim.x) {
+        if (id[i] == InvId)
+          continue;  // not valid
+        if (id[i] != thisModuleId)
+          break;  // end of module
+        atomicAdd(&charge[clusterId[i]], adc[i]);
+      }
+      __syncthreads();
 
-    auto chargeCut = thisModuleId < 96 ? 2000 : 4000;  // move in constants (calib?)
-    for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
-      newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0;
-    }
+      auto chargeCut = thisModuleId < 96 ? 2000 : 4000;  // move in constants (calib?)
+      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
+        newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0;
+      }
 
-    __syncthreads();
+      __syncthreads();
 
-    // renumber
-    __shared__ uint16_t ws[32];
-    cms::cuda::blockPrefixScan(newclusId, nclus, ws);
+      // renumber
+      __shared__ uint16_t ws[32];
+      cms::cuda::blockPrefixScan(newclusId, nclus, ws);
 
-    assert(nclus >= newclusId[nclus - 1]);
+      assert(nclus >= newclusId[nclus - 1]);
 
-    if (nclus == newclusId[nclus - 1])
-      return;
+      if (nclus == newclusId[nclus - 1])
+        continue;
 
-    nClustersInModule[thisModuleId] = newclusId[nclus - 1];
-    __syncthreads();
+      nClustersInModule[thisModuleId] = newclusId[nclus - 1];
+      __syncthreads();
 
-    // mark bad cluster again
-    for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
-      if (0 == ok[i])
-        newclusId[i] = InvId + 1;
-    }
-    __syncthreads();
+      // mark bad cluster again
+      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
+        if (0 == ok[i])
+          newclusId[i] = InvId + 1;
+      }
+      __syncthreads();
 
-    // reassign id
-    for (auto i = first; i < numElements; i += blockDim.x) {
-      if (id[i] == InvId)
-        continue;  // not valid
-      if (id[i] != thisModuleId)
-        break;  // end of module
-      clusterId[i] = newclusId[clusterId[i]] - 1;
-      if (clusterId[i] == InvId)
-        id[i] = InvId;
-    }
+      // reassign id
+      for (auto i = first; i < numElements; i += blockDim.x) {
+        if (id[i] == InvId)
+          continue;  // not valid
+        if (id[i] != thisModuleId)
+          break;  // end of module
+        clusterId[i] = newclusId[clusterId[i]] - 1;
+        if (clusterId[i] == InvId)
+          id[i] = InvId;
+      }
 
-    //done
+      //done
+    }  // loop on modules
   }
 
 }  // namespace gpuClustering
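
The renumbering in the kernel above is a stream compaction: `ok[i]` flags clusters passing the charge cut, `cms::cuda::blockPrefixScan` turns those flags into each surviving cluster's 1-based new id, and failed clusters are then sent to `InvId + 1` so that `newclusId[clusterId[i]] - 1 == InvId` invalidates their pixels. A sequential model of the same bookkeeping (plain C++; `InvId = 9999` is an assumption, taken from gpuClusteringConstants.h rather than from this diff):

#include <cstdint>
#include <vector>

// Sequential model of the in-kernel renumbering, for illustration only.
// Assumption: InvId = 9999, as defined in gpuClusteringConstants.h.
constexpr uint16_t InvId = 9999;

uint32_t renumberClusters(std::vector<int32_t> const& charge,
                          int32_t chargeCut,
                          std::vector<uint16_t>& newclusId) {
  auto nclus = static_cast<uint32_t>(charge.size());
  newclusId.assign(nclus, 0);
  uint16_t nGood = 0;
  for (uint32_t i = 0; i < nclus; ++i) {
    bool ok = charge[i] > chargeCut;  // same cut as in the kernel
    if (ok)
      ++nGood;                        // inclusive prefix scan of the pass flags
    newclusId[i] = ok ? nGood : uint16_t(InvId + 1);  // "mark bad cluster again"
  }
  // a pixel in old cluster c is reassigned to newclusId[c] - 1; for a failed
  // cluster that value is InvId, so the pixel is invalidated
  return nGood;  // the value written back to nClustersInModule
}

In the kernel the scan is done cooperatively by the whole block, with `ws[32]` as a per-warp workspace, but the mapping it produces is exactly this one.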
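The `// move in constants (calib?)` note in the diff flags the hard-coded thresholds. A trivial way to factor them out; reading module ids below 96 as the innermost barrel layer (hence the lower cut) is an assumption about the detector numbering, not something this diff states:

// Hypothetical helper, not part of the commit.
constexpr int32_t clusterChargeCut(uint32_t moduleId) {
  return moduleId < 96 ? 2000 : 4000;  // thresholds, assumed to be in electrons
}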

0 comments on commit e149c26
