Skip to content

Commit

Permalink
Detect and remove duplicate pixels
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Aug 2, 2022
1 parent aee2e95 commit ce8a57b
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 5 deletions.
1 change: 1 addition & 0 deletions DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h
Expand Up @@ -55,6 +55,7 @@ namespace sipixelconstants {
inline constexpr uint32_t getRow(uint32_t ww) { return ((ww >> ROW_shift) & ROW_mask); }
inline constexpr uint32_t getDCol(uint32_t ww) { return ((ww >> DCOL_shift) & DCOL_mask); }
inline constexpr uint32_t getPxId(uint32_t ww) { return ((ww >> PXID_shift) & PXID_mask); }
inline constexpr uint32_t removeADC(uint32_t ww) { return (ww & ~(ADC_mask << ADC_shift)); }
} // namespace functions
} // namespace sipixelconstants

Expand Down
Expand Up @@ -656,7 +656,8 @@ namespace pixelgpudetails {
std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n";
#endif

findClus<false><<<blocks, threadsPerBlock, 0, stream>>>(digis_d.view().moduleInd(),
findClus<false><<<blocks, threadsPerBlock, 0, stream>>>(digis_d.view().rawIdArr(),
digis_d.view().moduleInd(),
digis_d.view().xx(),
digis_d.view().yy(),
clusters_d.moduleStart(),
Expand Down Expand Up @@ -763,7 +764,8 @@ namespace pixelgpudetails {
threadsPerBlock = 256;
blocks = phase2PixelTopology::numberOfModules;

findClus<true><<<blocks, threadsPerBlock, 0, stream>>>(digis_d.view().moduleInd(),
findClus<true><<<blocks, threadsPerBlock, 0, stream>>>(digis_d.view().rawIdArr(),
digis_d.view().moduleInd(),
digis_d.view().xx(),
digis_d.view().yy(),
clusters_d.moduleStart(),
Expand Down
Expand Up @@ -39,6 +39,8 @@ namespace gpuClustering {
auto endModule = moduleStart[0];
for (auto module = firstModule; module < endModule; module += gridDim.x) {
auto firstPixel = moduleStart[1 + module];
while (id[firstPixel] == invalidModuleId)
++firstPixel; // could be duplicates!
auto thisModuleId = id[firstPixel];
assert(thisModuleId < nMaxModules);
assert(thisModuleId == moduleId[module]);
Expand Down
79 changes: 78 additions & 1 deletion RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h
Expand Up @@ -7,10 +7,54 @@
#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

namespace gpuClustering {

// Phase-1 pixel modules
constexpr uint32_t pixelSizeX = 160;
constexpr uint32_t pixelSizeY = 416;

namespace pixelStatus {
// Use 0x00, 0x01, 0x03 so each can be OR'ed on top of the previous ones
enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 };

constexpr uint32_t bits = 2;
constexpr uint32_t mask = (0x01 << bits) - 1;
constexpr uint32_t valuesPerWord = sizeof(uint32_t) * 8 / bits;
constexpr uint32_t size = pixelSizeX * pixelSizeY / valuesPerWord;

__device__ static constexpr inline uint32_t getIndex(uint16_t x, uint16_t y) {
return (pixelSizeX * y + x) / valuesPerWord;
}

__device__ constexpr inline uint32_t getShift(uint16_t x, uint16_t y) { return (x % valuesPerWord) * 2; }

__device__ constexpr inline Status getStatus(uint32_t const* __restrict__ status, uint16_t x, uint16_t y) {
uint32_t index = getIndex(x, y);
uint32_t shift = getShift(x, y);
return Status{(status[index] >> shift) & mask};
}

__device__ constexpr inline bool isDuplicate(uint32_t const* __restrict__ status, uint16_t x, uint16_t y) {
return getStatus(status, x, y) == kDuplicate;
}

__device__ constexpr inline void promote(uint32_t* __restrict__ status, const uint16_t x, const uint16_t y) {
uint32_t index = getIndex(x, y);
uint32_t shift = getShift(x, y);
Status old = getStatus(status, x, y);
if (kDuplicate == old) {
// nothing to do
return;
}
Status target = (kEmpty == old) ? kFound : kDuplicate;
atomicOr(&status[index], static_cast<uint32_t>(target) << shift);
}

} // namespace pixelStatus

#ifdef GPU_DEBUG
__device__ uint32_t gMaxHit = 0;
#endif
Expand Down Expand Up @@ -39,14 +83,19 @@ namespace gpuClustering {
}

template <bool isPhase2>
__global__ void findClus(uint16_t const* __restrict__ id, // module id of each pixel
__global__ void findClus(uint32_t* __restrict__ rawIdArr,
uint16_t* __restrict__ id, // module id of each pixel
uint16_t const* __restrict__ x, // local coordinates of each pixel
uint16_t const* __restrict__ y, //
uint32_t const* __restrict__ moduleStart, // index of the first pixel of each module
uint32_t* __restrict__ nClustersInModule, // output: number of clusters found in each module
uint32_t* __restrict__ moduleId, // output: module id of each module
int32_t* __restrict__ clusterId, // output: cluster id of each pixel
int numElements) {
// status is only used for Phase-1, but it cannot be declared conditionally only if isPhase2 is false;
// to minimize the impact on Phase-2 reconstruction it is declared with a very small size.
constexpr const uint32_t pixelStatusSize = isPhase2 ? 1 : pixelStatus::size;
__shared__ uint32_t status[pixelStatusSize]; // packed words array used to store the PixelStatus of each pixel
__shared__ int msize;

auto firstModule = blockIdx.x;
Expand Down Expand Up @@ -114,6 +163,34 @@ namespace gpuClustering {
__syncthreads();
#endif

// remove duplicate pixels
if constexpr (not isPhase2) {
if (msize > 1) {
for (uint32_t i = threadIdx.x; i < pixelStatus::size; i += blockDim.x) {
status[i] = 0;
}
__syncthreads();
for (int i = first; i < msize - 1; i += blockDim.x) {
// skip invalid pixels
if (id[i] == invalidModuleId)
continue;
pixelStatus::promote(status, x[i], y[i]);
}
__syncthreads();
for (int i = first; i < msize - 1; i += blockDim.x) {
// skip invalid pixels
if (id[i] == invalidModuleId)
continue;
if (pixelStatus::isDuplicate(status, x[i], y[i])) {
// printf("found dup %d %d %d %d\n", i, id[i], x[i], y[i]);
id[i] = invalidModuleId;
rawIdArr[i] = 0;
}
}
__syncthreads();
}
}

// fill histo
for (int i = first; i < msize; i += blockDim.x) {
if (id[i] == invalidModuleId) // skip invalid pixels
Expand Down
14 changes: 12 additions & 2 deletions RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h
Expand Up @@ -31,13 +31,15 @@ int main(void) {
constexpr SiPixelClusterThresholds clusterThresholds(kSiPixelClusterThresholdsDefaultPhase1);

// these in reality are already on GPU
auto h_raw = std::make_unique<uint32_t[]>(numElements);
auto h_id = std::make_unique<uint16_t[]>(numElements);
auto h_x = std::make_unique<uint16_t[]>(numElements);
auto h_y = std::make_unique<uint16_t[]>(numElements);
auto h_adc = std::make_unique<uint16_t[]>(numElements);
auto h_clus = std::make_unique<int[]>(numElements);

#ifdef __CUDACC__
auto d_raw = cms::cuda::make_device_unique<uint32_t[]>(numElements, nullptr);
auto d_id = cms::cuda::make_device_unique<uint16_t[]>(numElements, nullptr);
auto d_x = cms::cuda::make_device_unique<uint16_t[]>(numElements, nullptr);
auto d_y = cms::cuda::make_device_unique<uint16_t[]>(numElements, nullptr);
Expand Down Expand Up @@ -265,6 +267,7 @@ int main(void) {

cms::cuda::launch(findClus<false>,
{blocksPerGrid, threadsPerBlock},
d_raw.get(),
d_id.get(),
d_x.get(),
d_y.get(),
Expand Down Expand Up @@ -305,8 +308,15 @@ int main(void) {
h_moduleStart[0] = nModules;
countModules<false>(h_id.get(), h_moduleStart.get(), h_clus.get(), n);
memset(h_clusInModule.get(), 0, maxNumModules * sizeof(uint32_t));
findClus<false>(
h_id.get(), h_x.get(), h_y.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n);
findClus<false>(h_raw.get(),
h_id.get(),
h_x.get(),
h_y.get(),
h_moduleStart.get(),
h_clusInModule.get(),
h_moduleId.get(),
h_clus.get(),
n);

nModules = h_moduleStart[0];
auto nclus = h_clusInModule.get();
Expand Down

0 comments on commit ce8a57b

Please sign in to comment.