Skip to content

Commit

Permalink
Next prototype of the framework integration (#100)
Browse files Browse the repository at this point in the history
Provide a mechanism for a chain of modules to share a resource, that can be e.g. CUDA device memory or a CUDA stream.
Minimize data movements between the CPU and the device, and support multiple devices.
Allow the same job configuration to be used on all hardware combinations.

See HeterogeneousCore/CUDACore/README.md for a more detailed description and examples.
  • Loading branch information
makortel authored and fwyzard committed Dec 29, 2020
1 parent 12c6b19 commit 034423b
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 9 deletions.
10 changes: 4 additions & 6 deletions SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
#include <mutex>

#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudastdAlgorithm.h"
#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
Expand All @@ -14,7 +12,7 @@
using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU;

__global__
void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, const SiPixelClustersCUDA::DeviceConstView *cc, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n)
void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, clusterSLOnGPU::HitsOnGPU const * hhp, ClusterSLGPU const * slp, uint32_t n)
{
assert(slp == slp->me_d);

Expand All @@ -35,7 +33,7 @@ void simLink(const SiPixelDigisCUDA::DeviceConstView *dd, uint32_t ndigis, const

auto ch = pixelgpudetails::pixelToChannel(dd->xx(i), dd->yy(i));
auto first = hh.hitsModuleStart_d[id];
auto cl = first + cc->clus(i);
auto cl = first + dd->clus(i);
assert(cl < 2000 * blockDim.x);

const std::array<uint32_t, 4> me{{id, ch, 0, 0}};
Expand Down Expand Up @@ -162,7 +160,7 @@ namespace clusterSLOnGPU {
cudaCheck(cudaMemsetAsync(slgpu.n2_d, 0, (ClusterSLGPU::MaxNumModules*256)*sizeof(uint32_t), stream));
}

void Kernel::algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) {
void Kernel::algo(SiPixelDigisCUDA const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream) {
zero(stream.id());

ClusterSLGPU const & sl = slgpu;
Expand All @@ -177,7 +175,7 @@ namespace clusterSLOnGPU {
blocks = (ndigis + threadsPerBlock - 1) / threadsPerBlock;

assert(sl.me_d);
simLink<<<blocks, threadsPerBlock, 0, stream.id()>>>(dd.digis_d.view(), ndigis, dd.clusters_d.view(), hh.gpu_d, sl.me_d, n);
simLink<<<blocks, threadsPerBlock, 0, stream.id()>>>(dd.view(), ndigis, hh.gpu_d, sl.me_d, n);
cudaCheck(cudaGetLastError());

if (doDump) {
Expand Down
5 changes: 2 additions & 3 deletions SimTracker/TrackerHitAssociation/plugins/ClusterSLOnGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#include <cuda_runtime.h>
#include <cuda/api_wrappers.h>

#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "RecoLocalTracker/SiPixelClusterizer/plugins/siPixelRawToClusterHeterogeneousProduct.h"
#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h"

#include "trackerHitAssociationHeterogeneousProduct.h"
Expand All @@ -15,15 +15,14 @@ namespace clusterSLOnGPU {
using ClusterSLGPU = trackerHitAssociationHeterogeneousProduct::ClusterSLGPU;
using GPUProduct = trackerHitAssociationHeterogeneousProduct::GPUProduct;

using DigisOnGPU = siPixelRawToClusterHeterogeneousProduct::GPUProduct;
using HitsOnGPU = siPixelRecHitsHeterogeneousProduct::HitsOnGPU;
using HitsOnCPU = siPixelRecHitsHeterogeneousProduct::HitsOnCPU;

class Kernel {
public:
Kernel(cuda::stream_t<>& stream, bool dump);
~Kernel() {deAlloc();}
void algo(DigisOnGPU const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream);
void algo(SiPixelDigisCUDA const & dd, uint32_t ndigis, HitsOnCPU const & hh, uint32_t nhits, uint32_t n, cuda::stream_t<>& stream);
GPUProduct getProduct() { return GPUProduct{slgpu.me_d};}

private:
Expand Down

0 comments on commit 034423b

Please sign in to comment.