[WIP] Add CUDA-side tests

cms-sw · Nov 2, 2022 · 3000469 · 3000469
1 parent 50cbaad
commit 3000469
Show file tree

Hide file tree

Showing 3 changed files with 126 additions and 2 deletions.
diff --git a/HeterogeneousCore/CUDATest/plugins/TestAlgo.cu b/HeterogeneousCore/CUDATest/plugins/TestAlgo.cu
@@ -6,7 +6,7 @@
 
 namespace cudatest {
 
-  static __global__ void testAlgoKernel(cudatest::TestDeviceCollection::View view, int32_t size) {
+  static __global__ void testAlgoFillKernel(cudatest::TestDeviceCollection::View view, int32_t const size) {
     const int32_t thread = blockIdx.x * blockDim.x + threadIdx.x;
     const int32_t stride = blockDim.x * gridDim.x;
     const cudatest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
@@ -19,13 +19,48 @@ namespace cudatest {
     }
   }
 
+  static __global__ void testAlgoCheckKernel(cudatest::TestDeviceCollection::ConstView view, int32_t const size, uint32_t* failed) {
+    const int32_t thread = blockIdx.x * blockDim.x + threadIdx.x;
+    const int32_t stride = blockDim.x * gridDim.x;
+    const cudatest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
+
+    if (thread == 0) {
+      if (view.r() != 1.)
+        atomicInc(failed, UINT_MAX);
+    }
+    for (auto i = thread; i < size; i += stride) {
+      if (view[i].x() != 0.)
+        atomicInc(failed, UINT_MAX);
+      if (view[i].y() != 0.)
+        atomicInc(failed, UINT_MAX);
+      if (view[i].z() != 0.)
+        atomicInc(failed, UINT_MAX);
+      if (view[i].id() != i)
+        atomicInc(failed, UINT_MAX);
+    }
+  }
+
   void TestAlgo::fill(cudatest::TestDeviceCollection& collection, cudaStream_t stream) const {
     const uint32_t maxThreadsPerBlock = 1024;
 
     uint32_t threadsPerBlock = maxThreadsPerBlock;
     uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock;
 
-    testAlgoKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.view(), collection->metadata().size());
+    testAlgoFillKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.view(), collection->metadata().size());
+  }
+
+  void TestAlgo::check(cudatest::TestDeviceCollection const& collection, cudaStream_t stream) const {
+    const uint32_t maxThreadsPerBlock = 1024;
+
+    uint32_t threadsPerBlock = maxThreadsPerBlock;
+    uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock;
+
+    uint32_t* failed;
+    cudaMallocManaged(&failed, sizeof(uint32_t));
+    *failed = 0;
+    testAlgoCheckKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.const_view(), collection->metadata().size(), failed);
+    cudaStreamSynchronize(stream);
+    std::cerr << *failed << " checks failed" << std::endl;
   }
 
 }  // namespace cudatest
diff --git a/HeterogeneousCore/CUDATest/plugins/TestAlgo.h b/HeterogeneousCore/CUDATest/plugins/TestAlgo.h
@@ -10,6 +10,9 @@ namespace cudatest {
   public:
     void fill(cudatest::TestDeviceCollection& collection, cudaStream_t stream) const;
     void fill(cudatest::TestHostCollection& collection) const;
+
+    void check(cudatest::TestDeviceCollection const& collection, cudaStream_t stream) const;
+    void check(cudatest::TestHostCollection const& collection) const;
   };
 
 }  // namespace cudatest

diff --git a/HeterogeneousCore/CUDATest/plugins/TestPortableAnalyzerCUDA.cc b/HeterogeneousCore/CUDATest/plugins/TestPortableAnalyzerCUDA.cc
@@ -0,0 +1,86 @@
+#include <cassert>
+
+#include "CUDADataFormats/Common/interface/Product.h"
+#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/Framework/interface/stream/EDAnalyzer.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Utilities/interface/EDGetToken.h"
+#include "FWCore/Utilities/interface/InputTag.h"
+#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
+
+#include "TestAlgo.h"
+
+namespace {
+
+  template <typename T>
+  class Column {
+  public:
+    Column(T const* data, size_t size) : data_(data), size_(size) {}
+
+    void print(std::ostream& out) const {
+      std::stringstream buffer;
+      buffer << "{ ";
+      if (size_ > 0) {
+        buffer << data_[0];
+      }
+      if (size_ > 1) {
+        buffer << ", " << data_[1];
+      }
+      if (size_ > 2) {
+        buffer << ", " << data_[2];
+      }
+      if (size_ > 3) {
+        buffer << ", ...";
+      }
+      buffer << '}';
+      out << buffer.str();
+    }
+
+  private:
+    T const* const data_;
+    size_t const size_;
+  };
+
+  template <typename T>
+  std::ostream& operator<<(std::ostream& out, Column<T> const& column) {
+    column.print(out);
+    return out;
+  }
+}  // namespace
+
+class TestPortableAnalyzerCUDA : public edm::stream::EDAnalyzer<> {
+public:
+  TestPortableAnalyzerCUDA(edm::ParameterSet const& config)
+      : source_{config.getParameter<edm::InputTag>("source")}, token_{consumes(source_)} {}
+
+  void analyze(edm::Event const& event, edm::EventSetup const&) override {
+    // create a context reusing the same device and queue as the producer of the input collection
+    auto const& input = event.get(token_);
+    cms::cuda::ScopedContextProduce ctx{input};
+    cudatest::TestDeviceCollection const& product = ctx.get(input);
+
+    algo_.check(product, ctx.stream());
+  }
+
+  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+    edm::ParameterSetDescription desc;
+    desc.add<edm::InputTag>("source");
+    descriptions.addWithDefaultLabel(desc);
+  }
+
+private:
+  const edm::InputTag source_;
+  const edm::EDGetTokenT<cms::cuda::Product<cudatest::TestDeviceCollection>> token_;
+
+  // implementation of the algorithm
+  cudatest::TestAlgo algo_;
+};
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(TestPortableAnalyzerCUDA);