Skip to content

Commit

Permalink
[WIP] Add CUDA-side tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Nov 2, 2022
1 parent 50cbaad commit 3000469
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 2 deletions.
39 changes: 37 additions & 2 deletions HeterogeneousCore/CUDATest/plugins/TestAlgo.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace cudatest {

static __global__ void testAlgoKernel(cudatest::TestDeviceCollection::View view, int32_t size) {
static __global__ void testAlgoFillKernel(cudatest::TestDeviceCollection::View view, int32_t const size) {
const int32_t thread = blockIdx.x * blockDim.x + threadIdx.x;
const int32_t stride = blockDim.x * gridDim.x;
const cudatest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
Expand All @@ -19,13 +19,48 @@ namespace cudatest {
}
}

static __global__ void testAlgoCheckKernel(cudatest::TestDeviceCollection::ConstView view, int32_t const size, uint32_t* failed) {
const int32_t thread = blockIdx.x * blockDim.x + threadIdx.x;
const int32_t stride = blockDim.x * gridDim.x;
const cudatest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

if (thread == 0) {
if (view.r() != 1.)
atomicInc(failed, UINT_MAX);
}
for (auto i = thread; i < size; i += stride) {
if (view[i].x() != 0.)
atomicInc(failed, UINT_MAX);
if (view[i].y() != 0.)
atomicInc(failed, UINT_MAX);
if (view[i].z() != 0.)
atomicInc(failed, UINT_MAX);
if (view[i].id() != i)
atomicInc(failed, UINT_MAX);
}
}

void TestAlgo::fill(cudatest::TestDeviceCollection& collection, cudaStream_t stream) const {
const uint32_t maxThreadsPerBlock = 1024;

uint32_t threadsPerBlock = maxThreadsPerBlock;
uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock;

testAlgoKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.view(), collection->metadata().size());
testAlgoFillKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.view(), collection->metadata().size());
}

void TestAlgo::check(cudatest::TestDeviceCollection const& collection, cudaStream_t stream) const {
const uint32_t maxThreadsPerBlock = 1024;

uint32_t threadsPerBlock = maxThreadsPerBlock;
uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock;

uint32_t* failed;
cudaMallocManaged(&failed, sizeof(uint32_t));
*failed = 0;
testAlgoCheckKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.const_view(), collection->metadata().size(), failed);
cudaStreamSynchronize(stream);
std::cerr << *failed << " checks failed" << std::endl;
}

} // namespace cudatest
3 changes: 3 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestAlgo.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ namespace cudatest {
public:
void fill(cudatest::TestDeviceCollection& collection, cudaStream_t stream) const;
void fill(cudatest::TestHostCollection& collection) const;

void check(cudatest::TestDeviceCollection const& collection, cudaStream_t stream) const;
void check(cudatest::TestHostCollection const& collection) const;
};

} // namespace cudatest
Expand Down
86 changes: 86 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestPortableAnalyzerCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include <cassert>

#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/EventSetup.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDAnalyzer.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"

#include "TestAlgo.h"

namespace {

template <typename T>
class Column {
public:
Column(T const* data, size_t size) : data_(data), size_(size) {}

void print(std::ostream& out) const {
std::stringstream buffer;
buffer << "{ ";
if (size_ > 0) {
buffer << data_[0];
}
if (size_ > 1) {
buffer << ", " << data_[1];
}
if (size_ > 2) {
buffer << ", " << data_[2];
}
if (size_ > 3) {
buffer << ", ...";
}
buffer << '}';
out << buffer.str();
}

private:
T const* const data_;
size_t const size_;
};

template <typename T>
std::ostream& operator<<(std::ostream& out, Column<T> const& column) {
column.print(out);
return out;
}
} // namespace

class TestPortableAnalyzerCUDA : public edm::stream::EDAnalyzer<> {
public:
TestPortableAnalyzerCUDA(edm::ParameterSet const& config)
: source_{config.getParameter<edm::InputTag>("source")}, token_{consumes(source_)} {}

void analyze(edm::Event const& event, edm::EventSetup const&) override {
// create a context reusing the same device and queue as the producer of the input collection
auto const& input = event.get(token_);
cms::cuda::ScopedContextProduce ctx{input};
cudatest::TestDeviceCollection const& product = ctx.get(input);

algo_.check(product, ctx.stream());
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("source");
descriptions.addWithDefaultLabel(desc);
}

private:
const edm::InputTag source_;
const edm::EDGetTokenT<cms::cuda::Product<cudatest::TestDeviceCollection>> token_;

// implementation of the algorithm
cudatest::TestAlgo algo_;
};

#include "FWCore/Framework/interface/MakerMacros.h"
DEFINE_FWK_MODULE(TestPortableAnalyzerCUDA);

0 comments on commit 3000469

Please sign in to comment.