Skip to content

Commit

Permalink
Reimplement in CUDA the PortableTestObjects tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Sep 6, 2022
1 parent 47d2715 commit c1f6600
Show file tree
Hide file tree
Showing 14 changed files with 458 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CUDADataFormats/PortableTestObjects/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<use name="rootcore"/>
<use name="CUDADataFormats/Common"/>
<use name="DataFormats/Common"/>
<use name="DataFormats/PortableTestObjects"/>
<export>
<lib name="1"/>
</export>
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef CUDADataFormats_PortableTestObjects_interface_TestDeviceCollection_h
#define CUDADataFormats_PortableTestObjects_interface_TestDeviceCollection_h

#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"

namespace cudatest {

// SoA with x, y, z, id fields in device global memory
using TestDeviceCollection = cms::cuda::PortableDeviceCollection<portabletest::TestSoA>;

} // namespace cudatest

#endif // CUDADataFormats_PortableTestObjects_interface_TestDeviceCollection_h
14 changes: 14 additions & 0 deletions CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef CUDADataFormats_PortableTestObjects_interface_TestHostCollection_h
#define CUDADataFormats_PortableTestObjects_interface_TestHostCollection_h

#include "CUDADataFormats/Common/interface/PortableHostCollection.h"
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"

namespace cudatest {

// SoA with x, y, z, id fields in host memory
using TestHostCollection = cms::cuda::PortableHostCollection<portabletest::TestSoA>;

} // namespace cudatest

#endif // CUDADataFormats_PortableTestObjects_interface_TestHostCollection_h
5 changes: 5 additions & 0 deletions CUDADataFormats/PortableTestObjects/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestDeviceCollection.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"
#include "DataFormats/Common/interface/Wrapper.h"
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
21 changes: 21 additions & 0 deletions CUDADataFormats/PortableTestObjects/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<lcgdict>
<class name="cudatest::TestHostCollection"/>
<read
sourceClass="cudatest::TestHostCollection"
targetClass="cudatest::TestHostCollection"
version="[1-]"
source="portabletest::TestSoA layout_;"
target="buffer_"
embed="false">
<![CDATA[
cudatest::TestHostCollection::ROOTReadStreamer(newObj, onfile.layout_);
]]>
</read>
<class name="edm::Wrapper<cudatest::TestHostCollection>" splitLevel="0"/>

<class name="cudatest::TestDeviceCollection" persistent="false"/>
<class name="edm::Wrapper<cudatest::TestDeviceCollection>" persistent="false"/>

<class name="cms::cuda::Product<cudatest::TestDeviceCollection>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<cudatest::TestDeviceCollection>>" persistent="false"/>
</lcgdict>
19 changes: 19 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestAlgo.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"

#include "TestAlgo.h"

namespace {

void testAlgoKernel(cudatest::TestHostCollection::View view, int32_t size) {
view.r() = 1.;

for (auto i = 0; i < size; ++i) {
view[i] = {0., 0., 0., i};
}
}

} // namespace

void TestAlgo::fill(cudatest::TestHostCollection& collection) const {
testAlgoKernel(collection.view(), collection->metadata().size());
}
30 changes: 30 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestAlgo.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <cuda_runtime.h>

#include "CUDADataFormats/PortableTestObjects/interface/TestDeviceCollection.h"

#include "TestAlgo.h"

namespace {

__global__ void testAlgoKernel(cudatest::TestDeviceCollection::View view, int32_t size) {
const int32_t thread = blockIdx.x * blockDim.x + threadIdx.x;
const int32_t stride = blockDim.x * gridDim.x;

if (thread == 0) {
view.r() = 1.;
}
for (auto i = thread; i < size; i += stride) {
view[i] = {0., 0., 0., i};
}
}

} // namespace

void TestAlgo::fill(cudatest::TestDeviceCollection& collection, cudaStream_t stream) const {
const uint32_t maxThreadsPerBlock = 1024;

uint32_t threadsPerBlock = maxThreadsPerBlock;
uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock;

testAlgoKernel<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(collection.view(), collection->metadata().size());
}
13 changes: 13 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestAlgo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef HeterogeneousCore_CUDATest_plugins_TestAlgo_h
#define HeterogeneousCore_CUDATest_plugins_TestAlgo_h

#include "CUDADataFormats/PortableTestObjects/interface/TestDeviceCollection.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"

class TestAlgo {
public:
void fill(cudatest::TestDeviceCollection& collection, cudaStream_t stream) const;
void fill(cudatest::TestHostCollection& collection) const;
};

#endif // HeterogeneousCore_CUDATest_plugins_TestAlgo_h
62 changes: 62 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestPortableAnalyzer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include <cassert>

#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/EventSetup.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDAnalyzer.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"

class TestPortableAnalyzer : public edm::stream::EDAnalyzer<> {
public:
TestPortableAnalyzer(edm::ParameterSet const& config)
: source_{config.getParameter<edm::InputTag>("source")}, token_{consumes(source_)} {}

void analyze(edm::Event const& event, edm::EventSetup const&) override {
cudatest::TestHostCollection const& product = event.get(token_);

auto const& view = product.const_view();
for (int32_t i = 0; i < view.metadata().size(); ++i) {
assert(view[i].id() == i);
}

edm::LogInfo msg("TestPortableAnalyzer");
msg << source_.encode() << ".size() = " << view.metadata().size() << '\n';
msg << " data = " << product.buffer().get() << ",\n"
<< " x = " << view.metadata().addressOf_x() << ",\n"
<< " y = " << view.metadata().addressOf_y() << ",\n"
<< " z = " << view.metadata().addressOf_z() << ",\n"
<< " id = " << view.metadata().addressOf_id() << ",\n"
<< " r = " << view.metadata().addressOf_r() << '\n';
msg << std::hex << " [y - x] = 0x"
<< reinterpret_cast<intptr_t>(view.metadata().addressOf_y()) -
reinterpret_cast<intptr_t>(view.metadata().addressOf_x())
<< " [z - y] = 0x"
<< reinterpret_cast<intptr_t>(view.metadata().addressOf_z()) -
reinterpret_cast<intptr_t>(view.metadata().addressOf_y())
<< " [id - z] = 0x"
<< reinterpret_cast<intptr_t>(view.metadata().addressOf_id()) -
reinterpret_cast<intptr_t>(view.metadata().addressOf_z())
<< " [r - id] = 0x"
<< reinterpret_cast<intptr_t>(view.metadata().addressOf_r()) -
reinterpret_cast<intptr_t>(view.metadata().addressOf_id());
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("source");
descriptions.addWithDefaultLabel(desc);
}

private:
const edm::InputTag source_;
const edm::EDGetTokenT<cudatest::TestHostCollection> token_;
};

#include "FWCore/Framework/interface/MakerMacros.h"
DEFINE_FWK_MODULE(TestPortableAnalyzer);
45 changes: 45 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestPortableProducerCPU.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/EventSetup.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "FWCore/Utilities/interface/StreamID.h"

#include "TestAlgo.h"

class TestPortableProducerCPU : public edm::stream::EDProducer<> {
public:
TestPortableProducerCPU(edm::ParameterSet const& config)
: hostToken_{produces()}, size_{config.getParameter<int32_t>("size")} {}

void produce(edm::Event& event, edm::EventSetup const&) override {
// run the algorithm
cudatest::TestHostCollection hostProduct{size_};
algo_.fill(hostProduct);

// put the product into the event
event.emplace(hostToken_, std::move(hostProduct));
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<int32_t>("size");
descriptions.addWithDefaultLabel(desc);
}

private:
const edm::EDPutTokenT<cudatest::TestHostCollection> hostToken_;
const int32_t size_;

// implementation of the algorithm
TestAlgo algo_;
};

#include "FWCore/Framework/interface/MakerMacros.h"
DEFINE_FWK_MODULE(TestPortableProducerCPU);
58 changes: 58 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestPortableProducerCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestDeviceCollection.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/EventSetup.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "FWCore/Utilities/interface/StreamID.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"

#include "TestAlgo.h"

class TestPortableProducerCUDA : public edm::stream::EDProducer<> {
public:
TestPortableProducerCUDA(edm::ParameterSet const& config)
: deviceToken_{produces()}, size_{config.getParameter<int32_t>("size")} {}

void beginStream(edm::StreamID) override {
edm::Service<CUDAService> service;
if (not service->enabled()) {
throw cms::Exception("Configuration") << "CUDAService is disabled.";
}
}

void produce(edm::Event& event, edm::EventSetup const&) override {
// create a context based on the EDM stream number
cms::cuda::ScopedContextProduce ctx(event.streamID());

// run the algorithm, potentially asynchronously
cudatest::TestDeviceCollection deviceProduct{size_, ctx.stream()};
algo_.fill(deviceProduct, ctx.stream());

// put the asynchronous product into the event without waiting
ctx.emplace(event, deviceToken_, std::move(deviceProduct));
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<int32_t>("size");
descriptions.addWithDefaultLabel(desc);
}

private:
const edm::EDPutTokenT<cms::cuda::Product<cudatest::TestDeviceCollection>> deviceToken_;
const int32_t size_;

// implementation of the algorithm
TestAlgo algo_;
};

#include "FWCore/Framework/interface/MakerMacros.h"
DEFINE_FWK_MODULE(TestPortableProducerCUDA);
67 changes: 67 additions & 0 deletions HeterogeneousCore/CUDATest/plugins/TestPortableTranscriber.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestDeviceCollection.h"
#include "CUDADataFormats/PortableTestObjects/interface/TestHostCollection.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/EventSetup.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "FWCore/Utilities/interface/StreamID.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"

class TestPortableTranscriber : public edm::stream::EDProducer<edm::ExternalWork> {
public:
TestPortableTranscriber(edm::ParameterSet const& config)
: deviceToken_{consumes(config.getParameter<edm::InputTag>("source"))}, hostToken_{produces()} {}

void beginStream(edm::StreamID) override {
edm::Service<CUDAService> service;
if (not service->enabled()) {
throw cms::Exception("Configuration") << "CUDAService is disabled.";
}
}

void acquire(edm::Event const& event, edm::EventSetup const& setup, edm::WaitingTaskWithArenaHolder task) override {
// create a context reusing the same device and queue as the producer of the input collection
auto const& input = event.get(deviceToken_);
cms::cuda::ScopedContextAcquire ctx{input, std::move(task)};

cudatest::TestDeviceCollection const& deviceProduct = ctx.get(input);

// allocate a host product based on the metadata of the device product
hostProduct_ = cudatest::TestHostCollection{deviceProduct->metadata().size(), ctx.stream()};

// copy the content of the device product to the host product
cms::cuda::copyAsync(hostProduct_.buffer(), deviceProduct.const_buffer(), deviceProduct.bufferSize(), ctx.stream());

// do not wait for the asynchronous operation to complete
}

void produce(edm::Event& event, edm::EventSetup const&) override {
// produce() is called once the asynchronous operation has completed, so there is no need for an explicit wait
event.emplace(hostToken_, std::move(hostProduct_));
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("source");
descriptions.addWithDefaultLabel(desc);
}

private:
const edm::EDGetTokenT<cms::cuda::Product<cudatest::TestDeviceCollection>> deviceToken_;
const edm::EDPutTokenT<cudatest::TestHostCollection> hostToken_;

// hold the output product between acquire() and produce()
cudatest::TestHostCollection hostProduct_;
};

#include "FWCore/Framework/interface/MakerMacros.h"
DEFINE_FWK_MODULE(TestPortableTranscriber);
27 changes: 27 additions & 0 deletions HeterogeneousCore/CUDATest/python/reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import FWCore.ParameterSet.Config as cms

process = cms.Process('Reader')

# read the products from a 'test.root' file
process.source = cms.Source('PoolSource',
fileNames = cms.untracked.vstring('file:test.root')
)

# enable logging for the TestPortableAnalyzer
process.MessageLogger.TestPortableAnalyzer = cms.untracked.PSet()

# analyse the first product
process.testAnalyzer = cms.EDAnalyzer('TestPortableAnalyzer',
source = cms.InputTag('testProducer')
)

# analyse the second product
process.testAnalyzerSerial = cms.EDAnalyzer('TestPortableAnalyzer',
source = cms.InputTag('testProducerSerial')
)

process.cuda_path = cms.Path(process.testAnalyzer)

process.serial_path = cms.Path(process.testAnalyzerSerial)

process.maxEvents.input = 10

0 comments on commit c1f6600

Please sign in to comment.