Skip to content

Commit

Permalink
Produce a copy of the beamspot data on the GPU
Browse files Browse the repository at this point in the history
Implement the beamspot host-to-device transfer in its own EDProducer,
making use of beginStream()-allocated write-combined memory for the transfer.
  • Loading branch information
makortel authored and fwyzard committed Aug 13, 2020
1 parent 72bdd0a commit 54c3781
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 8 deletions.
8 changes: 8 additions & 0 deletions CUDADataFormats/BeamSpot/BuildFile.xml
@@ -0,0 +1,8 @@
<use name="rootcore"/>
<use name="CUDADataFormats/Common"/>
<use name="DataFormats/Common"/>
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
32 changes: 32 additions & 0 deletions CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
@@ -0,0 +1,32 @@
#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

#include <cuda_runtime.h>

class BeamSpotCUDA {
public:
// alignas(128) doesn't really make sense as there is only one
// beamspot per event?
struct Data {
float x, y, z; // position
// TODO: add covariance matrix

float sigmaZ;
float beamWidthX, beamWidthY;
float dxdz, dydz;
float emittanceX, emittanceY;
float betaStar;
};

BeamSpotCUDA() = default;
BeamSpotCUDA(Data const* data_h, cudaStream_t stream);

Data const* data() const { return data_d_.get(); }

private:
cms::cuda::device::unique_ptr<Data> data_d_;
};

#endif
9 changes: 9 additions & 0 deletions CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
@@ -0,0 +1,9 @@
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cudaStream_t stream) {
data_d_ = cms::cuda::make_device_unique<Data>(stream);
cudaCheck(cudaMemcpyAsync(data_d_.get(), data_h, sizeof(Data), cudaMemcpyHostToDevice, stream));
}
8 changes: 8 additions & 0 deletions CUDADataFormats/BeamSpot/src/classes.h
@@ -0,0 +1,8 @@
#ifndef CUDADataFormats_BeamSpot_classes_h
#define CUDADataFormats_BeamSpot_classes_h

#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"

#endif
4 changes: 4 additions & 0 deletions CUDADataFormats/BeamSpot/src/classes_def.xml
@@ -0,0 +1,4 @@
<lcgdict>
<class name="cms::cuda::Product<BeamSpotCUDA>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<BeamSpotCUDA>>" persistent="false"/>
</lcgdict>
83 changes: 83 additions & 0 deletions RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc
@@ -0,0 +1,83 @@
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "DataFormats/BeamSpot/interface/BeamSpot.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Framework/interface/global/EDProducer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h"

#include <cuda_runtime.h>

namespace {
class BSHost {
public:
BSHost() : bs{cms::cuda::make_host_noncached_unique<BeamSpotCUDA::Data>(cudaHostAllocWriteCombined)} {}
BeamSpotCUDA::Data* get() { return bs.get(); }

private:
cms::cuda::host::noncached::unique_ptr<BeamSpotCUDA::Data> bs;
};
} // namespace

class BeamSpotToCUDA : public edm::global::EDProducer<edm::StreamCache<BSHost>> {
public:
explicit BeamSpotToCUDA(const edm::ParameterSet& iConfig);
~BeamSpotToCUDA() override = default;

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);

std::unique_ptr<BSHost> beginStream(edm::StreamID) const override {
edm::Service<CUDAService> cs;
if (cs->enabled()) {
return std::make_unique<BSHost>();
} else {
return nullptr;
}
}
void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override;

private:
edm::EDGetTokenT<reco::BeamSpot> bsGetToken_;
edm::EDPutTokenT<cms::cuda::Product<BeamSpotCUDA>> bsPutToken_;
};

BeamSpotToCUDA::BeamSpotToCUDA(const edm::ParameterSet& iConfig)
: bsGetToken_{consumes<reco::BeamSpot>(iConfig.getParameter<edm::InputTag>("src"))},
bsPutToken_{produces<cms::cuda::Product<BeamSpotCUDA>>()} {}

void BeamSpotToCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("src", edm::InputTag("offlineBeamSpot"));
descriptions.add("offlineBeamSpotCUDA", desc);
}

void BeamSpotToCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const {
cms::cuda::ScopedContextProduce ctx{streamID};

const reco::BeamSpot& bs = iEvent.get(bsGetToken_);

BeamSpotCUDA::Data* bsHost = streamCache(streamID)->get();

bsHost->x = bs.x0();
bsHost->y = bs.y0();
bsHost->z = bs.z0();

bsHost->sigmaZ = bs.sigmaZ();
bsHost->beamWidthX = bs.BeamWidthX();
bsHost->beamWidthY = bs.BeamWidthY();
bsHost->dxdz = bs.dxdz();
bsHost->dydz = bs.dydz();
bsHost->emittanceX = bs.emittanceX();
bsHost->emittanceY = bs.emittanceY();
bsHost->betaStar = bs.betaStar();

ctx.emplace(iEvent, bsPutToken_, bsHost, ctx.stream());
}

DEFINE_FWK_MODULE(BeamSpotToCUDA);
21 changes: 13 additions & 8 deletions RecoVertex/BeamSpotProducer/plugins/BuildFile.xml
@@ -1,13 +1,12 @@
<use name="root"/>
<use name="rootminuit"/>
<use name="CondCore/DBOutputService"/>
<use name="CondFormats/BeamSpotObjects"/>
<use name="CondFormats/DataRecord"/>
<use name="FWCore/Framework"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/Utilities"/>
<use name="FWCore/ServiceRegistry"/>
<use name="CondFormats/BeamSpotObjects"/>
<use name="CondFormats/DataRecord"/>
<use name="CondCore/DBOutputService"/>

<use name="root"/>
<use name="rootminuit"/>
<use name="FWCore/Utilities"/>
<use name="RecoVertex/BeamSpotProducer"/>

<library file="BeamSpotProducer.cc" name="BeamSpotProducer">
Expand Down Expand Up @@ -40,4 +39,10 @@
<library file="OfflineToTransientBeamSpotESProducer.cc" name="OfflineToTransientBeamSpotESProducer">
<flags EDM_PLUGIN="1"/>
</library>

<library file="BeamSpotToCUDA.cc" name="BeamSpotToCUDA">
<use name="cuda"/>
<use name="CUDADataFormats/BeamSpot"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="HeterogeneousCore/CUDAServices"/>
<flags EDM_PLUGIN="1"/>
</library>
7 changes: 7 additions & 0 deletions RecoVertex/BeamSpotProducer/python/BeamSpot_cff.py
@@ -1,4 +1,11 @@
import FWCore.ParameterSet.Config as cms

from RecoVertex.BeamSpotProducer.BeamSpot_cfi import *
from RecoVertex.BeamSpotProducer.offlineBeamSpotCUDA_cfi import offlineBeamSpotCUDA

offlineBeamSpotTask = cms.Task(offlineBeamSpot)

from Configuration.ProcessModifiers.gpu_cff import gpu
_offlineBeamSpotTask_gpu = offlineBeamSpotTask.copy()
_offlineBeamSpotTask_gpu.add(offlineBeamSpotCUDA)
gpu.toReplaceWith(offlineBeamSpotTask, _offlineBeamSpotTask_gpu)

0 comments on commit 54c3781

Please sign in to comment.