Skip to content

Commit

Permalink
add files containing ESProduct for development. TEST* will be used as…
Browse files Browse the repository at this point in the history
… backup.
  • Loading branch information
bfontana authored and bfonta committed Sep 15, 2020
1 parent 3ce4851 commit f428e94
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 0 deletions.
54 changes: 54 additions & 0 deletions UserCode/CodeGPU/plugins/HeterogeneousESProduct.cc
@@ -0,0 +1,54 @@
#include "UserCode/CodeGPU/plugins/TEST_HeterogeneousESProduct.h"

// Builds the host-side staging copy of the payload.
//
// The CPU product passed in is not read yet: the array is filled with the
// placeholder values 0, 1, ..., nelements_-1 and payload_var_ is set to 1.
HeterogeneousGeometryESProductWrapper::HeterogeneousGeometryESProductWrapper(HeterogeneousGeometryESProduct const& /*cpuGeometry*/) {
  // The staging array is allocated with cudaMallocHost (pinned host memory),
  // and is later used as the source of the asynchronous host-to-device copy.
  cudaCheck(cudaMallocHost(&payload_array_, sizeof(float) * nelements_));
  for (unsigned int i = 0; i < nelements_; ++i) {
    // Explicit conversion: the placeholder value is the element index.
    payload_array_[i] = static_cast<float>(i);
  }
  payload_var_ = 1;
}

// Releases the pinned host buffer allocated by the constructor.
// The destructor is declared in the class but was never defined, which leaked
// the buffer and left the declared symbol unresolved.
HeterogeneousGeometryESProductWrapper::~HeterogeneousGeometryESProductWrapper() {
  cudaCheck(cudaFreeHost(payload_array_));
}

// Returns the payload object located on the memory of the current device.
// Allocations and host-to-device copies are queued on `stream` by the
// callback handed to the ESProduct helper.
HeterogeneousGeometryESProduct const *HeterogeneousGeometryESProductWrapper::getHeterogeneousGeometryESProductAsync(cudaStream_t stream) const {
  // cms::cuda::ESProduct<T> keeps one GPUData object per device. When the
  // data for the current device have already been transferred (or their
  // transfer has been queued) the helper simply hands back a reference to
  // that object; otherwise it invokes the callback below, which performs the
  // allocations and queues the copies.
  auto const& deviceData = gpuData_.dataForCurrentDeviceAsync(
      stream,
      [this](GPUData& gpu, cudaStream_t queue) {
        // Memory is obtained directly through the CUDA API for now; a
        // migration to the caching allocator is foreseen. IOV changes are
        // assumed to be rare enough that the global synchronization points
        // this introduces are acceptable (for now).
        const auto arrayBytes = sizeof(float) * nelements_;
        const auto objectBytes = sizeof(HeterogeneousGeometryESProduct);

        // Payload object in pinned host memory.
        cudaCheck(cudaMallocHost(&gpu.host, objectBytes));
        // Payload array in device memory; its address is stored in the
        // host-side object so it gets copied along with it.
        cudaCheck(cudaMalloc(&gpu.host->payload_array, arrayBytes));

        // Payload object in device memory.
        cudaCheck(cudaMalloc(&gpu.device, objectBytes));

        // Fill in the remaining host-side field of the payload object.
        gpu.host->payload_var = this->payload_var_;

        // Queue the copies: the array contents first ...
        cudaCheck(cudaMemcpyAsync(gpu.host->payload_array, this->payload_array_, arrayBytes, cudaMemcpyHostToDevice, queue));
        // ... followed by the payload object itself.
        cudaCheck(cudaMemcpyAsync(gpu.device, gpu.host, objectBytes, cudaMemcpyHostToDevice, queue));
      });

  // Pointer to the payload object on the current device.
  return deviceData.device;
}

// Releases everything owned by this per-device record: the device array
// referenced from the host-side payload object, the host-side payload object
// itself, and the device-side payload object.
HeterogeneousGeometryESProductWrapper::GPUData::~GPUData() {
  if (host) {
    // The array pointer is stored inside the host-side object, so it has to
    // be released before that object is freed.
    float* deviceArray = host->payload_array;
    cudaCheck(cudaFree(deviceArray));
    cudaCheck(cudaFreeHost(host));
  }
  // Freed unconditionally, exactly as before.
  cudaCheck(cudaFree(device));
}
45 changes: 45 additions & 0 deletions UserCode/CodeGPU/plugins/HeterogeneousESProduct.h
@@ -0,0 +1,45 @@
#include "HeterogeneousCore/CUDACore/interface/ESProduct.h"

// Declare the struct for the payload to be transferred. Here the
// example is an array with (potentially) dynamic size. Note that
// everything below becomes simpler if the array has a compile-time size.
struct HeterogeneousGeometryESProduct {
  // Pointer to the first element of the payload array.
  float* payload_array;
  // Scalar value carried alongside the array.
  unsigned payload_var;
};

// Declare the wrapper ESProduct. The corresponding ESProducer should
// produce objects of this type.
class HeterogeneousGeometryESProductWrapper {
public:
  // Builds the wrapper from the standard CPU ESProduct, staging the data
  // needed on the device into array(s) in pinned host memory.
  HeterogeneousGeometryESProductWrapper(HeterogeneousGeometryESProduct const& cpuGeometry);

  // Releases all pinned host memory.
  ~HeterogeneousGeometryESProductWrapper();

  // Gives access to the actual payload on the memory of the current device.
  HeterogeneousGeometryESProduct const* getHeterogeneousGeometryESProductAsync(cudaStream_t stream) const;

private:
  // Copy of the payload kept in pinned CPU memory.
  float* payload_array_;
  unsigned payload_var_;
  // Number of elements in payload_array_.
  const unsigned nelements_{10};

  // Bundle of everything that must be allocated and deallocated once per
  // device.
  struct GPUData {
    // Frees all member pointers.
    ~GPUData();
    // The struct itself is on the CPU, its internal pointers are on the device.
    HeterogeneousGeometryESProduct* host{nullptr};
    // Both the struct and its internal pointers are on the device.
    HeterogeneousGeometryESProduct* device{nullptr};
  };

  // Helper hiding the complexity of transferring the data to multiple
  // devices.
  cms::cuda::ESProduct<GPUData> gpuData_;
};

0 comments on commit f428e94

Please sign in to comment.