add files containing ESProduct for development. TEST* will be used as…

… backup.
cms-sw · Sep 15, 2020 · f428e94 · f428e94
1 parent 3ce4851
commit f428e94
Show file tree

Hide file tree

Showing 2 changed files with 99 additions and 0 deletions.
diff --git a/UserCode/CodeGPU/plugins/HeterogeneousESProduct.cc b/UserCode/CodeGPU/plugins/HeterogeneousESProduct.cc
@@ -0,0 +1,54 @@
+#include "UserCode/CodeGPU/plugins/TEST_HeterogeneousESProduct.h"
+
+// Constructor: stages the payload in pinned host memory so that it can later
+// be copied to the devices with asynchronous transfers.
+// NOTE: cpuGeometry is not read yet. The array is filled with the placeholder
+// values 0 .. nelements_-1 and payload_var_ is set to 1.
+HeterogeneousGeometryESProductWrapper::HeterogeneousGeometryESProductWrapper(HeterogeneousGeometryESProduct const& cpuGeometry) {
+  cudaCheck(cudaMallocHost(&payload_array_, sizeof(float)*nelements_));
+  for(unsigned int i=0; i<nelements_; ++i)
+    payload_array_[i] = i;
+  payload_var_ = 1;
+}
+
+// Destructor: releases the pinned host buffer allocated by the constructor.
+// The per-device buffers are owned by the GPUData objects and are released by
+// GPUData::~GPUData(), not here.
+HeterogeneousGeometryESProductWrapper::~HeterogeneousGeometryESProductWrapper() {
+  cudaCheck(cudaFreeHost(payload_array_));
+}
+
+// Returns the payload object living in the memory of the current device.
+// The transfers are only queued on `stream`; nothing in this function waits
+// for them to finish.
+HeterogeneousGeometryESProduct const *HeterogeneousGeometryESProductWrapper::getHeterogeneousGeometryESProductAsync(cudaStream_t stream) const {
+  // Fills one GPUData slot: allocates the buffers and queues the
+  // host-to-device copies on the given stream.
+  //
+  // Allocation currently goes through the plain CUDA API; a migration to the
+  // caching allocator is foreseen. The assumption is that IOV changes are
+  // rare enough that the global synchronization points this introduces are
+  // acceptable (for now).
+  auto fillSlot = [this](GPUData& slot, cudaStream_t queue) {
+    const auto arrayBytes = sizeof(float) * nelements_;
+    const auto objectBytes = sizeof(HeterogeneousGeometryESProduct);
+
+    // Payload object in pinned host memory ...
+    cudaCheck(cudaMallocHost(&slot.host, objectBytes));
+    // ... whose array pointer refers to device memory.
+    cudaCheck(cudaMalloc(&slot.host->payload_array, arrayBytes));
+
+    // Payload object in device memory.
+    cudaCheck(cudaMalloc(&slot.device, objectBytes));
+
+    // Fill in the remaining host-side field of the payload object.
+    slot.host->payload_var = payload_var_;
+
+    // Queue the copies: the array contents first, then the object that
+    // points to them.
+    cudaCheck(cudaMemcpyAsync(slot.host->payload_array, payload_array_, arrayBytes, cudaMemcpyHostToDevice, queue));
+    cudaCheck(cudaMemcpyAsync(slot.device, slot.host, objectBytes, cudaMemcpyHostToDevice, queue));
+  };
+
+  // cms::cuda::ESProduct<T> essentially holds one GPUData object per device.
+  // If the data for the current device have already been transferred (or the
+  // transfer has been queued), the helper simply hands back that object;
+  // otherwise it invokes fillSlot to allocate and queue the transfers.
+  auto const& current = gpuData_.dataForCurrentDeviceAsync(stream, fillSlot);
+
+  return current.device;
+}
+
+// Releases every buffer held by this per-device slot.
+HeterogeneousGeometryESProductWrapper::GPUData::~GPUData() {
+  // The device array pointer is stored inside the pinned host struct, so the
+  // array has to be freed before the struct that holds its address.
+  if (host) {
+    cudaCheck(cudaFree(host->payload_array));
+    cudaCheck(cudaFreeHost(host));
+  }
+
+  // Called unconditionally, i.e. also when device is still nullptr.
+  cudaCheck(cudaFree(device));
+}
diff --git a/UserCode/CodeGPU/plugins/HeterogeneousESProduct.h b/UserCode/CodeGPU/plugins/HeterogeneousESProduct.h
@@ -0,0 +1,45 @@
+#include "HeterogeneousCore/CUDACore/interface/ESProduct.h"
+
+// Declare the struct for the payload to be transferred. Here the
+// example is an array with (potentially) dynamic size. Note that all of
+// below becomes simpler if the array has compile-time size.
+//
+// The struct is copied to the device as raw bytes (cudaMemcpyAsync of
+// sizeof(HeterogeneousGeometryESProduct)), so its layout must stay the same
+// on host and device.
+struct HeterogeneousGeometryESProduct {
+  // Start of the float array. In the per-device copies built by the wrapper
+  // this address comes from cudaMalloc, i.e. it refers to device memory.
+  // NOTE(review): the number of elements is not stored in this struct.
+  float *payload_array;
+  // Scalar part of the payload; the wrapper copies its payload_var_ here.
+  unsigned int payload_var;
+};
+
+// Declare the wrapper ESProduct. The corresponding ESProducer should
+// produce objects of this type.
+//
+// The wrapper keeps a staging copy of the payload in pinned host memory and,
+// on request, hands out a pointer to the copy of the payload that lives on
+// the current device.
+class HeterogeneousGeometryESProductWrapper {
+ public:
+  // Constructor takes the standard CPU ESProduct, and transforms the
+  // necessary data to array(s) in pinned host memory
+  HeterogeneousGeometryESProductWrapper(HeterogeneousGeometryESProduct const&);
+
+  // The object owns raw CUDA allocations, so a member-wise copy would end up
+  // freeing the same buffers twice. Copying is therefore disabled.
+  HeterogeneousGeometryESProductWrapper(HeterogeneousGeometryESProductWrapper const&) = delete;
+  HeterogeneousGeometryESProductWrapper& operator=(HeterogeneousGeometryESProductWrapper const&) = delete;
+
+  // Deallocates all pinned host memory
+  ~HeterogeneousGeometryESProductWrapper();
+
+  // Function to return the actual payload on the memory of the current device.
+  // Any transfer that is still needed is queued on the given stream.
+  HeterogeneousGeometryESProduct const *getHeterogeneousGeometryESProductAsync(cudaStream_t stream) const;
+
+ private:
+  // Holds the data in pinned CPU memory
+  float *payload_array_;
+  unsigned int payload_var_;
+  // Number of floats in payload_array_
+  const unsigned int nelements_ = 10;
+
+  // Helper struct to hold all information that has to be allocated and
+  // deallocated per device
+  struct GPUData {
+    // Destructor should free all member pointers
+    ~GPUData();
+    // internal pointers are on device, struct itself is on CPU
+    HeterogeneousGeometryESProduct *host = nullptr;
+    // internal pointers and struct are on device
+    HeterogeneousGeometryESProduct *device = nullptr;
+  };
+
+  // Helper that takes care of complexity of transferring the data to
+  // multiple devices
+  cms::cuda::ESProduct<GPUData> gpuData_;
+};