diff --git a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml b/HeterogeneousCore/CUDAServices/bin/BuildFile.xml
index 5b5b205f6d3a8..ae3d37a5f6b4f 100644
--- a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml
+++ b/HeterogeneousCore/CUDAServices/bin/BuildFile.xml
@@ -1,10 +1,9 @@
-
+
-
+
-
diff --git a/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp b/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp
index 5a65575873116..6630cfeeb183e 100644
--- a/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp
+++ b/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp
@@ -1,4 +1,5 @@
-// C++ standard headers
+// C/C++ standard headers
+#include
#include
#include
@@ -6,18 +7,26 @@
#include
// CMSSW headers
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "isCudaDeviceSupported.h"
int main() {
int devices = 0;
- cudaCheck(cudaGetDeviceCount(&devices));
+ cudaError_t status = cudaGetDeviceCount(&devices);
+ if (status != cudaSuccess) {
+ std::cerr << "cudaComputeCapabilities: " << cudaGetErrorString(status) << std::endl;
+ return EXIT_FAILURE;
+ }
for (int i = 0; i < devices; ++i) {
cudaDeviceProp properties;
cudaGetDeviceProperties(&properties, i);
std::cout << std::setw(4) << i << " " << std::setw(2) << properties.major << "." << properties.minor << " "
- << properties.name << std::endl;
+ << properties.name;
+ if (not isCudaDeviceSupported(i)) {
+ std::cout << " (unsupported)";
+ }
+ std::cout << std::endl;
}
- return 0;
+ return EXIT_SUCCESS;
}
diff --git a/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp b/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp
index eb21f22cd0c5c..b8847568f44e2 100644
--- a/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp
+++ b/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp
@@ -1,10 +1,13 @@
-#include
-#include
+// C/C++ headers
#include
-#include
+// CUDA headers
#include
+// local headers
+#include "isCudaDeviceSupported.h"
+
+// returns EXIT_SUCCESS if at least one visible CUDA device can be used, or EXIT_FAILURE otherwise
int main() {
int devices = 0;
auto status = cudaGetDeviceCount(&devices);
@@ -12,20 +15,12 @@ int main() {
return EXIT_FAILURE;
}
- int minimumMajor = 6; // min minor is implicitly 0
-
- // This approach (requiring all devices are supported) is rather
- // conservative. In principle we could consider just dropping the
- // unsupported devices. Currently that would be easiest to achieve
- // in CUDAService though.
+ // check that at least one visible CUDA device can be used
for (int i = 0; i < devices; ++i) {
- cudaDeviceProp properties;
- cudaGetDeviceProperties(&properties, i);
-
- if ((not(properties.major == 3 and properties.minor == 5)) and properties.major < minimumMajor) {
- return EXIT_FAILURE;
- }
+ if (isCudaDeviceSupported(i))
+ return EXIT_SUCCESS;
}
- return EXIT_SUCCESS;
+ // no visible usable devices
+ return EXIT_FAILURE;
}
diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu
new file mode 100644
index 0000000000000..a6b64ef6783a9
--- /dev/null
+++ b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu
@@ -0,0 +1,70 @@
+#include <cuda_runtime.h>
+
+#include "isCudaDeviceSupported.h"
+
+// Probe kernel: stores `true` through `result`, which must point to device memory.
+// It is launched as <<<1, 1>>>, so a single thread performs the write.
+__global__ static void setSupported(bool* result) { *result = true; }
+
+// Runs the probe kernel on the currently selected device and reports whether the
+// flag written by the kernel made it back to the host.
+// The device buffer is released on every path once it has been allocated.
+static bool probeCurrentDevice() {
+  bool* supported_d = nullptr;
+
+  // allocate memory for the flag on the device
+  if (cudaMalloc(&supported_d, sizeof(bool)) != cudaSuccess)
+    return false;
+
+  // initialise the flag on the device
+  cudaError_t status = cudaMemset(supported_d, 0x00, sizeof(bool));
+
+  if (status == cudaSuccess) {
+    // discard any stale error left by earlier runtime calls on this thread, so
+    // that the check after the launch reflects this launch only
+    (void)cudaGetLastError();
+
+    // try to set the flag on the device
+    setSupported<<<1, 1>>>(supported_d);
+
+    // check for a possible error from launching the kernel on an unsupported device
+    status = cudaGetLastError();
+  }
+
+  // wait for the kernel to run
+  if (status == cudaSuccess)
+    status = cudaDeviceSynchronize();
+
+  // copy the flag back to the host
+  bool supported = false;
+  if (status == cudaSuccess)
+    status = cudaMemcpy(&supported, supported_d, sizeof(bool), cudaMemcpyDeviceToHost);
+
+  // free the device memory, also when one of the steps above failed
+  cudaError_t freeStatus = cudaFree(supported_d);
+
+  return status == cudaSuccess and freeStatus == cudaSuccess and supported;
+}
+
+// Checks whether a kernel from this binary can run on `device`, by launching a
+// trivial kernel on it and reading back the value it writes.
+//
+// device:  index of the CUDA device to test
+// returns: true only if every CUDA call succeeded and the kernel set the flag;
+//          false otherwise, including when `device` cannot be selected.
+//
+// Side effects: makes `device` the current device and, once it has been selected,
+// resets it before returning.
+bool isCudaDeviceSupported(int device) {
+  // select the requested device - will fail if the index is invalid
+  if (cudaSetDevice(device) != cudaSuccess)
+    return false;
+
+  bool supported = probeCurrentDevice();
+
+  // reset the device even if the probe failed, to release whatever it left behind
+  if (cudaDeviceReset() != cudaSuccess)
+    supported = false;
+
+  return supported;
+}
diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h
new file mode 100644
index 0000000000000..b010f51fb72a6
--- /dev/null
+++ b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h
@@ -0,0 +1,6 @@
+#ifndef HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h
+#define HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h
+// Returns true if a trivial kernel could be launched and run on the given CUDA device, false otherwise.
+bool isCudaDeviceSupported(int device);
+
+#endif // HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h