diff --git a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml b/HeterogeneousCore/CUDAServices/bin/BuildFile.xml index 5b5b205f6d3a8..ae3d37a5f6b4f 100644 --- a/HeterogeneousCore/CUDAServices/bin/BuildFile.xml +++ b/HeterogeneousCore/CUDAServices/bin/BuildFile.xml @@ -1,10 +1,9 @@ - + - + - diff --git a/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp b/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp index 5a65575873116..6630cfeeb183e 100644 --- a/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp +++ b/HeterogeneousCore/CUDAServices/bin/cudaComputeCapabilities.cpp @@ -1,4 +1,5 @@ -// C++ standard headers +// C/C++ standard headers +#include #include #include @@ -6,18 +7,26 @@ #include // CMSSW headers -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "isCudaDeviceSupported.h" int main() { int devices = 0; - cudaCheck(cudaGetDeviceCount(&devices)); + cudaError_t status = cudaGetDeviceCount(&devices); + if (status != cudaSuccess) { + std::cerr << "cudaComputeCapabilities: " << cudaGetErrorString(status) << std::endl; + return EXIT_FAILURE; + } for (int i = 0; i < devices; ++i) { cudaDeviceProp properties; cudaGetDeviceProperties(&properties, i); std::cout << std::setw(4) << i << " " << std::setw(2) << properties.major << "." 
<< properties.minor << " " - << properties.name << std::endl; + << properties.name; + if (not isCudaDeviceSupported(i)) { + std::cout << " (unsupported)"; + } + std::cout << std::endl; } - return 0; + return EXIT_SUCCESS; } diff --git a/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp b/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp index eb21f22cd0c5c..b8847568f44e2 100644 --- a/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp +++ b/HeterogeneousCore/CUDAServices/bin/cudaIsEnabled.cpp @@ -1,10 +1,13 @@ -#include -#include +// C/C++ headers #include -#include +// CUDA headers #include +// local headers +#include "isCudaDeviceSupported.h" + +// returns EXIT_SUCCESS if at least one visible CUDA device can be used, or EXIT_FAILURE otherwise int main() { int devices = 0; auto status = cudaGetDeviceCount(&devices); @@ -12,20 +15,12 @@ int main() { return EXIT_FAILURE; } - int minimumMajor = 6; // min minor is implicitly 0 - - // This approach (requiring all devices are supported) is rather - // conservative. In principle we could consider just dropping the - // unsupported devices. Currently that would be easiest to achieve - // in CUDAService though. 
+ // check that at least one visible CUDA device can be used for (int i = 0; i < devices; ++i) { - cudaDeviceProp properties; - cudaGetDeviceProperties(&properties, i); - - if ((not(properties.major == 3 and properties.minor == 5)) and properties.major < minimumMajor) { - return EXIT_FAILURE; - } + if (isCudaDeviceSupported(i)) + return EXIT_SUCCESS; } - return EXIT_SUCCESS; + // no visible usable devices + return EXIT_FAILURE; } diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu new file mode 100644 index 0000000000000..a6b64ef6783a9 --- /dev/null +++ b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.cu @@ -0,0 +1,55 @@ +#include + +#include "isCudaDeviceSupported.h" + +__global__ static void setSupported(bool* result) { *result = true; } + +bool isCudaDeviceSupported(int device) { + bool supported = false; + bool* supported_d; + + // select the requested device - will fail if the index is invalid + cudaError_t status = cudaSetDevice(device); + if (status != cudaSuccess) + return false; + + // allocate memory for the flag on the device + status = cudaMalloc(&supported_d, sizeof(bool)); + if (status != cudaSuccess) + return false; + + // initialise the flag on the device + status = cudaMemset(supported_d, 0x00, sizeof(bool)); + if (status != cudaSuccess) + return false; + + // try to set the flag on the device + setSupported<<<1, 1>>>(supported_d); + + // check for a possible error from launching the kernel on an unsupported device + status = cudaGetLastError(); + if (status != cudaSuccess) + return false; + + // wait for the kernel to run + status = cudaDeviceSynchronize(); + if (status != cudaSuccess) + return false; + + // copy the flag back to the host + status = cudaMemcpy(&supported, supported_d, sizeof(bool), cudaMemcpyDeviceToHost); + if (status != cudaSuccess) + return false; + + // free the device memory + status = cudaFree(supported_d); + if (status != 
cudaSuccess) + return false; + + // reset the device + status = cudaDeviceReset(); + if (status != cudaSuccess) + return false; + + return supported; +} diff --git a/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h new file mode 100644 index 0000000000000..b010f51fb72a6 --- /dev/null +++ b/HeterogeneousCore/CUDAServices/bin/isCudaDeviceSupported.h @@ -0,0 +1,6 @@ +#ifndef HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h +#define HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h + +bool isCudaDeviceSupported(int device); + +#endif // HeterogeneousCore_CUDAServices_bin_isCudaDeviceSupported_h