diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..823b61cd --- /dev/null +++ b/.travis.yml @@ -0,0 +1,42 @@ +language: cpp + +sudo: enabled + +compiler: + - gcc + +matrix: + include: + - name: CUDA 9 + env: + - CUDA=9.2.148-1 + - CUDA_SHORT=9.2 + - CUDA_APT=9-2 + - UBUNTU_VERSION=ubuntu1604 + dist: xenial + - name: CUDA 10 + env: + - CUDA=10.1.105-1 + - CUDA_APT=10-1 + - CUDA_SHORT=10.1 + - UBUNTU_VERSION=ubuntu1804 + dist: bionic + +before_install: + - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb + - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER} + - sudo dpkg -i ${INSTALLER} + - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub + - sudo apt-key add 7fa2af80.pub + - sudo apt update -qq + - sudo apt install -y cuda-core-${CUDA_APT} cuda-cudart-dev-${CUDA_APT} cuda-nvtx-${CUDA_APT} + - sudo apt clean + - export CUDA_HOME=/usr/local/cuda-${CUDA_SHORT} + - export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} + - export PATH=${CUDA_HOME}/bin:${PATH} + +before_script: + - cmake . + +script: + - VERBOSE=1 make examples diff --git a/CMakeLists.txt b/CMakeLists.txt index d4bc0d00..11814848 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ # We need version 3.8 for native CUDA support in CMake cmake_minimum_required(VERSION 3.8 FATAL_ERROR) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") # ----------------------------------- # Project name, version & build type @@ -68,10 +68,22 @@ option(EXPORT_BUILD_DIR "Enables external use without install" OFF) # CUDA # ------------- -#find_package(CUDA 7.0 REQUIRED) # Why do I to do this damn it ?! 
-#include_directories( "${CUDA_TOOLKIT_INCLUDE}" ) +# While this should not be necessary with CMake 3.8 and later, +# it apparently _is_ necessary to achieve the following: +# +# 1. Allow non-CUDA C++ code access to the CUDA libraries +# 2. Determine the gencode/arch/code flags for the GPUs on the host (= target) machine +# +# so... +find_package(CUDA REQUIRED) + include_directories( ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ) -include(HandleCUDAComputeCapability) + +cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS_TMP Auto) +set(CUDA_ARCH_FLAGS ${CUDA_ARCH_FLAGS_TMP} CACHE STRING "CUDA -gencode parameters") +string(REPLACE ";" " " CUDA_ARCH_FLAGS_STR "${CUDA_ARCH_FLAGS}") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH_FLAGS_STR}") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall" ) set(CMAKE_CUDA_STANDARD 11) @@ -81,6 +93,7 @@ set(CMAKE_CUDA_EXTENSIONS ON) set(CUDA_SEPARABLE_COMPILATION ON) # Does this work with native CUDA support? set(CUDA_PROPAGATE_HOST_FLAGS OFF) # Does this work with native CUDA support? + # ----------------------- # Main target(s) # ----------------------- @@ -102,17 +115,23 @@ target_include_directories( "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" ) -if(WIN32) - # Windows users report that CMake has trouble - set(CUDA_LIBRARIES "cudadevrt.lib;cudart.lib") - target_link_libraries(${PROJECT_NAME} PUBLIC ${CUDA_LIBRARIES}) -endif() +target_link_libraries(${PROJECT_NAME} PUBLIC ${CUDA_LIBRARIES}) # ----------------------- # Examples / Tests # ----------------------- -link_libraries(${CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES} cuda-api-wrappers) +# This next line should have been enough to make the example programs +# link against the CUDA runtime library. However, users have reported +# that doesn't actually happen in some cases / on some platforms; see +# the project page for details and specifically issue #106. 
It has +# been removed in favor of relying on the find_package(CUDA) line +# above, despite the deprecation of that method of locating CUDA +# libraries. +# +#link_libraries(${CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES}) + +link_libraries(cuda-api-wrappers) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "examples/bin") add_executable(vectorAdd EXCLUDE_FROM_ALL examples/modified_cuda_samples/vectorAdd/vectorAdd.cu) diff --git a/README.md b/README.md index bbd5d52b..4ea28788 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # cuda-api-wrappers:
Thin C++-flavored wrappers for the CUDA runtime API +Branch Build Status: Master [![Master Build Status](https://travis-ci.org/eyalroz/cuda-api-wrappers.svg?branch=master)](https://travis-ci.org/eyalroz/cuda-api-wrappers) | Development: [![Development Build Status](https://travis-ci.org/eyalroz/cuda-api-wrappers.svg?branch=development)](https://travis-ci.org/eyalroz/cuda-api-wrappers) + nVIDIA's [Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) for [CUDA](http://www.nvidia.com/object/cuda_home_new.html) is intended for use both in C and C++ code. As such, it uses a C-style API, the lower common denominator (with a few [notable exceptions](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__HIGHLEVEL.html) of templated function overloads). This library of wrappers around the Runtime API is intended to allow us to embrace many of the features of C++ (including some C++11) for using the runtime API - but without reducing expressivity or increasing the level of abstraction (as in, e.g., the [Thrust](https://thrust.github.io/) library). Using cuda-api-wrappers, you still have your devices, streams, events and so on - but they will be more convenient to work with in more C++-idiomatic ways. diff --git a/cmake/Modules/FindCUDAAPIWrappers.cmake b/cmake/Modules/FindCUDAAPIWrappers.cmake index bfd7e49e..91b0c442 100644 --- a/cmake/Modules/FindCUDAAPIWrappers.cmake +++ b/cmake/Modules/FindCUDAAPIWrappers.cmake @@ -17,7 +17,7 @@ find_package(PkgConfig) # will this even help us at all? 
find_path( CUDA_API_WRAPPERS_INCLUDE_DIR - cuda/api_wrappers.h + cuda/api_wrappers.hpp HINTS ${CUDA_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/cuda-api-wrappers diff --git a/cmake/Modules/HandleCUDAComputeCapability.cmake b/cmake/Modules/HandleCUDAComputeCapability.cmake deleted file mode 100644 index b6dc1499..00000000 --- a/cmake/Modules/HandleCUDAComputeCapability.cmake +++ /dev/null @@ -1,88 +0,0 @@ -# This module determines which compute capability / SM version -# we should be compiling our CUDA code for, and adds the appropriate -# switch to the NVCC compiler flags - so that you don't have to worry -# about it. -# -# TODO: -# * Be willing to take CUDA_CC, CUDA_TARGET_COMPUTE_CAPABILITY, -# CUDA_TARGET_COMPUTE or CUDA_TARGET_COMPUTE_CAP and maybe even -# those without the CUDA_ prefix -# * Support for CMake versions under 3.8 (shouldn't be difficult, -# just different variable names -# * Support "roll-up" of redundant existing nvcc flags -# * Support clang instead of nvcc -# -cmake_minimum_required(VERSION 3.8 FATAL_ERROR) - -if (NOT CUDA_TARGET_COMPUTE_CAPABILITY) - if (CMAKE_CUDA_COMPILER_LOADED) - set(QUERY_CUDA_COMPUTE_CAPABILITY_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/query_gpu_compute_capability.cu) - file(WRITE ${QUERY_CUDA_COMPUTE_CAPABILITY_SOURCE} -"#include \n\ -#include \n\ -#include \n\ -#include \n\ -#include \n\ -#include \n\ -int main()\n\ -{\n\ - cudaDeviceProp prop;\n\ - cudaError_t status;\n\ - int device_count;\n\ - status = cudaGetDeviceCount(&device_count);\n\ - std::vector sm_values;\n\ - if (status != cudaSuccess) {\n\ - fprintf(stderr,\"cudaGetDeviceCount() failed: %s\\n\", cudaGetErrorString(status));\n\ - return -1;\n\ - }\n\ - for(int device_index = 0; device_index < device_count; device_index++){\n\ - status = cudaGetDeviceProperties(&prop, device_index);\n\ - if (status != cudaSuccess) {\n\ - fprintf(stderr,\"cudaGetDeviceProperties() for device ${device_index} failed: %s\\n\", cudaGetErrorString(status));\n\ - 
return -1;\n\ - }\n\ - std::ostringstream ss;\n\ - ss << prop.major << \".\" << prop.minor;\n\ - sm_values.push_back(ss.str());\n\ - }\n\ - std::vector::iterator end = std::unique(sm_values.begin(), sm_values.end()); ;\n\ - sm_values.resize(std::distance(sm_values.begin(), end));\n\ - for(std::vector::iterator it = sm_values.begin(); it != sm_values.end(); it++) {\n\ - std::cout << *it;\n\ - if((it+1) != sm_values.end()) std::cout << \";\";\n\ - }\n\ - return 0;\n\ -}") - try_run( - QUERY_CUDA_COMPUTE_CAPABILITY_RUN_RESULT - QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_RESULT - ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY} - ${QUERY_CUDA_COMPUTE_CAPABILITY_SOURCE} - RUN_OUTPUT_VARIABLE CUDA_TARGET_COMPUTE_CAPABILITIES_ - COMPILE_OUTPUT_VARIABLE QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_OUTPUT) - if(NOT "${QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_RESULT}") - message(SEND_ERROR "CUDA device compute capability query: Compilation failure") - message(SEND_ERROR "${QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_OUTPUT}") - elseif(NOT ("${QUERY_CUDA_COMPUTE_CAPABILITY_RUN_RESULT}" EQUAL "0")) - message(SEND_ERROR "CUDA device compute capability query: Runtime error") - message(SEND_ERROR "${CUDA_TARGET_COMPUTE_CAPABILITIES_}") - endif() - endif() -endif() - -set(CUDA_TARGET_COMPUTE_CAPABILITY "${CUDA_TARGET_COMPUTE_CAPABILITIES_}" CACHE STRING "List of CUDA compute capabilities of the targeted \ - CUDA devices in X.Y format; list items separated by semicolons; see table of features and capabilities by \ - capability X.Y value at https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications") - -string(REPLACE ";" ", " COMPUTE_CAPABILITIES_FORMATTED_FOR_PRINTING "${CUDA_TARGET_COMPUTE_CAPABILITY}") -message(STATUS "CUDA compilation target architecture(s): ${COMPUTE_CAPABILITIES_FORMATTED_FOR_PRINTING}") - -set(NVCC_TARGET_COMPUTE_CAPABILITY_FLAGS "") -foreach(COMPUTE_CAPABILITY ${CUDA_TARGET_COMPUTE_CAPABILITY}) - string(REPLACE "." 
"" COMPUTE_CAPABILITY "${COMPUTE_CAPABILITY}") - # nvcc's documentation is rather confusing regarding what we should actually set these two variables, arch and code, to. It - # seems they've unified the set of possible values, so we're just going to go with something simplistic which works. - string(APPEND NVCC_TARGET_COMPUTE_CAPABILITY_FLAGS " -gencode arch=compute_${COMPUTE_CAPABILITY},code=compute_${COMPUTE_CAPABILITY}") -endforeach(COMPUTE_CAPABILITY) - -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_TARGET_COMPUTE_CAPABILITY_FLAGS}") diff --git a/cmake/Modules/sugar/sugar_generate_warning_flags.cmake b/cmake/Modules/sugar/sugar_generate_warning_flags.cmake deleted file mode 100644 index 6c837124..00000000 --- a/cmake/Modules/sugar/sugar_generate_warning_flags.cmake +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright (c) 2014, Ruslan Baratov -# All rights reserved. - -include(sugar_add_this_to_sourcelist) -sugar_add_this_to_sourcelist() - -include(CMakeParseArguments) # cmake_parse_arguments -include(sugar_fatal_error) -include(sugar_generate_warning_flag_by_name) -include(sugar_generate_warning_xcode_attr_by_name) -include(sugar_get_all_xcode_warning_attrs) -include(sugar_status_debug) -include(sugar_warning_unpack_one) - -# MS Visual Studio: http://msdn.microsoft.com/en-us/library/thxezb7y.aspx -# Clang: http://clang.llvm.org/docs/UsersManual.html -# GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html -function(sugar_generate_warning_flags) - ### Detect compilers: is_clang, is_msvc, is_gcc - string(COMPARE EQUAL "${CMAKE_CXX_COMPILER_ID}" "Clang" is_clang) - string(COMPARE EQUAL "${CMAKE_CXX_COMPILER_ID}" "AppleClang" is_apple_clang) - if(is_clang OR is_apple_clang) - set(is_clang TRUE) - else() - set(is_clang FALSE) - endif() - set(is_msvc ${MSVC}) - set(is_gcc ${CMAKE_COMPILER_IS_GNUCXX}) - - if(is_clang OR is_msvc OR is_gcc) - # Supported compilers - else() - sugar_fatal_error("Compiler (${CMAKE_CXX_COMPILER_ID}) is not supported") - endif() - - 
set(multi DISABLE ENABLE TREAT_AS_ERROR) - set(opts CLEAR_GLOBAL) - cmake_parse_arguments(x "${opts}" "" "${multi}" ${ARGV}) - - ### Remove warning flags from global variable - set(new_cmake_cxx_flags "${CMAKE_CXX_FLAGS}") - string(REPLACE "/W3" "" new_cmake_cxx_flags "${new_cmake_cxx_flags}") - string(COMPARE NOTEQUAL "${new_cmake_cxx_flags}" "${CMAKE_CXX_FLAGS}" x) - - if(x) - if(x_CLEAR_GLOBAL) - set(CMAKE_CXX_FLAGS "${new_cmake_cxx_flags}" PARENT_SCOPE) - else() - message( - WARNING - "CMAKE_CXX_FLAGS variable contains warning flag" - " that may cause a conflict." - " Consider using CLEAR_GLOBAL suboption to remove warning" - " flags from CMAKE_CXX_FLAGS." - ) - endif() - endif() - - ### Unpack warning groups - set(new_list "") - foreach(warning ${x_DISABLE}) - sugar_warning_unpack_one(warning) - list(APPEND new_list ${warning}) - endforeach() - set(x_DISABLE ${new_list}) - - set(new_list "") - foreach(warning ${x_ENABLE}) - sugar_warning_unpack_one(warning) - list(APPEND new_list ${warning}) - endforeach() - set(x_ENABLE ${new_list}) - - set(new_list "") - foreach(warning ${x_TREAT_AS_ERROR}) - sugar_warning_unpack_one(warning) - list(APPEND new_list ${warning}) - endforeach() - set(x_TREAT_AS_ERROR ${new_list}) - - ### Length - list(LENGTH x_UNPARSED_ARGUMENTS unparsed_length) - list(LENGTH x_DISABLE disable_length) - list(LENGTH x_ENABLE enable_length) - list(LENGTH x_TREAT_AS_ERROR treat_as_error_length) - - ### Find special warning `ALL` - list(FIND x_DISABLE "ALL" disable_all) - list(FIND x_ENABLE "ALL" enable_all) - list(FIND x_TREAT_AS_ERROR "ALL" treat_as_error_all) - - ### Convert to BOOL - if(disable_all EQUAL -1) - set(disable_all NO) - else() - set(disable_all YES) - endif() - - if(enable_all EQUAL -1) - set(enable_all NO) - else() - set(enable_all YES) - endif() - - if(treat_as_error_all EQUAL -1) - set(treat_as_error_all NO) - else() - set(treat_as_error_all YES) - endif() - - ### If special option ALL present, check there is no others - 
if(disable_all AND NOT disable_length EQUAL 1) - sugar_fatal_error("If ALL present there must be no other warnings") - endif() - - if(enable_all AND NOT enable_length EQUAL 1) - sugar_fatal_error("If ALL present there must be no other warnings") - endif() - - if(treat_as_error_all AND NOT treat_as_error_length EQUAL 1) - sugar_fatal_error("If ALL present there must be no other warnings") - endif() - - ### Verify result variable ### - if(unparsed_length EQUAL 0) - sugar_fatal_error("Expected 2 result variables") - endif() - if(NOT unparsed_length EQUAL 2) - sugar_fatal_error("Unparsed: ${x_UNPARSED_ARGUMENTS}") - endif() - list(GET x_UNPARSED_ARGUMENTS 0 result_opts) - list(GET x_UNPARSED_ARGUMENTS 1 result_props) - sugar_status_debug( - "Generate warnings (COMPILE_OPTIONS) for variable `${result_opts}`" - ) - sugar_status_debug( - "Generate warnings (PROPERTIES) for variable `${result_props}`" - ) - set(${result_opts} "") - set(${result_props} "") - - ### Clear default Xcode flags - if(XCODE_VERSION) - list(APPEND ${result_props} XCODE_ATTRIBUTE_WARNING_CFLAGS) - list(APPEND ${result_props} " ") - endif() - - ### Disable all - if(disable_all) - # Set all Xcode attributes to NO; - # Note that some of them may be rewritten further (so resulting list - # may contain several values for attributes with same name, last used) - sugar_get_all_xcode_warning_attrs(attr_list) - foreach(attr ${attr_list}) - list(APPEND ${result_props} ${attr}) - list(APPEND ${result_props} NO) - endforeach() - if(is_msvc) - list(APPEND ${result_opts} "/w" "/W0") - elseif(is_clang OR is_gcc) - list(APPEND ${result_opts} "-w") - else() - sugar_fatal_error("") - endif() - endif() - - ### Enable all - if(enable_all) - # Set all Xcode attributes to YES (See note above) - sugar_get_all_xcode_warning_attrs(attr_list) - foreach(attr ${attr_list}) - list(APPEND ${result_props} ${attr}) - list(APPEND ${result_props} YES) - endforeach() - if(is_msvc) - list(APPEND ${result_opts} "/Wall") - elseif(is_gcc) 
- list(APPEND ${result_opts} "-Wall" "-Wextra" "-Wpedantic") - elseif(is_clang) - list(APPEND ${result_opts} "-Wall" "-Weverything" "-pedantic") - else() - sugar_fatal_error("") - endif() - endif() - - ### All treat as error - if(treat_as_error_all) - if(is_msvc) - list(APPEND ${result_opts} "/WX") - elseif(is_gcc OR is_clang) - list(APPEND ${result_opts} "-Werror") - else() - sugar_fatal_error("") - endif() - endif() - - ### DISABLE and ENABLE must not intersects - foreach(warning ${x_DISABLE}) - list(FIND x_ENABLE "${warning}" x) - if(NOT x EQUAL -1) - sugar_fatal_error( - "Warning `${warning}` in both DISABLE and ENABLE sections" - ) - endif() - endforeach() - - ### DISABLE and TREAT_AS_ERROR must not intersects - foreach(warning ${x_DISABLE}) - list(FIND x_TREAT_AS_ERROR "${warning}" x) - if(NOT x EQUAL -1) - sugar_fatal_error( - "Warning `${warning}` in both DISABLE and TREAT_AS_ERROR sections" - ) - endif() - endforeach() - - ### Generate ENABLE - foreach(warning ${x_ENABLE}) - sugar_generate_warning_xcode_attr_by_name(warning_flag ${warning}) - if(warning_flag) - list(APPEND ${result_props} ${warning_flag}) - list(APPEND ${result_props} YES) - else() - sugar_generate_warning_flag_by_name(warning_flag ${warning}) - foreach(x ${warning_flag}) - if(is_msvc) - list(APPEND ${result_opts} "/w1${x}") - elseif(is_gcc OR is_clang) - list(APPEND ${result_opts} "-W${x}") - else() - sugar_fatal_error("") - endif() - endforeach() - endif() - endforeach() - - ### Generate DISABLE - foreach(warning ${x_DISABLE}) - sugar_generate_warning_xcode_attr_by_name(warning_flag ${warning}) - if(warning_flag) - list(APPEND ${result_props} ${warning_flag}) - list(APPEND ${result_props} NO) - endif() - # If xcode attribute set to NO then no flags will be generated, so - # generate '-Wno-' flag explicitly - sugar_generate_warning_flag_by_name(warning_flag ${warning}) - foreach(x ${warning_flag}) - if(is_msvc) - list(APPEND ${result_opts} "/wd${x}") - elseif(is_gcc OR is_clang) - 
list(APPEND ${result_opts} "-Wno-${x}") - else() - sugar_fatal_error("") - endif() - endforeach() - endforeach() - - ### Generate TREAT_AS_ERROR - foreach(warning ${x_TREAT_AS_ERROR}) - sugar_generate_warning_flag_by_name(warning_flags ${warning}) - foreach(x ${warning_flags}) - if(is_msvc) - list(APPEND ${result_opts} "/we${x}") - elseif(is_gcc OR is_clang) - list(APPEND ${result_opts} "-Werror=${x}") - else() - sugar_fatal_error("") - endif() - endforeach() - endforeach() - - sugar_status_debug("Generated from:") - sugar_status_debug(" DISABLE: ${x_DISABLE}") - sugar_status_debug(" ENABLE: ${x_ENABLE}") - sugar_status_debug(" TREAT_AS_ERROR: ${x_TREAT_AS_ERROR}") - sugar_status_debug("Generated (COMPILE_OPTIONS): ${${result_opts}}") - sugar_status_debug("Generated (PROPERTIES): ${${result_props}}") - - set(${result_opts} "${${result_opts}}" PARENT_SCOPE) - set(${result_props} "${${result_props}}" PARENT_SCOPE) -endfunction() diff --git a/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu b/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu index 679591dd..64affe5b 100644 --- a/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu +++ b/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu @@ -266,7 +266,7 @@ int main(int argc, char **argv) // pointers to data and init value in the device memory auto d_a = cuda::memory::device::make_unique(cuda_device_id, n); auto d_c = cuda::memory::device::make_unique(cuda_device_id); - cuda::memory::copy_single(*d_c.get(), c); + cuda::memory::copy_single(d_c.get(), &c); std::cout << "\nStarting Test\n"; diff --git a/src/cuda/api/device_properties.hpp b/src/cuda/api/device_properties.hpp index 093e5038..1b38d361 100644 --- a/src/cuda/api/device_properties.hpp +++ b/src/cuda/api/device_properties.hpp @@ -15,6 +15,18 @@ #include + +// The following un-definitions avoid warnings about +// the use of `major` and `minor` in certain versions +// of the GNU C library +#ifdef major +#undef major 
+#endif + +#ifdef minor +#undef minor +#endif + namespace cuda { namespace device { @@ -193,6 +205,8 @@ struct properties_t : public cudaDeviceProp { grid_block_dimension_t max_threads_per_block() const noexcept { return maxThreadsPerBlock; } grid_block_dimension_t max_warps_per_block() const noexcept { return maxThreadsPerBlock / warp_size; } + size_t max_shared_memory_per_block() const noexcept { return sharedMemPerBlock; } + size_t global_memory_size() const noexcept { return totalGlobalMem; } bool can_map_host_memory() const noexcept { return canMapHostMemory != 0; } }; diff --git a/src/cuda/api/memory.hpp b/src/cuda/api/memory.hpp index 3bb86663..9b5bf744 100644 --- a/src/cuda/api/memory.hpp +++ b/src/cuda/api/memory.hpp @@ -228,9 +228,9 @@ inline void copy(void *destination, const void *source, size_t num_bytes) * device's global memory */ template -inline void copy_single(T& destination, const T& source) +inline void copy_single(T* destination, const T* source) { - copy(&destination, &source, sizeof(T)); + copy(destination, source, sizeof(T)); } namespace async { @@ -553,7 +553,7 @@ namespace async { * devices. */ inline void prefetch( - void* managed_ptr, + const void* managed_ptr, size_t num_bytes, cuda::device::id_t destination, stream::id_t stream_id)