diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..823b61cd
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,42 @@
+language: cpp
+
+sudo: enabled  # the CUDA install steps in before_install run dpkg/apt through sudo
+
+compiler:
+  - gcc
+
+matrix:  # one job per CUDA release; each job's env values parameterize before_install
+  include:
+    - name: CUDA 9
+      env:
+        - CUDA=9.2.148-1  # version string embedded in the cuda-repo .deb file name
+        - CUDA_SHORT=9.2  # suffix of the /usr/local/cuda-<CUDA_SHORT> directory
+        - CUDA_APT=9-2  # suffix of the cuda-* apt package names
+        - UBUNTU_VERSION=ubuntu1604  # directory of the NVIDIA repository to download from
+      dist: xenial
+    - name: CUDA 10
+      env:
+        - CUDA=10.1.105-1  # version string embedded in the cuda-repo .deb file name
+        - CUDA_APT=10-1  # suffix of the cuda-* apt package names
+        - CUDA_SHORT=10.1  # suffix of the /usr/local/cuda-<CUDA_SHORT> directory
+        - UBUNTU_VERSION=ubuntu1804  # directory of the NVIDIA repository to download from
+      dist: bionic
+
+before_install:  # installs a minimal CUDA toolkit from NVIDIA's apt repository
+ - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
+ - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}  # https, like the key download below
+ - sudo dpkg -i ${INSTALLER}
+ - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
+ - sudo apt-key add 7fa2af80.pub
+ - sudo apt-get update -qq  # apt-get rather than apt - apt's command-line interface is not meant for scripts
+ - sudo apt-get install -y cuda-core-${CUDA_APT} cuda-cudart-dev-${CUDA_APT} cuda-nvtx-${CUDA_APT}
+ - sudo apt-get clean
+ - export CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}  # exported so that cmake, nvcc and the built examples inherit it
+ - export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
+ - export PATH=${CUDA_HOME}/bin:${PATH}
+
+before_script:
+ - cmake .  # in-source configure: build files are generated in the current directory
+
+script:
+ - VERBOSE=1 make examples  # builds only the "examples" target; VERBOSE=1 echoes the full compile/link commands
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4bc0d00..11814848 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@
# We need version 3.8 for native CUDA support in CMake
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") # lets include()/find_package() see this project's own modules
# -----------------------------------
# Project name, version & build type
@@ -68,10 +68,22 @@ option(EXPORT_BUILD_DIR "Enables external use without install" OFF)
# CUDA
# -------------
-#find_package(CUDA 7.0 REQUIRED) # Why do I to do this damn it ?!
-#include_directories( "${CUDA_TOOLKIT_INCLUDE}" )
+# While this should not be necessary with CMake 3.8 and later,
+# it apparently _is_ necessary to achieve the following:
+#
+# 1. Allow non-CUDA C++ code access to the CUDA libraries
+# 2. Determine the gencode/arch/code flags for the GPUs on the host (= target) machine
+#
+# so we load the legacy FindCUDA module; REQUIRED aborts configuration with a clear error if no toolkit is found.
+find_package(CUDA REQUIRED)
+
include_directories( ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} )
-include(HandleCUDAComputeCapability)
+
+cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS_TMP Auto) # FindCUDA helper; fills CUDA_ARCH_FLAGS_TMP with a list of nvcc architecture flags
+set(CUDA_ARCH_FLAGS "${CUDA_ARCH_FLAGS_TMP}" CACHE STRING "CUDA -gencode parameters") # no FORCE: an existing cache entry is left untouched
+string(REPLACE ";" " " CUDA_ARCH_FLAGS_STR "${CUDA_ARCH_FLAGS}") # CMAKE_CUDA_FLAGS is a space-separated string, not a CMake list
+string(APPEND CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_STR}")
+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall" )
set(CMAKE_CUDA_STANDARD 11)
@@ -81,6 +93,7 @@ set(CMAKE_CUDA_EXTENSIONS ON)
set(CUDA_SEPARABLE_COMPILATION ON) # Does this work with native CUDA support?
set(CUDA_PROPAGATE_HOST_FLAGS OFF) # Does this work with native CUDA support?
+
# -----------------------
# Main target(s)
# -----------------------
@@ -102,17 +115,23 @@ target_include_directories(
"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
)
-if(WIN32)
- # Windows users report that CMake has trouble
- set(CUDA_LIBRARIES "cudadevrt.lib;cudart.lib")
- target_link_libraries(${PROJECT_NAME} PUBLIC ${CUDA_LIBRARIES})
-endif()
+target_link_libraries(${PROJECT_NAME} PUBLIC ${CUDA_LIBRARIES}) # PUBLIC: anything linking the wrappers also links the CUDA libraries
# -----------------------
# Examples / Tests
# -----------------------
-link_libraries(${CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES} cuda-api-wrappers)
+# Linking against CMake's implicit CUDA link libraries (the disabled
+# line at the end of this comment) should have been enough to make the
+# example programs link against the CUDA runtime library. However,
+# users have reported that this doesn't actually happen in some cases /
+# on some platforms; see the project page for details and specifically
+# issue #106. It has been removed in favor of relying on the FindCUDA
+# module loaded above, despite the deprecation of that method of
+# locating CUDA libraries.
+#link_libraries(${CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES})
+
+link_libraries(cuda-api-wrappers) # directory-scoped: applies to every target created after this point
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "examples/bin")
add_executable(vectorAdd EXCLUDE_FROM_ALL examples/modified_cuda_samples/vectorAdd/vectorAdd.cu)
diff --git a/README.md b/README.md
index bbd5d52b..4ea28788 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
# cuda-api-wrappers:
Thin C++-flavored wrappers for the CUDA runtime API
+Branch Build Status: Master [![Master Build Status](https://travis-ci.org/eyalroz/cuda-api-wrappers.svg?branch=master)](https://travis-ci.org/eyalroz/cuda-api-wrappers) | Development: [![Development Build Status](https://travis-ci.org/eyalroz/cuda-api-wrappers.svg?branch=development)](https://travis-ci.org/eyalroz/cuda-api-wrappers)
+
nVIDIA's [Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) for [CUDA](http://www.nvidia.com/object/cuda_home_new.html) is intended for use both in C and C++ code. As such, it uses a C-style API, the lower common denominator (with a few [notable exceptions](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__HIGHLEVEL.html) of templated function overloads).
This library of wrappers around the Runtime API is intended to allow us to embrace many of the features of C++ (including some C++11) for using the runtime API - but without reducing expressivity or increasing the level of abstraction (as in, e.g., the [Thrust](https://thrust.github.io/) library). Using cuda-api-wrappers, you still have your devices, streams, events and so on - but they will be more convenient to work with in more C++-idiomatic ways.
diff --git a/cmake/Modules/FindCUDAAPIWrappers.cmake b/cmake/Modules/FindCUDAAPIWrappers.cmake
index bfd7e49e..91b0c442 100644
--- a/cmake/Modules/FindCUDAAPIWrappers.cmake
+++ b/cmake/Modules/FindCUDAAPIWrappers.cmake
@@ -17,7 +17,7 @@ find_package(PkgConfig) # will this even help us at all?
find_path(
CUDA_API_WRAPPERS_INCLUDE_DIR
- cuda/api_wrappers.h
+ cuda/api_wrappers.hpp
HINTS
${CUDA_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/cuda-api-wrappers
diff --git a/cmake/Modules/HandleCUDAComputeCapability.cmake b/cmake/Modules/HandleCUDAComputeCapability.cmake
deleted file mode 100644
index b6dc1499..00000000
--- a/cmake/Modules/HandleCUDAComputeCapability.cmake
+++ /dev/null
@@ -1,88 +0,0 @@
-# This module determines which compute capability / SM version
-# we should be compiling our CUDA code for, and adds the appropriate
-# switch to the NVCC compiler flags - so that you don't have to worry
-# about it.
-#
-# TODO:
-# * Be willing to take CUDA_CC, CUDA_TARGET_COMPUTE_CAPABILITY,
-# CUDA_TARGET_COMPUTE or CUDA_TARGET_COMPUTE_CAP and maybe even
-# those without the CUDA_ prefix
-# * Support for CMake versions under 3.8 (shouldn't be difficult,
-# just different variable names
-# * Support "roll-up" of redundant existing nvcc flags
-# * Support clang instead of nvcc
-#
-cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
-
-if (NOT CUDA_TARGET_COMPUTE_CAPABILITY)
- if (CMAKE_CUDA_COMPILER_LOADED)
- set(QUERY_CUDA_COMPUTE_CAPABILITY_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/query_gpu_compute_capability.cu)
- file(WRITE ${QUERY_CUDA_COMPUTE_CAPABILITY_SOURCE}
-"#include \n\
-#include \n\
-#include \n\
-#include \n\
-#include \n\
-#include \n\
-int main()\n\
-{\n\
- cudaDeviceProp prop;\n\
- cudaError_t status;\n\
- int device_count;\n\
- status = cudaGetDeviceCount(&device_count);\n\
- std::vector sm_values;\n\
- if (status != cudaSuccess) {\n\
- fprintf(stderr,\"cudaGetDeviceCount() failed: %s\\n\", cudaGetErrorString(status));\n\
- return -1;\n\
- }\n\
- for(int device_index = 0; device_index < device_count; device_index++){\n\
- status = cudaGetDeviceProperties(&prop, device_index);\n\
- if (status != cudaSuccess) {\n\
- fprintf(stderr,\"cudaGetDeviceProperties() for device ${device_index} failed: %s\\n\", cudaGetErrorString(status));\n\
- return -1;\n\
- }\n\
- std::ostringstream ss;\n\
- ss << prop.major << \".\" << prop.minor;\n\
- sm_values.push_back(ss.str());\n\
- }\n\
- std::vector::iterator end = std::unique(sm_values.begin(), sm_values.end()); ;\n\
- sm_values.resize(std::distance(sm_values.begin(), end));\n\
- for(std::vector::iterator it = sm_values.begin(); it != sm_values.end(); it++) {\n\
- std::cout << *it;\n\
- if((it+1) != sm_values.end()) std::cout << \";\";\n\
- }\n\
- return 0;\n\
-}")
- try_run(
- QUERY_CUDA_COMPUTE_CAPABILITY_RUN_RESULT
- QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_RESULT
- ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}
- ${QUERY_CUDA_COMPUTE_CAPABILITY_SOURCE}
- RUN_OUTPUT_VARIABLE CUDA_TARGET_COMPUTE_CAPABILITIES_
- COMPILE_OUTPUT_VARIABLE QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_OUTPUT)
- if(NOT "${QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_RESULT}")
- message(SEND_ERROR "CUDA device compute capability query: Compilation failure")
- message(SEND_ERROR "${QUERY_CUDA_COMPUTE_CAPABILITY_COMPILE_OUTPUT}")
- elseif(NOT ("${QUERY_CUDA_COMPUTE_CAPABILITY_RUN_RESULT}" EQUAL "0"))
- message(SEND_ERROR "CUDA device compute capability query: Runtime error")
- message(SEND_ERROR "${CUDA_TARGET_COMPUTE_CAPABILITIES_}")
- endif()
- endif()
-endif()
-
-set(CUDA_TARGET_COMPUTE_CAPABILITY "${CUDA_TARGET_COMPUTE_CAPABILITIES_}" CACHE STRING "List of CUDA compute capabilities of the targeted \
- CUDA devices in X.Y format; list items separated by semicolons; see table of features and capabilities by \
- capability X.Y value at https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications")
-
-string(REPLACE ";" ", " COMPUTE_CAPABILITIES_FORMATTED_FOR_PRINTING "${CUDA_TARGET_COMPUTE_CAPABILITY}")
-message(STATUS "CUDA compilation target architecture(s): ${COMPUTE_CAPABILITIES_FORMATTED_FOR_PRINTING}")
-
-set(NVCC_TARGET_COMPUTE_CAPABILITY_FLAGS "")
-foreach(COMPUTE_CAPABILITY ${CUDA_TARGET_COMPUTE_CAPABILITY})
- string(REPLACE "." "" COMPUTE_CAPABILITY "${COMPUTE_CAPABILITY}")
- # nvcc's documentation is rather confusing regarding what we should actually set these two variables, arch and code, to. It
- # seems they've unified the set of possible values, so we're just going to go with something simplistic which works.
- string(APPEND NVCC_TARGET_COMPUTE_CAPABILITY_FLAGS " -gencode arch=compute_${COMPUTE_CAPABILITY},code=compute_${COMPUTE_CAPABILITY}")
-endforeach(COMPUTE_CAPABILITY)
-
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_TARGET_COMPUTE_CAPABILITY_FLAGS}")
diff --git a/cmake/Modules/sugar/sugar_generate_warning_flags.cmake b/cmake/Modules/sugar/sugar_generate_warning_flags.cmake
deleted file mode 100644
index 6c837124..00000000
--- a/cmake/Modules/sugar/sugar_generate_warning_flags.cmake
+++ /dev/null
@@ -1,281 +0,0 @@
-# Copyright (c) 2014, Ruslan Baratov
-# All rights reserved.
-
-include(sugar_add_this_to_sourcelist)
-sugar_add_this_to_sourcelist()
-
-include(CMakeParseArguments) # cmake_parse_arguments
-include(sugar_fatal_error)
-include(sugar_generate_warning_flag_by_name)
-include(sugar_generate_warning_xcode_attr_by_name)
-include(sugar_get_all_xcode_warning_attrs)
-include(sugar_status_debug)
-include(sugar_warning_unpack_one)
-
-# MS Visual Studio: http://msdn.microsoft.com/en-us/library/thxezb7y.aspx
-# Clang: http://clang.llvm.org/docs/UsersManual.html
-# GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
-function(sugar_generate_warning_flags)
- ### Detect compilers: is_clang, is_msvc, is_gcc
- string(COMPARE EQUAL "${CMAKE_CXX_COMPILER_ID}" "Clang" is_clang)
- string(COMPARE EQUAL "${CMAKE_CXX_COMPILER_ID}" "AppleClang" is_apple_clang)
- if(is_clang OR is_apple_clang)
- set(is_clang TRUE)
- else()
- set(is_clang FALSE)
- endif()
- set(is_msvc ${MSVC})
- set(is_gcc ${CMAKE_COMPILER_IS_GNUCXX})
-
- if(is_clang OR is_msvc OR is_gcc)
- # Supported compilers
- else()
- sugar_fatal_error("Compiler (${CMAKE_CXX_COMPILER_ID}) is not supported")
- endif()
-
- set(multi DISABLE ENABLE TREAT_AS_ERROR)
- set(opts CLEAR_GLOBAL)
- cmake_parse_arguments(x "${opts}" "" "${multi}" ${ARGV})
-
- ### Remove warning flags from global variable
- set(new_cmake_cxx_flags "${CMAKE_CXX_FLAGS}")
- string(REPLACE "/W3" "" new_cmake_cxx_flags "${new_cmake_cxx_flags}")
- string(COMPARE NOTEQUAL "${new_cmake_cxx_flags}" "${CMAKE_CXX_FLAGS}" x)
-
- if(x)
- if(x_CLEAR_GLOBAL)
- set(CMAKE_CXX_FLAGS "${new_cmake_cxx_flags}" PARENT_SCOPE)
- else()
- message(
- WARNING
- "CMAKE_CXX_FLAGS variable contains warning flag"
- " that may cause a conflict."
- " Consider using CLEAR_GLOBAL suboption to remove warning"
- " flags from CMAKE_CXX_FLAGS."
- )
- endif()
- endif()
-
- ### Unpack warning groups
- set(new_list "")
- foreach(warning ${x_DISABLE})
- sugar_warning_unpack_one(warning)
- list(APPEND new_list ${warning})
- endforeach()
- set(x_DISABLE ${new_list})
-
- set(new_list "")
- foreach(warning ${x_ENABLE})
- sugar_warning_unpack_one(warning)
- list(APPEND new_list ${warning})
- endforeach()
- set(x_ENABLE ${new_list})
-
- set(new_list "")
- foreach(warning ${x_TREAT_AS_ERROR})
- sugar_warning_unpack_one(warning)
- list(APPEND new_list ${warning})
- endforeach()
- set(x_TREAT_AS_ERROR ${new_list})
-
- ### Length
- list(LENGTH x_UNPARSED_ARGUMENTS unparsed_length)
- list(LENGTH x_DISABLE disable_length)
- list(LENGTH x_ENABLE enable_length)
- list(LENGTH x_TREAT_AS_ERROR treat_as_error_length)
-
- ### Find special warning `ALL`
- list(FIND x_DISABLE "ALL" disable_all)
- list(FIND x_ENABLE "ALL" enable_all)
- list(FIND x_TREAT_AS_ERROR "ALL" treat_as_error_all)
-
- ### Convert to BOOL
- if(disable_all EQUAL -1)
- set(disable_all NO)
- else()
- set(disable_all YES)
- endif()
-
- if(enable_all EQUAL -1)
- set(enable_all NO)
- else()
- set(enable_all YES)
- endif()
-
- if(treat_as_error_all EQUAL -1)
- set(treat_as_error_all NO)
- else()
- set(treat_as_error_all YES)
- endif()
-
- ### If special option ALL present, check there is no others
- if(disable_all AND NOT disable_length EQUAL 1)
- sugar_fatal_error("If ALL present there must be no other warnings")
- endif()
-
- if(enable_all AND NOT enable_length EQUAL 1)
- sugar_fatal_error("If ALL present there must be no other warnings")
- endif()
-
- if(treat_as_error_all AND NOT treat_as_error_length EQUAL 1)
- sugar_fatal_error("If ALL present there must be no other warnings")
- endif()
-
- ### Verify result variable ###
- if(unparsed_length EQUAL 0)
- sugar_fatal_error("Expected 2 result variables")
- endif()
- if(NOT unparsed_length EQUAL 2)
- sugar_fatal_error("Unparsed: ${x_UNPARSED_ARGUMENTS}")
- endif()
- list(GET x_UNPARSED_ARGUMENTS 0 result_opts)
- list(GET x_UNPARSED_ARGUMENTS 1 result_props)
- sugar_status_debug(
- "Generate warnings (COMPILE_OPTIONS) for variable `${result_opts}`"
- )
- sugar_status_debug(
- "Generate warnings (PROPERTIES) for variable `${result_props}`"
- )
- set(${result_opts} "")
- set(${result_props} "")
-
- ### Clear default Xcode flags
- if(XCODE_VERSION)
- list(APPEND ${result_props} XCODE_ATTRIBUTE_WARNING_CFLAGS)
- list(APPEND ${result_props} " ")
- endif()
-
- ### Disable all
- if(disable_all)
- # Set all Xcode attributes to NO;
- # Note that some of them may be rewritten further (so resulting list
- # may contain several values for attributes with same name, last used)
- sugar_get_all_xcode_warning_attrs(attr_list)
- foreach(attr ${attr_list})
- list(APPEND ${result_props} ${attr})
- list(APPEND ${result_props} NO)
- endforeach()
- if(is_msvc)
- list(APPEND ${result_opts} "/w" "/W0")
- elseif(is_clang OR is_gcc)
- list(APPEND ${result_opts} "-w")
- else()
- sugar_fatal_error("")
- endif()
- endif()
-
- ### Enable all
- if(enable_all)
- # Set all Xcode attributes to YES (See note above)
- sugar_get_all_xcode_warning_attrs(attr_list)
- foreach(attr ${attr_list})
- list(APPEND ${result_props} ${attr})
- list(APPEND ${result_props} YES)
- endforeach()
- if(is_msvc)
- list(APPEND ${result_opts} "/Wall")
- elseif(is_gcc)
- list(APPEND ${result_opts} "-Wall" "-Wextra" "-Wpedantic")
- elseif(is_clang)
- list(APPEND ${result_opts} "-Wall" "-Weverything" "-pedantic")
- else()
- sugar_fatal_error("")
- endif()
- endif()
-
- ### All treat as error
- if(treat_as_error_all)
- if(is_msvc)
- list(APPEND ${result_opts} "/WX")
- elseif(is_gcc OR is_clang)
- list(APPEND ${result_opts} "-Werror")
- else()
- sugar_fatal_error("")
- endif()
- endif()
-
- ### DISABLE and ENABLE must not intersects
- foreach(warning ${x_DISABLE})
- list(FIND x_ENABLE "${warning}" x)
- if(NOT x EQUAL -1)
- sugar_fatal_error(
- "Warning `${warning}` in both DISABLE and ENABLE sections"
- )
- endif()
- endforeach()
-
- ### DISABLE and TREAT_AS_ERROR must not intersects
- foreach(warning ${x_DISABLE})
- list(FIND x_TREAT_AS_ERROR "${warning}" x)
- if(NOT x EQUAL -1)
- sugar_fatal_error(
- "Warning `${warning}` in both DISABLE and TREAT_AS_ERROR sections"
- )
- endif()
- endforeach()
-
- ### Generate ENABLE
- foreach(warning ${x_ENABLE})
- sugar_generate_warning_xcode_attr_by_name(warning_flag ${warning})
- if(warning_flag)
- list(APPEND ${result_props} ${warning_flag})
- list(APPEND ${result_props} YES)
- else()
- sugar_generate_warning_flag_by_name(warning_flag ${warning})
- foreach(x ${warning_flag})
- if(is_msvc)
- list(APPEND ${result_opts} "/w1${x}")
- elseif(is_gcc OR is_clang)
- list(APPEND ${result_opts} "-W${x}")
- else()
- sugar_fatal_error("")
- endif()
- endforeach()
- endif()
- endforeach()
-
- ### Generate DISABLE
- foreach(warning ${x_DISABLE})
- sugar_generate_warning_xcode_attr_by_name(warning_flag ${warning})
- if(warning_flag)
- list(APPEND ${result_props} ${warning_flag})
- list(APPEND ${result_props} NO)
- endif()
- # If xcode attribute set to NO then no flags will be generated, so
- # generate '-Wno-' flag explicitly
- sugar_generate_warning_flag_by_name(warning_flag ${warning})
- foreach(x ${warning_flag})
- if(is_msvc)
- list(APPEND ${result_opts} "/wd${x}")
- elseif(is_gcc OR is_clang)
- list(APPEND ${result_opts} "-Wno-${x}")
- else()
- sugar_fatal_error("")
- endif()
- endforeach()
- endforeach()
-
- ### Generate TREAT_AS_ERROR
- foreach(warning ${x_TREAT_AS_ERROR})
- sugar_generate_warning_flag_by_name(warning_flags ${warning})
- foreach(x ${warning_flags})
- if(is_msvc)
- list(APPEND ${result_opts} "/we${x}")
- elseif(is_gcc OR is_clang)
- list(APPEND ${result_opts} "-Werror=${x}")
- else()
- sugar_fatal_error("")
- endif()
- endforeach()
- endforeach()
-
- sugar_status_debug("Generated from:")
- sugar_status_debug(" DISABLE: ${x_DISABLE}")
- sugar_status_debug(" ENABLE: ${x_ENABLE}")
- sugar_status_debug(" TREAT_AS_ERROR: ${x_TREAT_AS_ERROR}")
- sugar_status_debug("Generated (COMPILE_OPTIONS): ${${result_opts}}")
- sugar_status_debug("Generated (PROPERTIES): ${${result_props}}")
-
- set(${result_opts} "${${result_opts}}" PARENT_SCOPE)
- set(${result_props} "${${result_props}}" PARENT_SCOPE)
-endfunction()
diff --git a/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu b/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu
index 679591dd..64affe5b 100644
--- a/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu
+++ b/examples/modified_cuda_samples/simpleStreams/simpleStreams.cu
@@ -266,7 +266,7 @@ int main(int argc, char **argv)
// pointers to data and init value in the device memory
auto d_a = cuda::memory::device::make_unique(cuda_device_id, n);
auto d_c = cuda::memory::device::make_unique(cuda_device_id);
- cuda::memory::copy_single(*d_c.get(), c);
+ cuda::memory::copy_single(d_c.get(), &c);
std::cout << "\nStarting Test\n";
diff --git a/src/cuda/api/device_properties.hpp b/src/cuda/api/device_properties.hpp
index 093e5038..1b38d361 100644
--- a/src/cuda/api/device_properties.hpp
+++ b/src/cuda/api/device_properties.hpp
@@ -15,6 +15,18 @@
#include
+
+// Certain versions of the GNU C library cause warnings about
+// the use of `major` and `minor`; if either name is defined
+// as a macro at this point, un-define it to avoid them.
+#ifdef major
+#undef major
+#endif
+
+#ifdef minor
+#undef minor
+#endif
+
namespace cuda {
namespace device {
@@ -193,6 +205,8 @@ struct properties_t : public cudaDeviceProp {
grid_block_dimension_t max_threads_per_block() const noexcept { return maxThreadsPerBlock; }
grid_block_dimension_t max_warps_per_block() const noexcept { return maxThreadsPerBlock / warp_size; }
+ size_t max_shared_memory_per_block() const noexcept { return sharedMemPerBlock; } // forwards the sharedMemPerBlock field unchanged (presumably bytes - confirm against the CUDA docs)
+ size_t global_memory_size() const noexcept { return totalGlobalMem; } // forwards the totalGlobalMem field unchanged (presumably bytes - confirm against the CUDA docs)
bool can_map_host_memory() const noexcept { return canMapHostMemory != 0; }
};
diff --git a/src/cuda/api/memory.hpp b/src/cuda/api/memory.hpp
index 3bb86663..9b5bf744 100644
--- a/src/cuda/api/memory.hpp
+++ b/src/cuda/api/memory.hpp
@@ -228,9 +228,9 @@ inline void copy(void *destination, const void *source, size_t num_bytes)
* device's global memory
*/
template
-inline void copy_single(T& destination, const T& source)
+inline void copy_single(T* destination, const T* source)
{
- copy(&destination, &source, sizeof(T));
+ copy(destination, source, sizeof(T));
}
namespace async {
@@ -553,7 +553,7 @@ namespace async {
* devices.
*/
inline void prefetch(
- void* managed_ptr,
+ const void* managed_ptr,
size_t num_bytes,
cuda::device::id_t destination,
stream::id_t stream_id)