Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix memory leak and use ref not omp in device test #731

Merged
merged 4 commits into from
Apr 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/windows-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
$env:PATH="$env:PATH;$pwd\build\windows_shared_library"
mkdir build
cd build
cmake -DCMAKE_CXX_FLAGS=/bigobj -DCMAKE_CXX_FLAGS_DEBUG="/MDd /Zi /Ob1 /Od /RTC1" -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_OMP=OFF ..
cmake -DCMAKE_CXX_FLAGS=/bigobj -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_CXX_FLAGS_DEBUG="/MDd /Zi /Ob1 /Od /RTC1" -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_OMP=OFF ..
cmake --build . -j4 --config ${{ matrix.config.build_type }}
ctest . -C ${{ matrix.config.build_type }} --output-on-failure
- name: install
Expand Down
38 changes: 38 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,26 @@ build/cuda92/intel/cuda/release/static:
- cuda
- gpu

# Build CUDA NVIDIA without omp
build/cuda92/intel/cuda_wo_omp/release/shared:
pratikvn marked this conversation as resolved.
Show resolved Hide resolved
<<: *default_build_with_test
image: localhost:5000/gko-cuda92-gnu7-llvm50-intel2017
variables:
<<: *default_variables
C_COMPILER: "icc"
CXX_COMPILER: "icpc"
BUILD_CUDA: "ON"
BUILD_HWLOC: "OFF"
BUILD_TYPE: "Release"
CUDA_ARCH: 35
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- gpu

# cuda 10.0 and friends
# Make sure that our jobs run when using self-installed
# third-party HWLOC.
Expand Down Expand Up @@ -597,6 +617,24 @@ build/amd/clang/hip/release/static:
- amd
- gpu

# Build HIP AMD without omp
build/amd/clang/hip_wo_omp/release/shared:
<<: *default_build_with_test
image: localhost:5000/gko-amd-gnu8-llvm7
variables:
<<: *default_variables
C_COMPILER: "clang"
CXX_COMPILER: "clang++"
BUILD_HIP: "ON"
BUILD_TYPE: "Release"
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- amd
- gpu

# no cuda but latest gcc and clang
build/nocuda/gcc/core/debug/static:
<<: *default_build_with_test
Expand Down
46 changes: 26 additions & 20 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ set(GINKGO_HIP_AMDGPU "" CACHE STRING
"The amdgpu_target(s) variable passed to hipcc. The default is none (auto).")
option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF)
option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON)
option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON)
if(MSVC OR WIN32 OR CYGWIN OR APPLE)
option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" OFF)
else()
option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON)
endif()
option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON)
option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON)
option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF)
Expand Down Expand Up @@ -169,6 +173,11 @@ if(PAPI_sde_FOUND)
set(GINKGO_HAVE_PAPI_SDE 1)
endif()

# Switch off HWLOC for Windows and MacOS
if(GINKGO_BUILD_HWLOC AND (MSVC OR WIN32 OR CYGWIN OR APPLE))
set(GINKGO_BUILD_HWLOC OFF CACHE BOOL "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" FORCE)
message(WARNING "Ginkgo does not support HWLOC on Windows/MacOS, switch GINKGO_BUILD_HWLOC to OFF")
endif()
yhmtsai marked this conversation as resolved.
Show resolved Hide resolved
if(GINKGO_BUILD_HWLOC)
# By default always use external HWLOC
set(GINKGO_USE_EXTERNAL_HWLOC 1)
Expand All @@ -178,10 +187,6 @@ else()
set(GINKGO_HAVE_HWLOC 0)
message(STATUS "HWLOC is being forcibly switched off")
endif()
# Switch off HWLOC for Windows and MacOS
if(MSVC OR WIN32 OR CYGWIN OR APPLE)
set(GINKGO_HAVE_HWLOC 0)
endif()

# We keep using NVCC/HCC for consistency with previous releases even if AMD
# updated everything to use NVIDIA/AMD in ROCM 4.1
Expand Down Expand Up @@ -211,6 +216,21 @@ if(GINKGO_BUILD_HIP)
endif()


if(MSVC)
# This is modified from
# https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace
include(cmake/windows_helpers.cmake)
if(BUILD_SHARED_LIBS)
ginkgo_switch_to_windows_dynamic("CXX")
ginkgo_switch_to_windows_dynamic("C")
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
else()
ginkgo_switch_to_windows_static("CXX")
ginkgo_switch_to_windows_static("C")
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS FALSE)
endif()
endif()

# Try to find the third party packages before using our subdirectories
include(cmake/package_helpers.cmake)
ginkgo_find_package(GTest "GTest::GTest;GTest::Main" FALSE 1.8.1)
Expand All @@ -222,26 +242,12 @@ if(GINKGO_HAVE_HWLOC)
set(GINKGO_USE_EXTERNAL_HWLOC 0)
endif()
endif()
# third_party needs to be after flag modification.
add_subdirectory(third_party) # Third-party tools and libraries

# Load CMake helpers
include(cmake/build_helpers.cmake)
include(cmake/install_helpers.cmake)
include(cmake/windows_helpers.cmake)

# This is modified from
# https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace
if(MSVC)
if(BUILD_SHARED_LIBS)
ginkgo_switch_to_windows_dynamic("CXX")
ginkgo_switch_to_windows_dynamic("C")
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
else()
ginkgo_switch_to_windows_static("CXX")
ginkgo_switch_to_windows_static("C")
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS FALSE)
endif()
endif()

configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in
${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY)
Expand Down
22 changes: 13 additions & 9 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ Ginkgo adds the following additional switches to control what is being built:
* `-DGINKGO_HIP_AMDGPU="gpuarch1;gpuarch2"` the amdgpu_target(s) variable
passed to hipcc for the `hcc` HIP backend. The default is none (auto).
* `-DGINKGO_BUILD_HWLOC={ON, OFF}` builds Ginkgo with HWLOC. If system HWLOC
is not found, Ginkgo will try to build it. Default is `ON`.
is not found, Ginkgo will try to build it. Default is `ON` on Linux. Ginkgo
does not support HWLOC on Windows/MacOS, so the default is `OFF` on Windows/MacOS.
* `-DGINKGO_BUILD_DOC={ON, OFF}` creates an HTML version of Ginkgo's documentation
from inline comments in the code. The default is `OFF`.
* `-DGINKGO_DOC_GENERATE_EXAMPLES={ON, OFF}` generates the documentation of examples
Expand Down Expand Up @@ -99,7 +100,7 @@ Ginkgo adds the following additional switches to control what is being built:
list of architectures. Supported values are:

* `Auto`
* `Kepler`, `Maxwell`, `Pascal`, `Volta`, `Ampere`
* `Kepler`, `Maxwell`, `Pascal`, `Volta`, `Turing`, `Ampere`
* `CODE`, `CODE(COMPUTE)`, `(COMPUTE)`

`Auto` will automatically detect the present CUDA-enabled GPU architectures
Expand Down Expand Up @@ -137,7 +138,7 @@ Depending on the configuration settings, some manual work might be required:
* Build Ginkgo as shared library:
Add `PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` into the environment variable `PATH`.
`GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH` is `windows_shared_library` by default. More Details are available in the [Installation page](./INSTALL.md).
* cmd: `set PATH="<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>;%PATH%"`
* cmd: `set PATH=<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>;%PATH%`
* powershell: `$env:PATH="<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>;$env:PATH"`

CMake will give the following error message if the path is not correct.
Expand All @@ -147,16 +148,16 @@ Depending on the configuration settings, some manual work might be required:
where `<path>` is the needed `<PROJECT_BINARY_DIR/GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH>`.
* Build Ginkgo with Debug mode:
Some Debug build specific issues can appear depending on the machine and environment. The known issues are the following:
1. `bigobj` issue: encountering `too many sections` needs the compilation flags `\bigobj` or `-Wa,-mbig-obj`
1. `bigobj` issue: encountering `too many sections` needs the compilation flags `/bigobj` or `-Wa,-mbig-obj`
2. `ld` issue: encountering `ld: error: export ordinal too large` needs the compilation flag `-O1`

The following are the details for different environments:
* _Microsoft Visual Studio_:
1. `bigobj` issue
* `cmake -DCMAKE_CXX_FLAGS=\bigobj <other parameters> <source_folder>` which might overwrite the default settings.
* add `\bigobj` into the environment variable `CXXFLAGS` (only available in the first cmake configuration)
* cmd: `set CXXFLAGS=\bigobj`
* powershell: `$env:CXXFLAGS=\bigobj`
* `cmake -DCMAKE_CXX_FLAGS=/bigobj <other parameters> <source_folder>` which might overwrite the default settings.
* add `/bigobj` into the environment variable `CXXFLAGS` (only available in the first cmake configuration)
* cmd: `set CXXFLAGS=/bigobj`
* powershell: `$env:CXXFLAGS=/bigobj`
2. `ld` issue (_Microsoft Visual Studio_ does not have this issue)
* _Cygwin_:
1. `bigobj` issue
Expand All @@ -175,7 +176,10 @@ Depending on the configuration settings, some manual work might be required:
2. `ld` issue (If building Ginkgo as static library, this is not needed)
* `cmake -DGINKGO_COMPILER_FLAGS="-Wpedantic -O1" <other parameters> <source_folder>` (`GINKGO_COMPILER_FLAGS` is `-Wpedantic` by default)
* add `-O1` in the environment variable `CXX_FLAGS` or `CMAKE_CXX_FLAGS`
* Build Ginkgo in _MinGW_:
* Possible issue when switching static/shared of Ginkgo with MSVC in the same build directory:\
If an issue occurs from mixing MD/MT runtime library when enabling `GINKGO_BUILD_BENCHMARKS`, it means the third-party flags are not updated correctly.
To update the third party flags, turn off `GINKGO_SKIP_DEPENDENCY_UPDATE` (`-DGINKGO_SKIP_DEPENDENCY_UPDATE=OFF`).
* Build Ginkgo in _MinGW_:\
If encountering the issue `cc1plus.exe: out of memory allocating 65536 bytes`, please follow the workaround in
[reference](https://www.intel.com/content/www/us/en/programmable/support/support-resources/knowledge-base/embedded/2016/cc1plus-exe--out-of-memory-allocating-65536-bytes.html),
or trying to compile ginkgo again might work.
Expand Down
12 changes: 6 additions & 6 deletions cuda/base/executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ std::shared_ptr<CudaExecutor> CudaExecutor::create(
return std::shared_ptr<CudaExecutor>(
new CudaExecutor(device_id, std::move(master), device_reset),
[device_id](CudaExecutor *exec) {
auto device_reset = exec->get_device_reset();
delete exec;
if (!CudaExecutor::get_num_execs(device_id) &&
exec->get_device_reset()) {
if (!CudaExecutor::get_num_execs(device_id) && device_reset) {
cuda::device_guard g(device_id);
cudaDeviceReset();
}
Expand All @@ -76,9 +76,9 @@ void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo)
if (this->get_device_id() < this->get_num_devices() &&
this->get_device_id() >= 0) {
cuda::device_guard g(this->get_device_id());
GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetPCIBusId(
const_cast<char *>(this->get_exec_info().pci_bus_id.data()), 13,
this->get_device_id()));
GKO_ASSERT_NO_CUDA_ERRORS(
cudaDeviceGetPCIBusId(&(this->get_exec_info().pci_bus_id.front()),
13, this->get_device_id()));

auto cuda_hwloc_obj =
mach_topo->get_pci_device(this->get_exec_info().pci_bus_id);
Expand Down Expand Up @@ -230,7 +230,7 @@ void CudaExecutor::set_gpu_property()
GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute(
&max_threads_per_block, cudaDevAttrMaxThreadsPerBlock,
this->get_device_id()));
std::vector<int> max_threads_per_block_dim{3, 0};
std::vector<int> max_threads_per_block_dim(3, 0);
GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceGetAttribute(
&max_threads_per_block_dim[0], cudaDevAttrMaxBlockDimX,
this->get_device_id()));
Expand Down
2 changes: 1 addition & 1 deletion cuda/test/base/array.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/executor.hpp>


#include "core/test/utils.hpp"
#include "cuda/test/utils.hpp"


template <typename T>
Expand Down
2 changes: 1 addition & 1 deletion cuda/test/factorization/par_ic_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class ParIc : public ::testing::Test {
: mtx_size(624, 624),
rand_engine(43456),
ref(gko::ReferenceExecutor::create()),
cuda(gko::CudaExecutor::create(0, gko::OmpExecutor::create()))
cuda(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()))
{
mtx_l = gko::test::generate_random_lower_triangular_matrix<Csr>(
mtx_size[0], mtx_size[0], false,
Expand Down
8 changes: 4 additions & 4 deletions cuda/test/matrix/diagonal_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ class Diagonal : public ::testing::Test {
diag = gen_diag(mtx_size[0]);
ddiag = Diag::create(cuda);
ddiag->copy_from(diag.get());
dense1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[0]);
dense2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[1]);
denseexpected1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[0]);
denseexpected2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[1]);
dense1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[1]);
dense2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[0]);
denseexpected1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[1]);
denseexpected2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[0]);
ddense1 = Dense::create(cuda);
ddense1->copy_from(dense1.get());
ddense2 = Dense::create(cuda);
Expand Down
2 changes: 1 addition & 1 deletion cuda/test/preconditioner/isai_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ TEST_F(Isai, CudaIsaiGenerateSpdinverseShortIsEquivalentToRef)
true);

GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse);
GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r<value_type>::value);
GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 15 * r<value_type>::value);
GKO_ASSERT_ARRAY_EQ(a1, da1);
GKO_ASSERT_ARRAY_EQ(a2, da2);
ASSERT_EQ(a1.get_const_data()[num_rows], 0);
Expand Down
3 changes: 2 additions & 1 deletion cuda/test/reorder/rcm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ class Rcm : public ::testing::Test {


Rcm()
: exec(gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true)),
: exec(gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(),
true)),
// clang-format off
p_mtx(gko::initialize<CsrMtx>({{1.0, 2.0, 0.0, -1.3, 2.1},
{2.0, 5.0, 1.5, 0.0, 0.0},
Expand Down
2 changes: 1 addition & 1 deletion cuda/test/solver/cb_gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "core/solver/cb_gmres_accessor.hpp"
#include "core/solver/cb_gmres_kernels.hpp"
#include "core/test/utils.hpp"
#include "cuda/test/utils.hpp"


namespace {
Expand Down
5 changes: 5 additions & 0 deletions cuda/test/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace {


// Visual Studio does not define the constructor of std::mutex as constexpr,
// causing it to not be initialized when creating this executor (which uses
// the mutex)
#if !defined(_MSC_VER)
// prevent device reset after each test
auto no_reset_exec =
gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), true);
#endif


} // namespace
Expand Down
12 changes: 6 additions & 6 deletions hip/base/executor.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ std::shared_ptr<HipExecutor> HipExecutor::create(
return std::shared_ptr<HipExecutor>(
new HipExecutor(device_id, std::move(master), device_reset),
[device_id](HipExecutor *exec) {
auto device_reset = exec->get_device_reset();
delete exec;
if (!HipExecutor::get_num_execs(device_id) &&
exec->get_device_reset()) {
if (!HipExecutor::get_num_execs(device_id) && device_reset) {
hip::device_guard g(device_id);
hipDeviceReset();
}
Expand All @@ -76,9 +76,9 @@ void HipExecutor::populate_exec_info(const MachineTopology *mach_topo)
if (this->get_device_id() < this->get_num_devices() &&
this->get_device_id() >= 0) {
hip::device_guard g(this->get_device_id());
GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetPCIBusId(
const_cast<char *>(this->get_exec_info().pci_bus_id.data()), 13,
this->get_device_id()));
GKO_ASSERT_NO_HIP_ERRORS(
hipDeviceGetPCIBusId(&(this->get_exec_info().pci_bus_id.front()),
13, this->get_device_id()));

auto hip_hwloc_obj =
mach_topo->get_pci_device(this->get_exec_info().pci_bus_id);
Expand Down Expand Up @@ -232,7 +232,7 @@ void HipExecutor::set_gpu_property()
this->get_device_id()));
this->get_exec_info().max_workitem_sizes.push_back(
max_threads_per_block);
std::vector<int> max_threads_per_block_dim{3, 0};
std::vector<int> max_threads_per_block_dim(3, 0);
GKO_ASSERT_NO_HIP_ERRORS(hipDeviceGetAttribute(
&max_threads_per_block_dim[0], hipDeviceAttributeMaxBlockDimX,
this->get_device_id()));
Expand Down
2 changes: 1 addition & 1 deletion hip/test/factorization/par_ic_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class ParIc : public ::testing::Test {
: mtx_size(585, 585),
rand_engine(10667),
ref(gko::ReferenceExecutor::create()),
hip(gko::HipExecutor::create(0, gko::OmpExecutor::create()))
hip(gko::HipExecutor::create(0, gko::ReferenceExecutor::create()))
{
mtx_l = gko::test::generate_random_lower_triangular_matrix<Csr>(
mtx_size[0], mtx_size[0], false,
Expand Down
8 changes: 4 additions & 4 deletions hip/test/matrix/diagonal_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ class Diagonal : public ::testing::Test {
diag = gen_diag(mtx_size[0]);
ddiag = Diag::create(hip);
ddiag->copy_from(diag.get());
dense1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[0]);
dense2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[1]);
denseexpected1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[0]);
denseexpected2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[1]);
dense1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[1]);
dense2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[0]);
denseexpected1 = gen_mtx<Dense>(mtx_size[0], mtx_size[1], mtx_size[1]);
denseexpected2 = gen_mtx<Dense>(mtx_size[1], mtx_size[0], mtx_size[0]);
ddense1 = Dense::create(hip);
ddense1->copy_from(dense1.get());
ddense2 = Dense::create(hip);
Expand Down
2 changes: 1 addition & 1 deletion hip/test/preconditioner/isai_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ TEST_F(Isai, HipIsaiGenerateSpdinverseShortIsEquivalentToRef)
true);

GKO_ASSERT_MTX_EQ_SPARSITY(inverse, d_inverse);
GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 10 * r<value_type>::value);
GKO_ASSERT_MTX_NEAR(inverse, d_inverse, 15 * r<value_type>::value);
GKO_ASSERT_ARRAY_EQ(a1, da1);
GKO_ASSERT_ARRAY_EQ(a2, da2);
ASSERT_EQ(a1.get_const_data()[num_rows], 0);
Expand Down
2 changes: 1 addition & 1 deletion hip/test/solver/cb_gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "core/solver/cb_gmres_accessor.hpp"
#include "core/solver/cb_gmres_kernels.hpp"
#include "core/test/utils.hpp"
#include "hip/test/utils.hip.hpp"


namespace {
Expand Down
Loading