Skip to content

Commit

Permalink
explicit type in for, use func not macro to skip, remove dup test
Browse files Browse the repository at this point in the history
Co-authored-by: Tobias Ribizel <ribizel@kit.edu>
  • Loading branch information
yhmtsai and upsj committed Jul 20, 2021
1 parent a6b4ccc commit 7802f47
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 366 deletions.
8 changes: 3 additions & 5 deletions common/factorization/par_ilut_select_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ __global__ __launch_bounds__(searchtree_width) void build_searchtree(
// assuming rounding towards zero
auto stride = double(size) / sample_size;
#pragma unroll
for (auto i = decltype(sampleselect_oversampling){0};
i < sampleselect_oversampling; ++i) {
for (int i = 0; i < sampleselect_oversampling; ++i) {
auto lidx = idx * sampleselect_oversampling + i;
auto val = input[static_cast<IndexType>(lidx * stride)];
samples[i] = abs(val);
Expand Down Expand Up @@ -120,8 +119,7 @@ __global__ __launch_bounds__(default_block_size) void count_buckets(
auto el = abs(input[i]);
IndexType tree_idx{};
#pragma unroll
for (auto level = decltype(sampleselect_searchtree_height){0};
level < sampleselect_searchtree_height; ++level) {
for (int level = 0; level < sampleselect_searchtree_height; ++level) {
auto cmp = !(el < sh_tree[tree_idx]);
tree_idx = 2 * tree_idx + 1 + cmp;
}
Expand Down Expand Up @@ -170,7 +168,7 @@ __global__ __launch_bounds__(default_block_size) void block_prefix_sum(
// compute prefix sum over warp-sized blocks
IndexType total{};
auto base_idx = warp_idx * work_per_warp * warp.size();
for (auto step = decltype(work_per_warp){0}; step < work_per_warp; ++step) {
for (IndexType step = 0; step < work_per_warp; ++step) {
auto idx = warp_lane + step * warp.size() + base_idx;
auto val = idx < num_blocks ? local_counters[idx] : zero<IndexType>();
IndexType warp_total{};
Expand Down
2 changes: 1 addition & 1 deletion cuda/test/components/sorting_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ protected:
{
// we want some duplicate elements
std::uniform_int_distribution<gko::int32> dist(0, num_elements / 2);
for (auto i = decltype(num_elements){0}; i < num_elements; ++i) {
for (int i = 0; i < num_elements; ++i) {
ref_shared.get_data()[i] = dist(rng);
}
ddata = gko::Array<gko::int32>{cuda, ref_shared};
Expand Down
2 changes: 1 addition & 1 deletion dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ else ()
target_link_options(ginkgo_dpcpp PUBLIC -fsycl-device-code-split=per_kernel)
endif()
target_link_libraries(ginkgo_dpcpp PUBLIC ginkgo_device)
target_link_libraries(ginkgo_dpcpp PRIVATE $<LINK_ONLY:MKL::MKL_DPCPP>)
target_link_libraries(ginkgo_dpcpp PRIVATE MKL::MKL_DPCPP)
if (GINKGO_DPCPP_SINGLE_MODE)
target_compile_definitions(ginkgo_dpcpp PRIVATE GINKGO_DPCPP_SINGLE_MODE=1)
endif()
Expand Down
Loading

0 comments on commit 7802f47

Please sign in to comment.