Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion perf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,17 @@ if(${BOOST_COMPUTE_HAVE_CUDA})
set(CUDA_BENCHMARKS
thrust_accumulate
thrust_count
thrust_exclusive_scan
thrust_inner_product
thrust_merge
thrust_partial_sum
thrust_partition
thrust_reverse
thrust_rotate
thrust_saxpy
thrust_set_difference
thrust_sort
thrust_exclusive_scan
thrust_unique
)

foreach(BENCHMARK ${CUDA_BENCHMARKS})
Expand Down
86 changes: 49 additions & 37 deletions perf/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,43 +114,55 @@ def run_benchmark(name, sizes, vs=[]):
report.add_sample("compute", size, time)

competitors = {
"thrust" : ["accumulate",
"count",
"inner_product",
"partial_sum",
"sort",
"saxpy"],
"tbb": ["accumulate",
"merge",
"sort"],
"stl": ["accumulate",
"count",
"find_end",
"includes",
"inner_product",
"is_permutation",
"max_element",
"merge",
"next_permutation",
"nth_element",
"partial_sum",
"partition",
"partition_point",
"prev_permutation",
"reverse",
"rotate",
"rotate_copy",
"saxpy",
"search",
"search_n",
"set_difference",
"set_intersection",
"set_symmetric_difference",
"set_union",
"sort",
"stable_partition",
"unique",
"unique_copy"]
"thrust" : [
"accumulate",
"count",
"exclusive_scan",
"inner_product",
"merge",
"partial_sum",
"partition",
"reverse",
"rotate",
"saxpy",
"sort",
"unique"
],
"tbb": [
"accumulate",
"merge",
"sort"
],
"stl": [
"accumulate",
"count",
"find_end",
"includes",
"inner_product",
"is_permutation",
"max_element",
"merge",
"next_permutation",
"nth_element",
"partial_sum",
"partition",
"partition_point",
"prev_permutation",
"reverse",
"rotate",
"rotate_copy",
"saxpy",
"search",
"search_n",
"set_difference",
"set_intersection",
"set_symmetric_difference",
"set_union",
"sort",
"stable_partition",
"unique",
"unique_copy"
]
}

for other in vs:
Expand Down
4 changes: 4 additions & 0 deletions perf/perf_partition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ int main(int argc, char *argv[])

perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);

t.start();
boost::compute::partition(
device_vector.begin(), device_vector.end(), _1 < 10, queue
Expand Down
35 changes: 18 additions & 17 deletions perf/perf_set_difference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,39 +36,40 @@ int main(int argc, char *argv[])
std::cout << "device: " << device.name() << std::endl;

// create vectors of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
std::sort(host_vector.begin(), host_vector.end());

std::vector<int> host_vector2(PERF_N);
std::generate(host_vector2.begin(), host_vector2.end(), rand_int);
std::sort(host_vector2.begin(), host_vector2.end());
std::vector<int> v1(std::floor(PERF_N / 2.0));
std::vector<int> v2(std::ceil(PERF_N / 2.0));
std::generate(v1.begin(), v1.end(), rand_int);
std::generate(v2.begin(), v2.end(), rand_int);
std::sort(v1.begin(), v1.end());
std::sort(v2.begin(), v2.end());

// create vectors on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::vector<int> gpu_v1(PERF_N, context);
boost::compute::vector<int> gpu_v2(PERF_N, context);

boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
v1.begin(), v1.end(), gpu_v1.begin(), queue
);

boost::compute::vector<int> device_vector2(PERF_N, context);
boost::compute::copy(
host_vector2.begin(), host_vector2.end(), device_vector2.begin(), queue
v2.begin(), v2.end(), gpu_v2.begin(), queue
);

boost::compute::vector<int> result(PERF_N, context);
boost::compute::vector<int> gpu_v3(PERF_N, context);
boost::compute::vector<int>::iterator gpu_v3_end;

perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::set_difference(
device_vector.begin(), device_vector.begin(),
device_vector2.begin(), device_vector2.end(),
result.begin(), queue
gpu_v3_end = boost::compute::set_difference(
gpu_v1.begin(), gpu_v1.end(),
gpu_v2.begin(), gpu_v2.end(),
gpu_v3.begin(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;

return 0;
}
14 changes: 8 additions & 6 deletions perf/perf_stl_set_difference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,30 @@ int main(int argc, char *argv[])

std::cout << "size: " << PERF_N << std::endl;

std::vector<int> v1(PERF_N);
std::generate(v1.begin(), v1.end(), rand_int);
std::vector<int> v1(std::floor(PERF_N / 2.0));
std::vector<int> v2(std::ceil(PERF_N / 2.0));

std::vector<int> v2(PERF_N);
std::generate(v1.begin(), v1.end(), rand_int);
std::generate(v2.begin(), v2.end(), rand_int);

std::vector<int> v3(PERF_N);

std::sort(v1.begin(), v1.end());
std::sort(v2.begin(), v2.end());

std::vector<int> v3(PERF_N);
std::vector<int>::iterator v3_end;

perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::set_difference(
v3_end = std::set_difference(
v1.begin(), v1.end(),
v2.begin(), v2.end(),
v3.begin()
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;

return 0;
}
63 changes: 63 additions & 0 deletions perf/perf_thrust_merge.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//

#include <iostream>
#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/merge.h>
#include <thrust/sort.h>

#include "perf.hpp"

// Benchmark driver for thrust::merge.
//
// Builds two sorted host ranges whose sizes sum to PERF_N, copies them to the
// device, times thrust::merge over PERF_TRIALS runs and prints the timer's
// min_time() (divided by 1e6, labelled "ms"). Afterwards the device output is
// compared with std::merge run on the host inputs.
//
// Returns 0 on success and -1 if the device and host results differ.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N with integer arithmetic: the first range gets the rounded
    // down half and the second range gets the remainder, so the two sizes
    // always add up to PERF_N (same sizes as floor(N/2.0) and ceil(N/2.0),
    // without going through floating point).
    const size_t n1 = PERF_N / 2;
    const size_t n2 = PERF_N - n1;

    thrust::host_vector<int> v1(n1);
    thrust::host_vector<int> v2(n2);
    std::generate(v1.begin(), v1.end(), rand);
    std::generate(v2.begin(), v2.end(), rand);

    // merge requires both inputs to be sorted
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // transfer data to the device
    thrust::device_vector<int> gpu_v1 = v1;
    thrust::device_vector<int> gpu_v2 = v2;
    thrust::device_vector<int> gpu_v3(PERF_N);

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        thrust::merge(
            gpu_v1.begin(), gpu_v1.end(),
            gpu_v2.begin(), gpu_v2.end(),
            gpu_v3.begin()
        );
        // block until outstanding device work has finished before stopping
        // the timer
        cudaDeviceSynchronize();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // copy the device result back and verify it against a host-side merge
    thrust::host_vector<int> check_v3 = gpu_v3;

    thrust::host_vector<int> v3(PERF_N);
    std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
    bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin());
    if(!ok){
        std::cerr << "ERROR: merged ranges different" << std::endl;
        return -1;
    }

    return 0;
}
59 changes: 59 additions & 0 deletions perf/perf_thrust_partition.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//

#include <algorithm>
#include <cstdlib>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/partition.h>

#include "perf.hpp"

// Returns rand() scaled from [0, RAND_MAX] onto [0, 25] and truncated to int.
int rand_int()
{
    const double fraction = static_cast<double>(rand()) / RAND_MAX;
    return static_cast<int>(fraction * 25.0);
}

// Device-side unary predicate: true when the value is strictly less than 10.
//
// thrust::unary_function takes <Argument, Result>, so the argument type (int)
// comes first and the result type (bool) second.
struct less_than_ten : public thrust::unary_function<int, bool>
{
    __device__ bool operator()(int x) const
    {
        return x < 10;
    }
};

// Benchmark driver for thrust::partition.
//
// Fills a host vector of PERF_N ints using rand_int, then for each of
// PERF_TRIALS runs re-uploads it to the device (outside the timed region)
// and times thrust::partition with the less_than_ten predicate. Prints the
// timer's min_time() divided by 1e6, labelled "ms".
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;

    thrust::host_vector<int> host_input(PERF_N);
    std::generate(host_input.begin(), host_input.end(), rand_int);

    thrust::device_vector<int> device_data(PERF_N);

    perf_timer timer;
    for(size_t run = 0; run < PERF_TRIALS; ++run){
        // restore the original input before every timed run, since the
        // previous run rearranged the device data in place
        device_data = host_input;

        timer.start();
        thrust::partition(
            device_data.begin(),
            device_data.end(),
            less_than_ten()
        );
        cudaDeviceSynchronize();
        timer.stop();
    }

    const double best = timer.min_time() / 1e6;
    std::cout << "time: " << best << " ms" << std::endl;

    return 0;
}
47 changes: 47 additions & 0 deletions perf/perf_thrust_reverse.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://kylelutz.github.com/compute for more information.
//---------------------------------------------------------------------------//

#include <algorithm>
#include <cstdlib>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/reverse.h>

#include "perf.hpp"

// Benchmark driver for thrust::reverse.
//
// Takes the vector produced by generate_random_vector<int>(PERF_N) as host
// input, then for each of PERF_TRIALS runs uploads it to the device (outside
// the timed region) and times thrust::reverse. Prints the timer's min_time()
// divided by 1e6, labelled "ms".
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;
    thrust::host_vector<int> host_data = generate_random_vector<int>(PERF_N);

    // device buffer; it is assigned from the host data at the start of
    // every trial
    thrust::device_vector<int> device_data;

    perf_timer timer;
    for(size_t run = 0; run < PERF_TRIALS; ++run){
        device_data = host_data;

        timer.start();
        thrust::reverse(device_data.begin(), device_data.end());
        cudaDeviceSynchronize();
        timer.stop();
    }

    const double best = timer.min_time() / 1e6;
    std::cout << "time: " << best << " ms" << std::endl;

    // copy the last trial's result from the device back into the host vector
    thrust::copy(device_data.begin(), device_data.end(), host_data.begin());

    return 0;
}
Loading