Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

1014 lines (809 sloc) 27.84 kb
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <tuple>
#include <numeric>
#include <random>
#include <boost/program_options.hpp>
#define VEXCL_USE_CUSPARSE
#include <vexcl/devlist.hpp>
#include <vexcl/vector.hpp>
#include <vexcl/reductor.hpp>
#include <vexcl/random.hpp>
#include <vexcl/tagged_terminal.hpp>
#include <vexcl/element_index.hpp>
#include <vexcl/spmat.hpp>
#include <vexcl/stencil.hpp>
#include <vexcl/sort.hpp>
#include <vexcl/scan.hpp>
#ifdef HAVE_BOOST_COMPUTE
# include <vexcl/external/boost_compute.hpp>
#endif
#ifdef HAVE_CLOGS
# include <vexcl/external/clogs.hpp>
#endif
#ifdef _MSC_VER
# pragma warning(disable : 4267)
#endif
//---------------------------------------------------------------------------
// Switches selecting which benchmarks are executed. Every switch starts out
// enabled; the global instance `options` is what the rest of the file reads.
struct Options {
    bool bm_saxpy    = true;
    bool bm_vector   = true;
    bool bm_reductor = true;
    bool bm_stencil  = true;
    bool bm_spmv     = true;
    bool bm_rng      = true;
    bool bm_sort     = true;
    bool bm_scan     = true;
    bool bm_cpu      = true;

    // Negates every switch: enabled ones become disabled and vice versa.
    void revert() {
        bool *switches[] = {
            &bm_saxpy, &bm_vector, &bm_reductor, &bm_stencil, &bm_spmv,
            &bm_rng, &bm_sort, &bm_scan, &bm_cpu
        };

        for(bool *flag : switches) *flag = !*flag;
    }
} options;
//---------------------------------------------------------------------------
// Builds a host vector with n values drawn from
// std::uniform_real_distribution<real>(0.0, 1.0). The engine is seeded with
// the next value of std::rand().
template <typename real>
std::vector<real> random_vector(size_t n) {
    std::default_random_engine engine( std::rand() );
    std::uniform_real_distribution<real> uniform(0.0, 1.0);

    std::vector<real> values(n);
    for(auto &value : values) value = uniform(engine);

    return values;
}
//---------------------------------------------------------------------------
// Times M repetitions of the update a = alpha * a + b on vectors of N
// elements, first through VexCL using ctx and then, when options.bm_cpu is
// set, with a plain loop on the host.
//
// GFLOPS and bandwidth are printed for each variant; the host branch also
// prints the sum of squared differences between the two results.
// Returns the (GFLOPS, bandwidth) pair of the VexCL run.
template <typename real>
std::pair<double,double> benchmark_saxpy(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 1024 * 1024;
    const size_t M = 1024;

    // Each element update is counted as 2 operations and 3 values of type
    // real moved.
    auto to_gflops = [&](double elapsed) {
        return (2.0 * N * M) / elapsed / 1e9;
    };
    auto to_bwidth = [&](double elapsed) {
        return (3.0 * N * M * sizeof(real)) / elapsed / 1e9;
    };

    std::vector<real> A(N, 0);
    std::vector<real> B = random_vector<real>(N);
    real alpha = random_vector<real>(1)[0];

    vex::vector<real> a(ctx, A);
    vex::vector<real> b(ctx, B);

    auto ta = vex::tag<1>(a);

    // One untimed run before the measurement; the result is reset to zero.
    ta = alpha * ta + b;
    ta = 0;

    prof.tic_cpu("OpenCL");
    for(size_t rep = 0; rep != M; ++rep)
        ta = alpha * ta + b;
    ctx.finish();
    double device_time = prof.toc("OpenCL");

    std::pair<double,double> device_perf(
            to_gflops(device_time), to_bwidth(device_time));

    std::cout << "Vector SAXPY (" << vex::type_name<real>() << ")\n";
    std::cout
        << " OpenCL"
        << "\n GFLOPS: " << device_perf.first
        << "\n Bandwidth: " << device_perf.second
        << std::endl;

    if (!options.bm_cpu) return device_perf;

    prof.tic_cpu("C++");
    for(size_t rep = 0; rep != M; ++rep)
        for(size_t k = 0; k != N; ++k)
            A[k] = alpha * A[k] + B[k];
    double host_time = prof.toc("C++");

    std::cout
        << " C++"
        << "\n GFLOPS: " << to_gflops(host_time)
        << "\n Bandwidth: " << to_bwidth(host_time)
        << std::endl;

    // Move the host result into b and compare it with the VexCL result.
    vex::copy(A, b);
    vex::Reductor<real, vex::SUM> sum(ctx);
    a -= b;
    std::cout << " res = " << sum(a * a)
        << std::endl << std::endl;

    return device_perf;
}
//---------------------------------------------------------------------------
// Times M repetitions of a += b + c * d on vectors of N elements, first
// through VexCL using ctx and then, when options.bm_cpu is set, with a plain
// loop on the host.
//
// GFLOPS and bandwidth are printed for each variant; the host branch also
// prints the sum of squared differences between the two results.
// Returns the (GFLOPS, bandwidth) pair of the VexCL run.
template <typename real>
std::pair<double,double> benchmark_vector(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 1024 * 1024;
    const size_t M = 1024;

    // Each element update is counted as 3 operations and 5 values of type
    // real moved.
    auto to_gflops = [&](double elapsed) {
        return (3.0 * N * M) / elapsed / 1e9;
    };
    auto to_bwidth = [&](double elapsed) {
        return (5.0 * N * M * sizeof(real)) / elapsed / 1e9;
    };

    std::vector<real> A(N, 0);
    std::vector<real> B = random_vector<real>(N);
    std::vector<real> C = random_vector<real>(N);
    std::vector<real> D = random_vector<real>(N);

    vex::vector<real> a(ctx, A);
    vex::vector<real> b(ctx, B);
    vex::vector<real> c(ctx, C);
    vex::vector<real> d(ctx, D);

    // One untimed run before the measurement; the result is reset to zero.
    a += b + c * d;
    a = 0;

    prof.tic_cpu("OpenCL");
    for(size_t rep = 0; rep != M; ++rep)
        a += b + c * d;
    ctx.finish();
    double device_time = prof.toc("OpenCL");

    std::pair<double,double> device_perf(
            to_gflops(device_time), to_bwidth(device_time));

    std::cout << "Vector arithmetic (" << vex::type_name<real>() << ")\n";
    std::cout
        << " OpenCL"
        << "\n GFLOPS: " << device_perf.first
        << "\n Bandwidth: " << device_perf.second
        << std::endl;

    if (!options.bm_cpu) return device_perf;

    prof.tic_cpu("C++");
    for(size_t rep = 0; rep != M; ++rep)
        for(size_t k = 0; k != N; ++k)
            A[k] += B[k] + C[k] * D[k];
    double host_time = prof.toc("C++");

    std::cout
        << " C++"
        << "\n GFLOPS: " << to_gflops(host_time)
        << "\n Bandwidth: " << to_bwidth(host_time)
        << std::endl;

    // Move the host result into b and compare it with the VexCL result.
    vex::copy(A, b);
    vex::Reductor<real, vex::SUM> sum(ctx);
    a -= b;
    std::cout << " res = " << sum(a * a)
        << std::endl << std::endl;

    return device_perf;
}
//---------------------------------------------------------------------------
// Times M evaluations of the reduction sum(a * b) over vectors of N elements
// through VexCL using ctx and, when options.bm_cpu is set, M calls of
// std::inner_product on the host.
//
// GFLOPS and bandwidth are printed for each variant; the host branch also
// prints the relative difference between the two accumulated sums.
// Returns the (GFLOPS, bandwidth) pair of the VexCL run.
template <typename real>
std::pair<double, double> benchmark_reductor(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 1024 / 16;

    // Each element is counted as 2 operations and 2 values of type real read.
    auto to_gflops = [&](double elapsed) {
        return 2.0 * N * M / elapsed / 1e9;
    };
    auto to_bwidth = [&](double elapsed) {
        return 2.0 * N * M * sizeof(real) / elapsed / 1e9;
    };

    std::vector<real> A = random_vector<real>(N);
    std::vector<real> B = random_vector<real>(N);

    vex::vector<real> a(ctx, A);
    vex::vector<real> b(ctx, B);

    vex::Reductor<real, vex::SUM> sum(ctx);

    // One untimed reduction before the measurement; its value is dropped.
    real device_total = sum(a * b);
    device_total = 0;

    prof.tic_cpu("OpenCL");
    for(size_t rep = 0; rep != M; ++rep)
        device_total += sum(a * b);
    ctx.finish();
    double device_time = prof.toc("OpenCL");

    std::pair<double,double> device_perf(
            to_gflops(device_time), to_bwidth(device_time));

    std::cout << "Reduction (" << vex::type_name<real>() << ")\n";
    std::cout
        << " OpenCL"
        << "\n GFLOPS: " << device_perf.first
        << "\n Bandwidth: " << device_perf.second
        << std::endl;

    if (!options.bm_cpu) return device_perf;

    real host_total = 0;

    prof.tic_cpu("C++");
    for(size_t rep = 0; rep != M; ++rep)
        host_total += std::inner_product(A.begin(), A.end(), B.begin(), static_cast<real>(0));
    double host_time = prof.toc("C++");

    std::cout
        << " C++"
        << "\n GFLOPS: " << to_gflops(host_time)
        << "\n Bandwidth: " << to_bwidth(host_time)
        << std::endl;

    std::cout << " res = " << fabs( (device_total - host_total) / host_total )
        << std::endl << std::endl;

    return device_perf;
}
//---------------------------------------------------------------------------
// Times M applications of a 21-point averaging stencil (every weight is 1/21)
// to a vector of N elements through VexCL using ctx and, when options.bm_cpu
// is set, with nested loops on the host that clamp out-of-range indices to
// the ends of the input.
//
// GFLOPS and bandwidth are printed for each variant; the host branch also
// prints the largest absolute difference between the two results.
// Returns the (GFLOPS, bandwidth) pair of the VexCL run.
template <typename real>
std::pair<double, double> benchmark_stencil(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const long N = 1024 * 1024;
    const long M = 1024;

    std::vector<real> A = random_vector<real>(N);
    std::vector<real> B(N);

    std::vector<real> S(21, static_cast<real>(1) / 21);
    long center = S.size() / 2;

    // Metrics are scaled by the stencil width.
    auto to_gflops = [&](double elapsed) {
        return 2.0 * S.size() * N * M / elapsed / 1e9;
    };
    auto to_bwidth = [&](double elapsed) {
        return 2.0 * S.size() * N * M * sizeof(real) / elapsed / 1e9;
    };

    vex::stencil<real> s(ctx, S, center);

    vex::vector<real> a(ctx, A);
    vex::vector<real> b(ctx, N);

    // One untimed run before the measurement.
    b = a * s;

    prof.tic_cpu("OpenCL");
    for(long rep = 0; rep != M; ++rep)
        b = a * s;
    ctx.finish();
    double device_time = prof.toc("OpenCL");

    std::pair<double,double> device_perf(
            to_gflops(device_time), to_bwidth(device_time));

    std::cout << "Stencil convolution (" << vex::type_name<real>() << ")\n";
    std::cout
        << " OpenCL"
        << "\n GFLOPS: " << device_perf.first
        << "\n Bandwidth: " << device_perf.second
        << std::endl;

    if (!options.bm_cpu) return device_perf;

    const long width = (long)S.size();

    prof.tic_cpu("C++");
    for(long rep = 0; rep != M; ++rep) {
        for(long i = 0; i != N; ++i) {
            real acc = 0;
            for(long k = 0; k < width; ++k) {
                // Clamp the source position into [0, N-1].
                const long src = std::min<long>(N-1, std::max<long>(0, i + k - center));
                acc += S[k] * A[src];
            }
            B[i] = acc;
        }
    }
    double host_time = prof.toc("C++");

    std::cout
        << " C++"
        << "\n GFLOPS: " << to_gflops(host_time)
        << "\n Bandwidth: " << to_bwidth(host_time)
        << std::endl;

    // Move the host result into a and compare it with the VexCL result.
    vex::Reductor<real, vex::MAX> max_of(ctx);
    copy(B, a);
    std::cout << " res = " << max_of(fabs(a - b))
        << std::endl << std::endl;

    return device_perf;
}
//---------------------------------------------------------------------------
// Benchmarks the sparse matrix-vector update y += A * x, where A is assembled
// below in row-pointer / column / value arrays. The product is timed M times
// through VexCL using ctx and, when options.bm_cpu is set, with a
// hand-written loop over the same arrays on the host.
//
// GFLOPS and bandwidth are printed for each variant; the host branch also
// prints the sum of squared differences between the two results.
// Returns the (GFLOPS, bandwidth) pair of the VexCL run.
template <typename real>
std::pair<double,double> benchmark_spmv(
const vex::Context &ctx, vex::profiler<> &prof
)
{
// Construct matrix for 3D Poisson problem in cubic domain.
// The grid has n points per dimension, so the matrix has N = n^3 rows.
const size_t n = 128;
const size_t N = n * n * n;
const size_t M = 1024;
double time_elapsed;
// Scaling factor applied to the stencil coefficients: (n - 1)^2.
const real h2i = (n - 1) * (n - 1);
// row[i]..row[i+1] delimits the entries of matrix row i inside col/val.
std::vector<size_t> row;
std::vector<uint> col;
std::vector<real> val;
std::vector<real> X(n * n * n, static_cast<real>(1e-2));
std::vector<real> Y(n * n * n, 0);
// Capacity estimate: one entry per row plus six more per interior point.
row.reserve(n * n * n + 1);
col.reserve(6 * (n - 2) * (n - 2) * (n - 2) + n * n * n);
val.reserve(6 * (n - 2) * (n - 2) * (n - 2) + n * n * n);
row.push_back(0);
// idx is the linear index of grid point (i, j, k); i runs fastest.
for(size_t k = 0, idx = 0; k < n; k++) {
for(size_t j = 0; j < n; j++) {
for(size_t i = 0; i < n; i++, idx++) {
if (
i == 0 || i == (n - 1) ||
j == 0 || j == (n - 1) ||
k == 0 || k == (n - 1)
)
{
// Boundary point: a single unit entry on the diagonal.
col.push_back(idx);
val.push_back(1);
row.push_back(row.back() + 1);
} else {
// Interior point: seven entries, the six neighbours (-h2i each) and the
// diagonal (6 * h2i), pushed in increasing column order.
col.push_back(idx - n * n);
val.push_back(-h2i);
col.push_back(idx - n);
val.push_back(-h2i);
col.push_back(idx - 1);
val.push_back(-h2i);
col.push_back(idx);
val.push_back(6 * h2i);
col.push_back(idx + 1);
val.push_back(-h2i);
col.push_back(idx + n);
val.push_back(-h2i);
col.push_back(idx + n * n);
val.push_back(-h2i);
row.push_back(row.back() + 7);
}
}
}
}
// The last row pointer equals the total number of stored entries.
size_t nnz = row.back();
// Transfer data to compute devices.
vex::SpMat<real,uint> A(ctx, n * n * n, n * n * n, row.data(), col.data(), val.data());
vex::vector<real> x(ctx, X);
vex::vector<real> y(ctx, Y);
// Get timings.
// One untimed product first; y is reset to zero before the measurement.
y += A * x;
y = 0;
prof.tic_cpu("OpenCL");
for(size_t i = 0; i < M; i++)
y += A * x;
ctx.finish();
time_elapsed = prof.toc("OpenCL");
// Counted work per product: 2 operations per stored entry plus one per row.
double gflops = M / time_elapsed / 1e9 * (2.0 * nnz + N);
// NOTE(review): the index term uses sizeof(size_t) although col holds uint
// values; confirm which index width the bandwidth figure should reflect.
double bwidth = M / time_elapsed / 1e9 * (nnz * (2 * sizeof(real) + sizeof(size_t)) + 4 * N * sizeof(real));
std::cout
<< "SpMV (" << vex::type_name<real>() << ")\n"
<< " OpenCL"
<< "\n GFLOPS: " << gflops
<< "\n Bandwidth: " << bwidth
<< std::endl;
if (options.bm_cpu) {
prof.tic_cpu("C++");
// Host reference: the same update computed row by row from row/col/val.
for(size_t k = 0; k < M; k++)
for(size_t i = 0; i < N; i++) {
real s = 0;
for(size_t j = row[i]; j < row[i + 1]; j++)
s += val[j] * X[col[j]];
Y[i] += s;
}
time_elapsed = prof.toc("C++");
{
// These locals shadow the outer gflops/bwidth, so the values returned below
// remain those of the VexCL run.
double gflops = M / time_elapsed / 1e9 * (2.0 * nnz + N);
double bwidth = M / time_elapsed / 1e9 * (nnz * (2 * sizeof(real) + sizeof(size_t)) + 4 * N * sizeof(real));
std::cout
<< " C++"
<< "\n GFLOPS: " << gflops
<< "\n Bandwidth: " << bwidth
<< std::endl;
}
// The host result overwrites x (no longer needed) and is subtracted from y.
copy(Y, x);
y -= x;
vex::Reductor<real, vex::SUM> sum(ctx);
std::cout << " res = " << sum(y * y) << std::endl << std::endl;
}
return std::make_pair(gflops, bwidth);
}
//---------------------------------------------------------------------------
// Benchmarks y += A * x with the matrix held in vex::SpMatCCSR. Instead of
// storing every row, the matrix is described by two row patterns (row/col/val
// below) and a per-row selector idx; column entries are offsets relative to
// the row index. Only ctx.queue(0) is used for the matrix and the vectors.
//
// The product is timed M times through VexCL and, when options.bm_cpu is set,
// with a hand-written loop on the host. GFLOPS and bandwidth are printed for
// each variant; the host branch also prints the sum of squared differences.
// Returns the (GFLOPS, bandwidth) pair of the VexCL run.
template <typename real>
std::pair<double,double> benchmark_spmv_ccsr(
const vex::Context &ctx, vex::profiler<> &prof
)
{
// Construct matrix for 3D Poisson problem in cubic domain.
const uint n = 128;
const uint N = n * n * n;
const uint M = 1024;
double time_elapsed;
// Scaling factor applied to the stencil coefficients: (n - 1)^2.
const real h2i = (n - 1) * (n - 1);
// idx[i] selects the pattern used by matrix row i.
std::vector<size_t> idx;
// row[p]..row[p+1] delimits the entries of pattern p inside col/val.
std::vector<size_t> row(3);
std::vector<int> col(8);
std::vector<real> val(8);
std::vector<real> X(n * n * n, static_cast<real>(1e-2));
std::vector<real> Y(n * n * n, 0);
idx.reserve(n * n * n);
row[0] = 0;
row[1] = 1;
row[2] = 8;
// Pattern 0 (one entry): unit value at offset 0, i.e. on the diagonal.
col[0] = 0;
val[0] = 1;
// Pattern 1 (seven entries): offsets to the six grid neighbours and the
// diagonal, in increasing order.
col[1] = -static_cast<int>(n * n);
col[2] = -static_cast<int>(n);
col[3] = -1;
col[4] = 0;
col[5] = 1;
col[6] = n;
col[7] = (n * n);
val[1] = -h2i;
val[2] = -h2i;
val[3] = -h2i;
val[4] = h2i * 6;
val[5] = -h2i;
val[6] = -h2i;
val[7] = -h2i;
// Boundary grid points use pattern 0, interior points use pattern 1.
for(size_t k = 0; k < n; k++) {
for(size_t j = 0; j < n; j++) {
for(size_t i = 0; i < n; i++) {
if (
i == 0 || i == (n - 1) ||
j == 0 || j == (n - 1) ||
k == 0 || k == (n - 1)
)
{
idx.push_back(0);
} else {
idx.push_back(1);
}
}
}
}
// Entry count of the equivalent fully stored matrix: one per row plus six
// more per interior point.
size_t nnz = 6 * (n - 2) * (n - 2) * (n - 2) + n * n * n;
// Transfer data to compute devices.
// The 2 passed after the row count matches the number of patterns above.
vex::SpMatCCSR<real,int> A(ctx.queue(0), n * n * n, 2,
idx.data(), row.data(), col.data(), val.data());
std::vector<vex::command_queue> q1(1, ctx.queue(0));
vex::vector<real> x(q1, X);
vex::vector<real> y(q1, Y);
// Get timings.
// One untimed product first; y is reset to zero before the measurement.
y += A * x;
y = 0;
prof.tic_cpu("OpenCL");
for(size_t i = 0; i < M; i++)
y += A * x;
ctx.finish();
time_elapsed = prof.toc("OpenCL");
// Counted work per product: 2 operations per entry plus one per row.
double gflops = (2.0 * nnz + N) * M / time_elapsed / 1e9;
double bwidth = M * (nnz * (2 * sizeof(real) + sizeof(int)) + 4 * N * sizeof(real)) / time_elapsed / 1e9;
std::cout
<< "SpMV (CCSR) (" << vex::type_name<real>() << ")\n"
<< " OpenCL"
<< "\n GFLOPS: " << gflops
<< "\n Bandwidth: " << bwidth
<< std::endl;
if (options.bm_cpu) {
prof.tic_cpu("C++");
// Host reference: expand the pattern of each row on the fly.
for(size_t k = 0; k < M; k++)
for(size_t i = 0; i < N; i++) {
real s = 0;
for(size_t j = row[idx[i]]; j < row[idx[i] + 1]; j++)
// col[j] is a signed offset added to the unsigned row index i; negative
// offsets only occur in pattern 1, which is assigned to interior points.
s += val[j] * X[i + col[j]];
Y[i] += s;
}
time_elapsed = prof.toc("C++");
{
// These locals shadow the outer gflops/bwidth, so the values returned below
// remain those of the VexCL run.
double gflops = (2.0 * nnz + N) * M / time_elapsed / 1e9;
double bwidth = M * (nnz * (2 * sizeof(real) + sizeof(int)) + 4 * N * sizeof(real)) / time_elapsed / 1e9;
std::cout
<< " C++"
<< "\n GFLOPS: " << gflops
<< "\n Bandwidth: " << bwidth
<< std::endl;
}
// The host result overwrites x (no longer needed) and is subtracted from y.
copy(Y, x);
y -= x;
vex::Reductor<real, vex::SUM> sum(q1);
std::cout << " res = " << sum(y * y) << std::endl << std::endl;
}
return std::make_pair(gflops, bwidth);
}
//---------------------------------------------------------------------------
// Evaluates M times the maximum of N values produced by vex::Random<real, GF>
// (each time with a fresh seed from std::rand()) and returns N * M divided by
// the value reported by the stopwatch. One extra evaluation is performed
// before the stopwatch is created.
template <typename real, class GF>
double rng_throughput(const vex::Context &ctx, size_t N, size_t M) {
    vex::Random<real, GF> generator;
    vex::Reductor<real, vex::MAX> reduce_max(ctx);

    real largest = reduce_max( generator( vex::element_index(0, N), std::rand() ) );

    vex::stopwatch<> watch;
    for(size_t pass = 0; pass != M; ++pass) {
        real current = reduce_max( generator( vex::element_index(0, N), std::rand() ) );
        largest = std::max(largest, current);
    }
    ctx.finish();

    return N * M / watch.toc();
}
//---------------------------------------------------------------------------
// Reports how many random numbers per second are produced by the threefry
// and philox generators through VexCL (see rng_throughput) and, when
// options.bm_cpu is set, by std::mt19937 with a uniform real distribution on
// the host. The host loop draws N numbers once; the VexCL runs use N * M.
template <typename real>
void benchmark_rng(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 1024;

    prof.tic_cpu("OpenCL (threefry)");
    double threefry_rate = rng_throughput<real, vex::random::threefry>(ctx, N, M);
    prof.toc("OpenCL (threefry)");

    std::cout << "Random numbers per second (" << vex::type_name<real>() << ")\n";
    std::cout << " OpenCL (threefry): " << threefry_rate << std::endl;

    prof.tic_cpu("OpenCL (philox)");
    double philox_rate = rng_throughput<real, vex::random::philox>(ctx, N, M);
    prof.toc("OpenCL (philox)");

    std::cout << " OpenCL (philox): " << philox_rate << std::endl;

    if (options.bm_cpu) {
        std::mt19937 engine( std::rand() );
        std::uniform_real_distribution<real> uniform(0.0, 1.0);

        prof.tic_cpu("C++ (mt19937)");
        // Track the running maximum of the generated values.
        real largest = 0;
        for(size_t draw = 0; draw != N; ++draw)
            largest = std::max(largest, uniform(engine));
        double host_time = prof.toc("C++ (mt19937)");

        std::cout << " C++ (mt19937): " << N / host_time << std::endl;
    }

    std::cout << std::endl;
}
//---------------------------------------------------------------------------
// Measures sorting throughput (keys per second) for N random integer keys,
// repeated M times per implementation: vex::sort, optionally the
// Boost.Compute and CLOGS backends when compiled in, and std::sort on the
// host when options.bm_cpu is set. Keys are cl_uint when real is float and
// cl_ulong otherwise. Restoring the unsorted input before each repetition is
// kept outside the timed section.
template <typename real>
void benchmark_sort(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 16;

    using key_type = typename std::conditional<
        std::is_same<float, real>::value, cl_uint, cl_ulong
        >::type;

    std::default_random_engine rng( std::rand() );
    std::uniform_int_distribution<key_type> rnd;

    std::vector<key_type> host_keys(N);
    std::vector<key_type> host_work(N);
    for(key_type &key : host_keys) key = rnd(rng);

    vex::vector<key_type> dev_keys(ctx, host_keys);
    vex::vector<key_type> dev_work(ctx, N);

    // One untimed sort before the measurement.
    dev_work = dev_keys;
    vex::sort(dev_work);

    double elapsed = 0;
    for(size_t rep = 0; rep != M; ++rep) {
        dev_work = dev_keys;
        ctx.finish();

        prof.tic_cpu("VexCL");
        vex::sort(dev_work);
        ctx.finish();
        elapsed += prof.toc("VexCL");
    }

    std::cout << "Sort (" << vex::type_name<key_type>() << ")\n";
    std::cout << " VexCL: " << N * M / elapsed << " keys/sec\n";

#ifdef HAVE_BOOST_COMPUTE
    dev_work = dev_keys;
    vex::compute::sort(dev_work);

    elapsed = 0;
    for(size_t rep = 0; rep != M; ++rep) {
        dev_work = dev_keys;
        ctx.finish();

        prof.tic_cpu("Boost.Compute");
        vex::compute::sort(dev_work);
        ctx.finish();
        elapsed += prof.toc("Boost.Compute");
    }

    std::cout << " Boost.Compute: " << N * M / elapsed << " keys/sec\n";
#endif

#ifdef HAVE_CLOGS
    dev_work = dev_keys;
    vex::clogs::sort(dev_work);

    elapsed = 0;
    for(size_t rep = 0; rep != M; ++rep) {
        dev_work = dev_keys;
        ctx.finish();

        prof.tic_cpu("CLOGS");
        vex::clogs::sort(dev_work);
        ctx.finish();
        elapsed += prof.toc("CLOGS");
    }

    std::cout << " CLOGS: " << N * M / elapsed << " keys/sec\n";
#endif

    if (options.bm_cpu) {
        elapsed = 0;
        for(size_t rep = 0; rep != M; ++rep) {
            // Restore the unsorted keys; host_work already has N elements.
            host_work = host_keys;

            prof.tic_cpu("STL");
            std::sort(host_work.begin(), host_work.end());
            elapsed += prof.toc("STL");
        }

        std::cout << " STL: " << N * M / elapsed << " keys/sec\n";
    }

    std::cout << std::endl;
}
//---------------------------------------------------------------------------
// Measures exclusive-scan throughput (keys per second) over N random integer
// keys, M repetitions per implementation: vex::exclusive_scan, optionally the
// Boost.Compute and CLOGS backends when compiled in, and a sequential loop on
// the host when options.bm_cpu is set. Keys are cl_uint when real is float
// and cl_ulong otherwise.
template <typename real>
void benchmark_scan(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 16;

    using key_type = typename std::conditional<
        std::is_same<float, real>::value, cl_uint, cl_ulong
        >::type;

    std::default_random_engine rng( std::rand() );
    std::uniform_int_distribution<key_type> rnd;

    std::vector<key_type> host_in(N);
    std::vector<key_type> host_out(N);
    for(key_type &key : host_in) key = rnd(rng);

    vex::vector<key_type> dev_in(ctx, host_in);
    vex::vector<key_type> dev_out(ctx, N);

    // One untimed scan before the measurement.
    vex::exclusive_scan(dev_in, dev_out);
    ctx.finish();

    prof.tic_cpu("VexCL");
    for(size_t rep = 0; rep != M; ++rep)
        vex::exclusive_scan(dev_in, dev_out);
    ctx.finish();
    double elapsed = prof.toc("VexCL");

    std::cout << "Scan (" << vex::type_name<key_type>() << ")\n";
    std::cout << " VexCL: " << N * M / elapsed << " keys/sec\n";

#ifdef HAVE_BOOST_COMPUTE
    vex::compute::exclusive_scan(dev_in, dev_out);
    ctx.finish();

    prof.tic_cpu("Boost.Compute");
    for(size_t rep = 0; rep != M; ++rep)
        vex::compute::exclusive_scan(dev_in, dev_out);
    ctx.finish();
    elapsed = prof.toc("Boost.Compute");

    std::cout << " Boost.Compute: " << N * M / elapsed << " keys/sec\n";
#endif

#ifdef HAVE_CLOGS
    vex::clogs::exclusive_scan(dev_in, dev_out);
    ctx.finish();

    prof.tic_cpu("CLOGS");
    for(size_t rep = 0; rep != M; ++rep)
        vex::clogs::exclusive_scan(dev_in, dev_out);
    ctx.finish();
    elapsed = prof.toc("CLOGS");

    std::cout << " CLOGS: " << N * M / elapsed << " keys/sec\n";
#endif

    if (options.bm_cpu) {
        prof.tic_cpu("CPU");
        for(size_t rep = 0; rep != M; ++rep) {
            // Each output element receives the sum of all inputs before it.
            key_type running = key_type();
            for(size_t pos = 0; pos != N; ++pos) {
                host_out[pos] = running;
                running += host_in[pos];
            }
        }
        elapsed = prof.toc("CPU");

        std::cout << " CPU: " << N * M / elapsed << " keys/sec\n";
    }

    std::cout << std::endl;
}
//---------------------------------------------------------------------------
// Runs every benchmark enabled in the global `options` for the value type
// `real`, using ctx, and records the timings in prof.
//
// One record is appended to the file "profile_<type name>.dat": the value of
// ctx.size() followed by the GFLOPS / bandwidth pairs of the SAXPY, vector
// arithmetic, reduction, stencil and SpMV benchmarks that were run. The
// results of the CCSR SpMV, RNG, sort and scan benchmarks are printed by the
// benchmarks themselves but not logged.
template <typename real>
void run_tests(const vex::Context &ctx, vex::profiler<> &prof)
{
    std::cout
        << "----------------------------------------------------------\n"
        << "Profiling \"" << vex::type_name<real>() << "\" performance\n"
        << "----------------------------------------------------------\n"
        << ctx << std::endl;

    std::ostringstream fname;
    fname << "profile_" << vex::type_name<real>() << ".dat";
    std::ofstream log(fname.str().c_str(), std::ios::app);

    log << ctx.size() << " ";

    double gflops, bwidth;

    prof.tic_cpu( vex::type_name<real>() );

    if (options.bm_saxpy) {
        prof.tic_cpu("Vector SAXPY");
        std::tie(gflops, bwidth) = benchmark_saxpy<real>(ctx, prof);
        prof.toc("Vector SAXPY");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_vector) {
        prof.tic_cpu("Vector arithmetic");
        std::tie(gflops, bwidth) = benchmark_vector<real>(ctx, prof);
        prof.toc("Vector arithmetic");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_reductor) {
        prof.tic_cpu("Reduction");
        std::tie(gflops, bwidth) = benchmark_reductor<real>(ctx, prof);
        prof.toc("Reduction");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_stencil) {
        prof.tic_cpu("Stencil");
        std::tie(gflops, bwidth) = benchmark_stencil<real>(ctx, prof);
        prof.toc("Stencil");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_spmv) {
        prof.tic_cpu("SpMV");
        std::tie(gflops, bwidth) = benchmark_spmv<real>(ctx, prof);
        prof.toc("SpMV");

        // SpMV is the last logged column, so it also terminates the record.
        log << gflops << " " << bwidth << std::endl;

        prof.tic_cpu("SpMV (CCSR)");
        std::tie(gflops, bwidth) = benchmark_spmv_ccsr<real>(ctx, prof);
        prof.toc("SpMV (CCSR)");
    } else {
        // The file is opened in append mode: terminate the record even when
        // SpMV is skipped, otherwise the next run continues on the same line.
        log << std::endl;
    }

    if (options.bm_rng) {
        prof.tic_cpu("Random number generation");
        benchmark_rng<real>(ctx, prof);
        prof.toc("Random number generation");
    }

    if (options.bm_sort) {
        prof.tic_cpu("Sorting");
        benchmark_sort<real>(ctx, prof);
        prof.toc("Sorting");
    }

    if (options.bm_scan) {
        prof.tic_cpu("Scanning");
        benchmark_scan<real>(ctx, prof);
        prof.toc("Scanning");
    }

    prof.toc( vex::type_name<real>() );

    std::cout << std::endl << std::endl;
}
//---------------------------------------------------------------------------
int main(int argc, char *argv[]) {
namespace po = boost::program_options;
po::options_description desc("Options");
desc.add_options()
("help,h", "show help")
("revert,r", "revert options")
("bm_saxpy",
po::value<bool>(&options.bm_saxpy)->default_value(true),
"benchmark SAXPY (on/off)"
)
("bm_vec",
po::value<bool>(&options.bm_vector)->default_value(true),
"benchmark vector arithmetics (on/off)"
)
("bm_rdc",
po::value<bool>(&options.bm_reductor)->default_value(true),
"benchmark reduction (on/off)"
)
("bm_stn",
po::value<bool>(&options.bm_stencil)->default_value(true),
"benchmark stencil convolution (on/off)"
)
("bm_spm",
po::value<bool>(&options.bm_spmv)->default_value(true),
"benchmark sparse matrix - vector product (on/off)"
)
("bm_rng",
po::value<bool>(&options.bm_rng)->default_value(true),
"benchmark random number generation (on/off)"
)
("bm_sort",
po::value<bool>(&options.bm_sort)->default_value(true),
"benchmark sorting (on/off)"
)
("bm_scan",
po::value<bool>(&options.bm_sort)->default_value(true),
"benchmark exclusive scan (on/off)"
)
("bm_cpu",
po::value<bool>(&options.bm_cpu)->default_value(true),
"benchmark host CPU performance (on/off)"
)
;
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
if (vm.count("help")) {
std::cout << desc << std::endl;
return 0;
}
if (vm.count("revert")) {
options.revert();
}
try {
vex::profiler<> prof;
{
vex::Context ctx(vex::Filter::Env && vex::Filter::DoublePrecision);
if (ctx) run_tests<double>(ctx, prof);
}
{
vex::Context ctx(vex::Filter::Env);
if (ctx) run_tests<float>(ctx, prof);
}
std::cout << prof << std::endl;
} catch (const vex::error &e) {
std::cerr << e << std::endl;
return 1;
}
}
// vim: et
Jump to Line
Something went wrong with that request. Please try again.