Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various benchmark improvements #812

Merged
merged 10 commits into from
Jul 6, 2021
6 changes: 6 additions & 0 deletions benchmark/blas/blas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,12 @@ void apply_blas(const char *operation_name, std::shared_ptr<gko::Executor> exec,
} catch (const std::exception &e) {
add_or_set_member(test_case["blas"][operation_name], "completed", false,
allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["blas"][operation_name], "error",
msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
13 changes: 9 additions & 4 deletions benchmark/conversions/conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,22 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to,
matrix_to->copy_from(matrix_from);
}
add_or_set_member(conversion_case[conversion_name], "time",
timer->compute_average_time(), allocator);
ic.compute_average_time(), allocator);
add_or_set_member(conversion_case[conversion_name], "repetitions",
timer->get_num_repetitions(), allocator);
ic.get_num_repetitions(), allocator);

// compute and write benchmark data
add_or_set_member(conversion_case[conversion_name], "completed", true,
allocator);
} catch (const std::exception &e) {
add_or_set_member(test_case["conversions"][conversion_name],
"completed", false, allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["conversions"][conversion_name],
"error", msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down Expand Up @@ -156,8 +162,7 @@ int main(int argc, char *argv[])
try {
auto matrix_from =
share(formats::matrix_factory.at(format_from)(exec, data));
for (const auto &format : formats::matrix_factory) {
const auto format_to = std::get<0>(format);
for (const auto &format_to : formats) {
pratikvn marked this conversation as resolved.
Show resolved Hide resolved
upsj marked this conversation as resolved.
Show resolved Hide resolved
if (format_from == format_to) {
continue;
}
Expand Down
6 changes: 6 additions & 0 deletions benchmark/preconditioner/preconditioner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,12 @@ void run_preconditioner(const char *precond_name,
rapidjson::Value(rapidjson::kObjectType), allocator);
add_or_set_member(test_case["preconditioner"][encoded_name.c_str()],
"completed", false, allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["preconditioner"][encoded_name.c_str()],
"error", msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
18 changes: 17 additions & 1 deletion benchmark/run_all_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ if [ ! "${EXECUTOR}" ]; then
echo "EXECUTOR environment variable not set - assuming \"${EXECUTOR}\"" 1>&2
fi

# Default to 10 repetitions when the caller did not provide REPETITIONS.
if [ -z "${REPETITIONS}" ]; then
    REPETITIONS=10
    echo "REPETITIONS environment variable not set - assuming ${REPETITIONS}" >&2
fi

if [ ! "${SEGMENTS}" ]; then
echo "SEGMENTS environment variable not set - running entire suite" 1>&2
SEGMENTS=1
Expand All @@ -35,6 +40,11 @@ if [ ! "${FORMATS}" ]; then
FORMATS="csr,coo,ell,hybrid,sellp"
fi

# Default ELL imbalance limit forwarded to the benchmarks through
# --ell_imbalance_limit when the caller did not provide one.
if [ ! "${ELL_IMBALANCE_LIMIT}" ]; then
    ELL_IMBALANCE_LIMIT=100
    echo "ELL_IMBALANCE_LIMIT environment variable not set - assuming ${ELL_IMBALANCE_LIMIT}" 1>&2
fi

if [ ! "${SOLVERS}" ]; then
SOLVERS="bicgstab,cg,cgs,fcg,gmres,cb_gmres_reduce1,idr"
echo "SOLVERS environment variable not set - assuming \"${SOLVERS}\"" 1>&2
Expand Down Expand Up @@ -67,7 +77,7 @@ fi

# Default maximum Jacobi block size used by the solver benchmarks. The notice
# has to go through `echo`; a bare quoted string would be run as a command.
if [ ! "${SOLVERS_JACOBI_MAX_BS}" ]; then
    SOLVERS_JACOBI_MAX_BS="32"
    echo "SOLVERS_JACOBI_MAX_BS environment variable not set - assuming \"${SOLVERS_JACOBI_MAX_BS}\"" 1>&2
fi

if [ ! "${BENCHMARK_PRECISION}" ]; then
Expand Down Expand Up @@ -202,6 +212,8 @@ run_conversion_benchmarks() {
./conversions/conversions${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \
--executor="${EXECUTOR}" --formats="${FORMATS}" \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
--repetitions="${REPETITIONS}" \
--ell_imbalance_limit="${ELL_IMBALANCE_LIMIT}" \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -218,6 +230,8 @@ run_spmv_benchmarks() {
./spmv/spmv${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \
--executor="${EXECUTOR}" --formats="${FORMATS}" \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
--repetitions="${REPETITIONS}" \
--ell_imbalance_limit="${ELL_IMBALANCE_LIMIT}" \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -239,6 +253,7 @@ run_solver_benchmarks() {
--gpu_timer=${GPU_TIMER} \
--jacobi_max_block_size=${SOLVERS_JACOBI_MAX_BS} --device_id="${DEVICE_ID}" \
--gmres_restart="${SOLVERS_GMRES_RESTART}" \
--repetitions="${REPETITIONS}" \
upsj marked this conversation as resolved.
Show resolved Hide resolved
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -265,6 +280,7 @@ run_preconditioner_benchmarks() {
--jacobi_max_block_size="${bsize}" \
--jacobi_storage="${prec}" \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
--repetitions="${REPETITIONS}" \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't run multiple SpMVs, so I would use the default number of repetitions.

<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
done
Expand Down
6 changes: 6 additions & 0 deletions benchmark/solver/solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,12 @@ void solve_system(const std::string &solver_name,
} catch (const std::exception &e) {
add_or_set_member(test_case["solver"][precond_solver_name], "completed",
false, allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["solver"][precond_solver_name], "error",
msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
8 changes: 7 additions & 1 deletion benchmark/spmv/spmv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ void apply_spmv(const char *format_name, std::shared_ptr<gko::Executor> exec,
for (auto _ : ic_tuning.run()) {
system_matrix->apply(lend(b), lend(x_clone));
}
tuning_case["time"].PushBack(tuning_timer->compute_average_time(),
tuning_case["time"].PushBack(ic_tuning.compute_average_time(),
allocator);
tuning_case["values"].PushBack(val, allocator);
}
Expand All @@ -154,6 +154,12 @@ void apply_spmv(const char *format_name, std::shared_ptr<gko::Executor> exec,
} catch (const std::exception &e) {
add_or_set_member(test_case["spmv"][format_name], "completed", false,
allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["spmv"][format_name], "error",
msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
80 changes: 76 additions & 4 deletions benchmark/utils/formats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/ginkgo.hpp>


#include <algorithm>
#include <map>
#include <string>

Expand Down Expand Up @@ -152,6 +153,10 @@ std::string format_command =
// the formats command-line argument
DEFINE_string(formats, "coo", formats::format_command.c_str());

// the ell_imbalance_limit command-line argument; check_ell_admissibility
// compares it against the ratio of the longest to the average row length
DEFINE_uint64(
ell_imbalance_limit, 100,
"Maximal storage overhead above which ELL benchmarks will be skipped");
upsj marked this conversation as resolved.
Show resolved Hide resolved


namespace formats {

Expand Down Expand Up @@ -181,6 +186,51 @@ std::unique_ptr<MatrixType> read_matrix_from_data(
return mat;
}


/**
 * Creates a CSR strategy of the given type for the given executor if possible,
 * falls back to csr::classical for executors without support for this strategy.
 *
 * @tparam Strategy  one of csr::automatical or csr::load_balance
 *
 * @param exec  the executor the strategy is created for; a Strategy instance
 *              is only built when this is a gko::CudaExecutor or a
 *              gko::HipExecutor
 *
 * @return a Strategy constructed from the CUDA/HIP executor, or a
 *         csr::classical strategy for every other executor type
 */
template <typename Strategy>
std::shared_ptr<csr::strategy_type> create_gpu_strategy(
    std::shared_ptr<const gko::Executor> exec)
{
    // The strategy constructors need the concrete executor type, so the
    // dynamic type of exec is probed instead of passing exec through directly.
    if (auto cuda = dynamic_cast<const gko::CudaExecutor *>(exec.get())) {
        return std::make_shared<Strategy>(cuda->shared_from_this());
    } else if (auto hip = dynamic_cast<const gko::HipExecutor *>(exec.get())) {
        return std::make_shared<Strategy>(hip->shared_from_this());
    } else {
        return std::make_shared<csr::classical>();
    }
}


/**
 * Checks whether the given matrix data exceeds the ELL imbalance limit set by
 * the --ell_imbalance_limit flag.
 *
 * The imbalance is measured as the integer quotient of the longest row length
 * and the average row length, where the average is rounded down but never
 * smaller than 1. Matrix data without rows is always accepted.
 *
 * @param data  the matrix data to check
 *
 * @throws gko::Error if the imbalance limit is exceeded
 */
void check_ell_admissibility(const gko::matrix_data<etype> &data)
{
    if (data.size[0] == 0) {
        return;
    }
    // count the number of stored entries in each row
    std::vector<gko::size_type> row_lengths(data.size[0]);
    for (const auto &nz : data.nonzeros) {
        row_lengths[nz.row]++;
    }
    auto max_len = *std::max_element(row_lengths.begin(), row_lengths.end());
    // clamp to 1 so that very sparse matrices do not divide by zero below
    auto avg_len =
        std::max<gko::size_type>(data.nonzeros.size() / data.size[0], 1);
    if (max_len / avg_len > FLAGS_ell_imbalance_limit) {
        throw gko::Error(__FILE__, __LINE__,
                         "Matrix exceeds ELL imbalance limit");
    }
}


/**
* Creates a Ginkgo matrix from the intermediate data representation format
* gko::matrix_data with support for variable arguments.
Expand All @@ -201,15 +251,36 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
std::shared_ptr<const gko::Executor>,
const gko::matrix_data<etype> &)>>
matrix_factory{
{"csr", READ_MATRIX(csr, std::make_shared<csr::automatical>())},
{"csri", READ_MATRIX(csr, std::make_shared<csr::load_balance>())},
{"csr",
[](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) -> std::unique_ptr<csr> {
auto mat =
csr::create(exec, create_gpu_strategy<csr::automatical>(exec));
mat->read(data);
return mat;
}},
{"csri",
[](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) -> std::unique_ptr<csr> {
auto mat = csr::create(
exec, create_gpu_strategy<csr::load_balance>(exec));
mat->read(data);
return mat;
}},
{"csrm", READ_MATRIX(csr, std::make_shared<csr::merge_path>())},
{"csrc", READ_MATRIX(csr, std::make_shared<csr::classical>())},
{"coo", read_matrix_from_data<gko::matrix::Coo<etype>>},
{"ell", read_matrix_from_data<gko::matrix::Ell<etype>>},
{"ell", [](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) {
check_ell_admissibility(data);
auto mat = gko::matrix::Ell<etype>::create(exec);
mat->read(data);
return mat;
}},
{"ell-mixed",
[](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) {
check_ell_admissibility(data);
gko::matrix_data<gko::next_precision<etype>> conv_data;
conv_data.size = data.size;
conv_data.nonzeros.resize(data.nonzeros.size());
Expand All @@ -220,7 +291,8 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
it->value = el.value;
++it;
}
auto mat = gko::matrix::Ell<gko::next_precision<etype>>::create(std::move(exec));
auto mat = gko::matrix::Ell<gko::next_precision<etype>>::create(
std::move(exec));
mat->read(conv_data);
return mat;
}},
Expand Down
4 changes: 4 additions & 0 deletions benchmark/utils/general.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ DEFINE_string(double_buffer, "",
DEFINE_bool(detailed, true,
"If set, performs several runs to obtain more detailed results");

DEFINE_bool(keep_errors, false,
"If set, writes exception messages during the execution into the "
"JSON output");

DEFINE_bool(nested_names, false, "If set, separately logs nested operations");

DEFINE_uint32(seed, 42, "Seed used for the random number generator");
Expand Down
26 changes: 19 additions & 7 deletions omp/components/format_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/types.hpp>


#include "core/components/prefix_sum.hpp"


namespace gko {
namespace kernels {
namespace omp {
Expand Down Expand Up @@ -84,16 +87,25 @@ inline void convert_unsorted_idxs_to_ptrs(const IndexType *idxs,
template <typename IndexType>
inline void convert_sorted_idxs_to_ptrs(const IndexType *idxs,
size_type num_nonzeros, IndexType *ptrs,
size_type length)
size_type num_rows)
{
ptrs[0] = 0;
ptrs[length - 1] = num_nonzeros;

#pragma omp parallel for schedule( \
static, ceildiv(num_nonzeros, omp_get_max_threads()))
for (size_type i = 0; i < num_nonzeros - 1; i++) {
for (size_type j = idxs[i] + 1; j <= idxs[i + 1]; j++) {
ptrs[j] = i + 1;
if (num_nonzeros == 0) {
#pragma omp parallel for
for (size_type row = 0; row < num_rows; row++) {
ptrs[row + 1] = 0;
}
} else {
// add virtual sentinel values 0 and num_rows to handle empty first and
// last rows
#pragma omp parallel for
for (size_type i = 0; i <= num_nonzeros; i++) {
auto begin_row = i == 0 ? size_type{} : idxs[i - 1];
auto end_row = i == num_nonzeros ? num_rows : idxs[i];
for (auto row = begin_row; row < end_row; row++) {
ptrs[row + 1] = i;
}
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions omp/matrix/coo_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
/**
 * Converts row indices into row pointers by forwarding to
 * convert_sorted_idxs_to_ptrs.
 *
 * @tparam IndexType  the type of the indices and pointers
 *
 * @param exec  the OpenMP executor (not used by this function)
 * @param idxs  the row indices, num_nonzeros entries
 * @param num_nonzeros  the number of entries in idxs
 * @param ptrs  the output row pointers
 * @param num_rows  the number of rows (not the length of ptrs)
 */
template <typename IndexType>
void convert_row_idxs_to_ptrs(std::shared_ptr<const OmpExecutor> exec,
                              const IndexType *idxs, size_type num_nonzeros,
                              IndexType *ptrs, size_type num_rows)
{
    convert_sorted_idxs_to_ptrs(idxs, num_nonzeros, ptrs, num_rows);
}


Expand All @@ -222,8 +222,7 @@ void convert_to_csr(std::shared_ptr<const OmpExecutor> exec,

const auto source_row_idxs = source->get_const_row_idxs();

convert_row_idxs_to_ptrs(exec, source_row_idxs, nnz, row_ptrs,
num_rows + 1);
convert_row_idxs_to_ptrs(exec, source_row_idxs, nnz, row_ptrs, num_rows);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
Loading