Skip to content
Browse files

Fixes issue with empty matrix partitions

refs #9, #10, #11
  • Loading branch information...
1 parent 3cbb61f · commit 6e3a069813f0cfdee06c643f669a586bf66b7c5f · @ddemidov committed Aug 8, 2012
Showing 2 changed files with 58 additions and 61 deletions.
  1. +17 −18 examples/utests.cpp
  2. +41 −43 vexcl/spmat.hpp
View
35 examples/utests.cpp
@@ -410,7 +410,7 @@ int main(int argc, char *argv[]) {
const size_t n = 1 << 20;
const size_t m = 1 << 10;
const size_t nnz_per_row = 2;
- const size_t start_empty_row = 1 << 8;
+ const size_t start_empty_row = 1 << 8;
std::vector<size_t> row;
std::vector<size_t> col;
@@ -422,32 +422,31 @@ int main(int argc, char *argv[]) {
row.push_back(0);
for(size_t k = 0; k < n; k++) {
- if (k > start_empty_row) {
- row.push_back(col.size());
- continue;
- }
- std::set<size_t> cs;
- while(cs.size() < nnz_per_row)
- cs.insert(rand() % m);
-
- for(auto c = cs.begin(); c != cs.end(); c++) {
- col.push_back(*c);
- val.push_back(static_cast<double>(rand()) / RAND_MAX);
- }
-
- row.push_back(col.size());
+ if (k > start_empty_row) {
+ row.push_back(col.size());
+ continue;
+ }
+ std::set<size_t> cs;
+ while(cs.size() < nnz_per_row)
+ cs.insert(rand() % m);
+
+ for(auto c = cs.begin(); c != cs.end(); c++) {
+ col.push_back(*c);
+ val.push_back(static_cast<double>(rand()) / RAND_MAX);
+ }
+
+ row.push_back(col.size());
}
std::vector<double> x(m);
std::vector<double> y(n);
std::generate(x.begin(), x.end(), []() { return (double)rand() / RAND_MAX; });
vex::SpMat <double> A(ctx.queue(), y.size(), x.size(), row.data(), col.data(), val.data());
- vex::vector<double> X(ctx.queue(), x.size());
+ vex::vector<double> X(ctx.queue(), x);
vex::vector<double> Y(ctx.queue(), y.size());
- X.resize(ctx.queue(), x);
- Y = A * X;
+ Y = A * X;
copy(Y, y);
double res = 0;
View
84 vexcl/spmat.hpp
@@ -833,7 +833,7 @@ SpMat<real,column_t>::SpMatELL::SpMatELL(
}
// Wait for data to be copied before it gets deallocated.
- event.wait();
+ if (loc_ell.w || loc_csr.n || rem_ell.w || rem_csr.n) event.wait();
}
template <typename real, typename column_t>
@@ -951,49 +951,32 @@ void SpMat<real,column_t>::SpMatELL::mul_local(
size_t g_size = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>()
* wgsize[context()] * 4;
- if (loc_ell.w + loc_csr.n) {
- if (loc_ell.w) {
- if (append) {
- uint pos = 0;
- spmv_add[context()].setArg(pos++, n);
- spmv_add[context()].setArg(pos++, loc_ell.w);
- spmv_add[context()].setArg(pos++, pitch);
- spmv_add[context()].setArg(pos++, loc_ell.col);
- spmv_add[context()].setArg(pos++, loc_ell.val);
- spmv_add[context()].setArg(pos++, x);
- spmv_add[context()].setArg(pos++, y);
- spmv_add[context()].setArg(pos++, alpha);
-
- queue.enqueueNDRangeKernel(spmv_add[context()],
- cl::NullRange, g_size, wgsize[context()]);
- } else {
- uint pos = 0;
- spmv_set[context()].setArg(pos++, n);
- spmv_set[context()].setArg(pos++, loc_ell.w);
- spmv_set[context()].setArg(pos++, pitch);
- spmv_set[context()].setArg(pos++, loc_ell.col);
- spmv_set[context()].setArg(pos++, loc_ell.val);
- spmv_set[context()].setArg(pos++, x);
- spmv_set[context()].setArg(pos++, y);
- spmv_set[context()].setArg(pos++, alpha);
-
- queue.enqueueNDRangeKernel(spmv_set[context()],
- cl::NullRange, g_size, wgsize[context()]);
- }
- }
+ if (loc_ell.w) {
+ if (append) {
+ uint pos = 0;
+ spmv_add[context()].setArg(pos++, n);
+ spmv_add[context()].setArg(pos++, loc_ell.w);
+ spmv_add[context()].setArg(pos++, pitch);
+ spmv_add[context()].setArg(pos++, loc_ell.col);
+ spmv_add[context()].setArg(pos++, loc_ell.val);
+ spmv_add[context()].setArg(pos++, x);
+ spmv_add[context()].setArg(pos++, y);
+ spmv_add[context()].setArg(pos++, alpha);
- if (loc_csr.n) {
+ queue.enqueueNDRangeKernel(spmv_add[context()],
+ cl::NullRange, g_size, wgsize[context()]);
+ } else {
uint pos = 0;
- csr_add[context()].setArg(pos++, loc_csr.n);
- csr_add[context()].setArg(pos++, loc_csr.idx);
- csr_add[context()].setArg(pos++, loc_csr.row);
- csr_add[context()].setArg(pos++, loc_csr.col);
- csr_add[context()].setArg(pos++, loc_csr.val);
- csr_add[context()].setArg(pos++, x);
- csr_add[context()].setArg(pos++, y);
- csr_add[context()].setArg(pos++, alpha);
-
- queue.enqueueNDRangeKernel(csr_add[context()],
+ spmv_set[context()].setArg(pos++, n);
+ spmv_set[context()].setArg(pos++, loc_ell.w);
+ spmv_set[context()].setArg(pos++, pitch);
+ spmv_set[context()].setArg(pos++, loc_ell.col);
+ spmv_set[context()].setArg(pos++, loc_ell.val);
+ spmv_set[context()].setArg(pos++, x);
+ spmv_set[context()].setArg(pos++, y);
+ spmv_set[context()].setArg(pos++, alpha);
+
+ queue.enqueueNDRangeKernel(spmv_set[context()],
cl::NullRange, g_size, wgsize[context()]);
}
} else if (!append) {
@@ -1004,6 +987,21 @@ void SpMat<real,column_t>::SpMatELL::mul_local(
queue.enqueueNDRangeKernel(zero[context()],
cl::NullRange, g_size, wgsize[context()]);
}
+
+ if (loc_csr.n) {
+ uint pos = 0;
+ csr_add[context()].setArg(pos++, loc_csr.n);
+ csr_add[context()].setArg(pos++, loc_csr.idx);
+ csr_add[context()].setArg(pos++, loc_csr.row);
+ csr_add[context()].setArg(pos++, loc_csr.col);
+ csr_add[context()].setArg(pos++, loc_csr.val);
+ csr_add[context()].setArg(pos++, x);
+ csr_add[context()].setArg(pos++, y);
+ csr_add[context()].setArg(pos++, alpha);
+
+ queue.enqueueNDRangeKernel(csr_add[context()],
+ cl::NullRange, g_size, wgsize[context()]);
+ }
}
template <typename real, typename column_t>
void SpMat<real,column_t>::SpMatELL::mul_remote(
@@ -1195,7 +1193,7 @@ SpMat<real,column_t>::SpMatCSR::SpMatCSR(
0, &event);
}
- event.wait();
+ if (lrow.back() || !remote_cols.empty()) event.wait();
has_loc = lrow.back();
has_rem = !remote_cols.empty();

0 comments on commit 6e3a069

Please sign in to comment.
Something went wrong with that request. Please try again.