diff --git a/amgcl/backend/builtin.hpp b/amgcl/backend/builtin.hpp index 6e20184e..64bbd110 100644 --- a/amgcl/backend/builtin.hpp +++ b/amgcl/backend/builtin.hpp @@ -47,6 +47,7 @@ THE SOFTWARE. #include #include #include +#include namespace amgcl { namespace backend { @@ -297,22 +298,7 @@ struct crs { for(ptrdiff_t i = 0; i < static_cast(n); ++i) { ptr_type beg = A.ptr[i]; ptr_type end = A.ptr[i + 1]; - insertion_sort(A.col.data() + beg, A.val.data() + beg, end - beg); - } - } - - static void insertion_sort(col_type *col, val_type *val, int n) { - for(int j = 1; j < n; ++j) { - col_type c = col[j]; - val_type v = val[j]; - int i = j - 1; - while(i >= 0 && col[i] > c) { - col[i + 1] = col[i]; - val[i + 1] = val[i]; - i--; - } - col[i + 1] = c; - val[i + 1] = v; + detail::sort_row(A.col.data() + beg, A.val.data() + beg, end - beg); } } diff --git a/amgcl/coarsening/smoothed_aggr_emin.hpp b/amgcl/coarsening/smoothed_aggr_emin.hpp new file mode 100644 index 00000000..2af6252b --- /dev/null +++ b/amgcl/coarsening/smoothed_aggr_emin.hpp @@ -0,0 +1,427 @@ +#ifndef AMGCL_COARSENING_SMOOTHED_AGGR_EMIN_HPP +#define AMGCL_COARSENING_SMOOTHED_AGGR_EMIN_HPP + +/* +The MIT License + +Copyright (c) 2012-2014 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file amgcl/coarsening/smoothed_aggr_emin.hpp + * \author Denis Demidov + * \brief Smoothed aggregation with energy minimization coarsening. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace amgcl { +namespace coarsening { + +template +struct smoothed_aggr_emin { + struct params { + typename Aggregates::params aggr; + }; + + template + static boost::tuple< + boost::shared_ptr< backend::crs >, + boost::shared_ptr< backend::crs > + > + transfer_operators(const backend::crs &A, params &prm) + { + typedef backend::crs matrix; + const size_t n = rows(A); + + TIC("aggregates"); + Aggregates aggr(A, prm.aggr); + prm.aggr.eps_strong *= 0.5; + TOC("aggregates"); + + TIC("interpolation"); + std::vector D(n); + std::vector omega(n); + + boost::shared_ptr P = interpolation(A, aggr, D, omega); + boost::shared_ptr R = restriction (A, aggr, D, omega); + TOC("interpolation"); + + return boost::make_tuple(P, R); + } + + template + static boost::shared_ptr< backend::crs > + coarse_operator( + const backend::crs &A, + const backend::crs &P, + const backend::crs &R, + const params& + ) + { + return detail::galerkin(A, P, R); + } + + private: + template + static boost::shared_ptr< backend::crs > + interpolation( + const backend::crs &A, const Aggregates &aggr, + std::vector &D, std::vector &omega + ) + { + typedef backend::crs matrix; + const size_t n = rows(A); + const size_t nc = aggr.count; + + boost::shared_ptr P = boost::make_shared(); + P->nrows = n; + P->ncols = nc; + P->ptr.resize(n + 1, 0); + + + std::vector omega_p(nc, 0); + std::vector denum(nc, 0); + +#pragma omp parallel + { +#ifdef _OPENMP + int nt = omp_get_num_threads(); + int tid = omp_get_thread_num(); + + size_t chunk_size = (n + nt - 1) / nt; + size_t chunk_start = tid * chunk_size; + size_t chunk_end = std::min(n, chunk_start + chunk_size); +#else + size_t chunk_start = 0; + size_t chunk_end = n; +#endif + + std::vector marker(nc, -1); + + // Compute A * P_tent product. P_tent is stored implicitly in aggr. + + // 1. Compute structure of the product result. + // 2. Store diagonal of filtered matrix. + for(size_t i = chunk_start; i < chunk_end; ++i) { + Val dia = 0; + + for(Ptr j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { + Col c = A.col[j]; + Val v = A.val[j]; + + if (static_cast(c) == i) + dia += v; + else if (!aggr.strong_connection[j]) + dia -= v; + else { + long g = aggr.id[c]; if (g < 0) continue; + + if (marker[g] != i) { + marker[g] = i; + ++( P->ptr[i + 1] ); + } + } + } + + D[i] = dia; + } + + boost::fill(marker, -1); + +#pragma omp barrier +#pragma omp single + { + boost::partial_sum(P->ptr, P->ptr.begin()); + P->col.resize(P->ptr.back()); + P->val.resize(P->ptr.back()); + } + + // 2. Compute the product result. + for(size_t i = chunk_start; i < chunk_end; ++i) { + Ptr row_beg = P->ptr[i]; + Ptr row_end = row_beg; + + for(Ptr j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { + Col c = A.col[j]; + + if (static_cast(c) != i && !aggr.strong_connection[j]) + continue; + + long g = aggr.id[c]; if (g < 0) continue; + + Val v = (static_cast(c) == i ? D[i] : A.val[j]); + + if (marker[g] < row_beg) { + marker[g] = row_end; + P->col[row_end] = g; + P->val[row_end] = v; + ++row_end; + } else { + P->val[marker[g]] += v; + } + } + + // Sort the new row by columns. + amgcl::detail::sort_row( + &P->col[row_beg], + &P->val[row_beg], + row_end - row_beg + ); + } + + boost::fill(marker, -1); + std::vector adap_col(128); + std::vector adap_val(128); + +#pragma omp barrier + + // Compute A * Dinv * AP row by row and compute columnwise scalar products + // necessary for computation of omega_p. The actual results of + // matrix-matrix product are not stored. + for(size_t ia = chunk_start; ia < chunk_end; ++ia) { + adap_col.clear(); + adap_val.clear(); + + // Form current row of ADAP matrix. + for(Ptr ja = A.ptr[ia], ea = A.ptr[ia + 1]; ja < ea; ++ja) { + Col ca = A.col[ja]; + + if (static_cast(ca) != ia && !aggr.strong_connection[ja]) + continue; + + Val dia = D[ca]; + Val va = (ca == ia ? dia : A.val[ja]); + + for(Ptr jb = P->ptr[ca], eb = P->ptr[ca + 1]; jb < eb; ++jb) { + Col cb = P->col[jb]; + Val vb = P->val[jb] / dia; + + if (marker[cb] < 0) { + marker[cb] = adap_col.size(); + adap_col.push_back(cb); + adap_val.push_back(va * vb); + } else { + adap_val[marker[cb]] += va * vb; + } + } + } + + amgcl::detail::sort_row( + adap_col.data(), adap_val.data(), adap_col.size() + ); + + // Update columnwise scalar products (AP,ADAP) and (ADAP,ADAP). + // 1. (AP, ADAP) + for( + Ptr ja = P->ptr[ia], ea = P->ptr[ia + 1], + jb = 0, eb = adap_col.size(); + ja < ea && jb < eb; + ) + { + Col ca = P->col[ja]; + Col cb = adap_col[jb]; + + if (ca < cb) + ++ja; + else if (cb < ca) + ++jb; + else /*ca == cb*/ { +#pragma omp atomic + omega_p[ca] += P->val[ja] * adap_val[jb]; + ++ja; + ++jb; + } + } + + // 2. (ADAP, ADAP) (and clear marker) + for(size_t j = 0, e = adap_col.size(); j < e; ++j) { + Col c = adap_col[j]; + Val v = adap_val[j]; +#pragma omp atomic + denum[c] += v * v; + marker[c] = -1; + } + } + } + + boost::transform(omega_p, denum, omega_p.begin(), std::divides()); + + // Convert omega from (4.13) to (4.14) (Sala, Tuminaro, 2008): +#pragma omp parallel for + for(ptrdiff_t i = 0; i < static_cast(n); ++i) { + Val w = -1; + + for(Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) { + Col c = A.col[j]; + if (static_cast(c) != i && !aggr.strong_connection[j]) + continue; + + long g = aggr.id[c]; if (g < 0) continue; + if (omega_p[g] < w || w < 0) w = omega_p[g]; + } + + omega[i] = std::max(w, static_cast(0)); + } + + // Update AP to obtain P. +#pragma omp parallel for + for(ptrdiff_t i = 0; i < static_cast(n); ++i) { + Val wd = omega[i] / D[i]; + + for(Ptr j = P->ptr[i], e = P->ptr[i + 1]; j < e; ++j) + P->val[j] = (P->col[j] == aggr.id[i] ? 1 : 0) - wd * P->val[j]; + } + + return P; + } + + template + static boost::shared_ptr< backend::crs > + restriction( + const backend::crs &A, const Aggregates &aggr, + const std::vector &D, const std::vector &omega + ) + { + typedef backend::crs matrix; + const size_t n = rows(A); + const size_t nc = aggr.count; + + // Get structure of R_tent from aggr + std::vector R_tent_ptr(nc + 1, 0); + for(size_t i = 0; i < n; ++i) { + long g = aggr.id[i]; if (g < 0) continue; + ++R_tent_ptr[g + 1]; + } + + boost::partial_sum(R_tent_ptr, R_tent_ptr.begin()); + std::vector R_tent_col(R_tent_ptr.back()); + + for(size_t i = 0; i < n; ++i) { + long g = aggr.id[i]; if (g < 0) continue; + R_tent_col[R_tent_ptr[g]++] = i; + } + + std::rotate(R_tent_ptr.begin(), R_tent_ptr.end() - 1, R_tent_ptr.end()); + R_tent_ptr[0] = 0; + + boost::shared_ptr R = boost::make_shared(); + R->nrows = nc; + R->ncols = n; + R->ptr.resize(nc + 1, 0); + + // Compute R_tent * A / D. +#pragma omp parallel + { +#ifdef _OPENMP + int nt = omp_get_num_threads(); + int tid = omp_get_thread_num(); + + size_t chunk_size = (nc + nt - 1) / nt; + size_t chunk_start = tid * chunk_size; + size_t chunk_end = std::min(nc, chunk_start + chunk_size); +#else + size_t chunk_start = 0; + size_t chunk_end = nc; +#endif + + std::vector marker(n, -1); + + for(size_t ir = chunk_start; ir < chunk_end; ++ir) { + for(Ptr jr = R_tent_ptr[ir], er = R_tent_ptr[ir + 1]; jr < er; ++jr) { + Col cr = R_tent_col[jr]; + for(Ptr ja = A.ptr[cr], ea = A.ptr[cr + 1]; ja < ea; ++ja) { + Col ca = A.col[ja]; + if (ca != cr && !aggr.strong_connection[ja]) continue; + + if (marker[ca] != ir) { + marker[ca] = ir; + ++R->ptr[ir + 1]; + } + } + } + } + + boost::fill(marker, -1); + +#pragma omp barrier +#pragma omp single + { + boost::partial_sum(R->ptr, R->ptr.begin()); + R->col.resize(R->ptr.back()); + R->val.resize(R->ptr.back()); + } + + for(size_t ir = chunk_start; ir < chunk_end; ++ir) { + Ptr row_beg = R->ptr[ir]; + Ptr row_end = row_beg; + + for(Ptr jr = R_tent_ptr[ir], er = R_tent_ptr[ir + 1]; jr < er; ++jr) { + Col cr = R_tent_col[jr]; + + for(Ptr ja = A.ptr[cr], ea = A.ptr[cr + 1]; ja < ea; ++ja) { + Col ca = A.col[ja]; + if (ca != cr && !aggr.strong_connection[ja]) continue; + Val va = (ca == cr ? 1 : (A.val[ja] / D[ca])); + + if (marker[ca] < row_beg) { + marker[ca] = row_end; + R->col[row_end] = ca; + R->val[row_end] = va; + ++row_end; + } else { + R->val[marker[ca]] += va; + } + } + } + } + } + + // Update R. +#pragma omp parallel for + for(ptrdiff_t i = 0; i < static_cast(nc); ++i) { + for(Ptr j = R->ptr[i], e = R->ptr[i + 1]; j < e; ++j) { + Col c = R->col[j]; + R->val[j] = (aggr.id[c] == i ? 1 : 0) - omega[c] * R->val[j]; + } + } + + return R; + } +}; + +} // namespace coarsening +} // namespace amgcl + + + +#endif diff --git a/amgcl/coarsening/smoothed_aggregation.hpp b/amgcl/coarsening/smoothed_aggregation.hpp index 1d8f3599..c1defc1b 100644 --- a/amgcl/coarsening/smoothed_aggregation.hpp +++ b/amgcl/coarsening/smoothed_aggregation.hpp @@ -75,9 +75,7 @@ struct smoothed_aggregation { boost::shared_ptr P = boost::make_shared(); P->nrows = n; P->ncols = aggr.count; - P->ptr.resize(n + 1); - - boost::fill(P->ptr, 0); + P->ptr.resize(n + 1, 0); #pragma omp parallel { diff --git a/amgcl/detail/sort_row.hpp b/amgcl/detail/sort_row.hpp new file mode 100644 index 00000000..de5c4078 --- /dev/null +++ b/amgcl/detail/sort_row.hpp @@ -0,0 +1,59 @@ +#ifndef AMGCL_DETAIL_SORT_ROW_HPP +#define AMGCL_DETAIL_SORT_ROW_HPP + +/* +The MIT License + +Copyright (c) 2012-2014 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file amgcl/detail/sort_row.hpp + * \author Denis Demidov + * \brief Sort row of CRS matrix by columns. + */ + +namespace amgcl { +namespace detail { + +template +void sort_row(Col *col, Val *val, int n) { + for(int j = 1; j < n; ++j) { + Col c = col[j]; + Val v = val[j]; + + int i = j - 1; + + while(i >= 0 && col[i] > c) { + col[i + 1] = col[i]; + val[i + 1] = val[i]; + i--; + } + + col[i + 1] = c; + val[i + 1] = v; + } +} + +} // namespace detail +} // namespace amgcl + +#endif diff --git a/examples/solver.cpp b/examples/solver.cpp index 394648ca..248744da 100644 --- a/examples/solver.cpp +++ b/examples/solver.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -26,7 +26,7 @@ int main() { typedef amgcl::amg< amgcl::backend::builtin, - amgcl::coarsening::smoothed_aggregation< + amgcl::coarsening::smoothed_aggr_emin< amgcl::coarsening::plain_aggregates >, amgcl::relaxation::spai0