Provide backend::bytes() function

ddemidov · Jul 23, 2018 · 10dad8a · 10dad8a
1 parent 2d4d0d1
commit 10dad8a
Show file tree

Hide file tree

Showing 23 changed files with 462 additions and 20 deletions.
diff --git a/amgcl/amg.hpp b/amgcl/amg.hpp
@@ -289,6 +289,11 @@ class amg {
             return *system_matrix_ptr();
         }
 
+        size_t bytes() const { // Total of level::bytes() over every entry in levels.
+            size_t b = 0;
+            for(const auto &lvl : levels) b += lvl.bytes(); // each level reports its own members
+            return b;
+        }
     private:
         struct level {
             size_t m_rows, m_nonzeros;
@@ -305,6 +310,23 @@ class amg {
 
             std::shared_ptr<relax_type> relax;
 
+            size_t bytes() const { // Sum of backend::bytes() over the members this level currently holds.
+                size_t b = 0;
+
+                if (f) b += backend::bytes(*f); // members that are not set contribute nothing
+                if (u) b += backend::bytes(*u);
+                if (t) b += backend::bytes(*t);
+
+                if (A) b += backend::bytes(*A);
+                if (P) b += backend::bytes(*P);
+                if (R) b += backend::bytes(*R);
+
+                if (solve) b += backend::bytes(*solve);
+                if (relax) b += backend::bytes(*relax);
+
+                return b;
+            }
+
             level() {}
 
             level(std::shared_ptr<build_matrix> A,
@@ -538,26 +560,31 @@ std::ostream& operator<<(std::ostream &os, const amg<B, C, R> &a)
 
     size_t sum_dof = 0;
     size_t sum_nnz = 0;
+    size_t sum_mem = 0;
 
     for(const level &lvl : a.levels) {
         sum_dof += lvl.rows();
         sum_nnz += lvl.nonzeros();
+        sum_mem += lvl.bytes();
     }
 
     os << "Number of levels:    "   << a.levels.size()
         << "\nOperator complexity: " << std::fixed << std::setprecision(2)
         << 1.0 * sum_nnz / a.levels.front().nonzeros()
         << "\nGrid complexity:     " << std::fixed << std::setprecision(2)
         << 1.0 * sum_dof / a.levels.front().rows()
-        << "\n\nlevel     unknowns       nonzeros\n"
-        << "---------------------------------\n";
+        << "\nMemory footprint:    " << human_readable_memory(sum_mem)
+        << "\n\n"
+           "level     unknowns       nonzeros      memory\n"
+           "---------------------------------------------\n";
 
     size_t depth = 0;
     for(const level &lvl : a.levels) {
         os << std::setw(5)  << depth++
             << std::setw(13) << lvl.rows()
-            << std::setw(15) << lvl.nonzeros() << " ("
-            << std::setw(5) << std::fixed << std::setprecision(2)
+            << std::setw(15) << lvl.nonzeros()
+            << std::setw(12) << human_readable_memory(lvl.bytes())
+            << " (" << std::setw(5) << std::fixed << std::setprecision(2)
             << 100.0 * lvl.nonzeros() / sum_nnz
             << "%)" << std::endl;
     }
@@ -567,6 +594,21 @@ std::ostream& operator<<(std::ostream &os, const amg<B, C, R> &a)
     return os;
 }
 
+namespace backend {
+
+template <
+    class B,
+    template <class> class C,
+    template <class> class R
+    >
+struct bytes_impl< amg<B, C, R> > { // bytes_impl specialization for an amg<B, C, R> object
+    static size_t get(const amg<B, C, R> &A) {
+        return A.bytes(); // forwards to the member function
+    }
+};
+
+} // namespace backend
+
 } // namespace amgcl
 
 #endif
diff --git a/amgcl/backend/builtin.hpp b/amgcl/backend/builtin.hpp
@@ -924,6 +924,25 @@ struct cols_impl< crs<V, C, P> > {
     }
 };
 
+template < typename V, typename C, typename P >
+struct bytes_impl< crs<V, C, P> > { // byte count of a crs matrix, computed from nrows and nnz
+    static size_t get(const crs<V, C, P> &A) {
+        return sizeof(P) * (A.nrows + 1) + sizeof(C) * A.nnz + sizeof(V) * A.nnz; // (nrows + 1) items of P, then nnz items each of C and V
+    }
+};
+
+template < class Vec >
+struct bytes_impl<
+    Vec,
+    typename std::enable_if< is_builtin_vector<Vec>::value >::type // enabled only when is_builtin_vector<Vec>::value is true
+    >
+{
+    static size_t get(const Vec &x) {
+        typedef typename backend::value_type<Vec>::type V;
+        return x.size() * sizeof(V); // element count times element size; only size() is consulted
+    }
+};
+
 template < typename V, typename C, typename P >
 struct ptr_data_impl< crs<V, C, P> > {
     typedef const P* type;

diff --git a/amgcl/backend/cuda.hpp b/amgcl/backend/cuda.hpp
@@ -72,6 +72,13 @@ struct cuda_skyline_lu : solver::skyline_lu<T> {
         static_cast<const Base*>(this)->operator()(_rhs, _x);
         thrust::copy(_x.begin(), _x.end(), x.begin());
     }
+
+    size_t bytes() const { // Bytes reported for the Base subobject plus the _rhs and _x members.
+        return
+            backend::bytes(*static_cast<const Base*>(this)) + // the cast makes backend::bytes() see this object as Base
+            backend::bytes(_rhs) +
+            backend::bytes(_x);
+    }
 };
 
 }
@@ -372,6 +379,31 @@ struct nonzeros_impl< cuda_hyb_matrix<V> > {
     }
 };
 
+template < typename V >
+struct bytes_impl< cuda_hyb_matrix<V> > {
+    static size_t get(const cuda_hyb_matrix<V> &A) {
+        // The cusparse HYB format is opaque, so this is only an estimate: (rows + 1) ints, plus one int and one V per nonzero.
+        return
+            sizeof(int) * (A.rows() + 1) +
+            sizeof(int) * A.nonzeros() +
+            sizeof(V)   * A.nonzeros();
+    }
+};
+
+template < typename V >
+struct bytes_impl< thrust::device_vector<V> > { // bytes_impl specialization for thrust::device_vector<V>
+    static size_t get(const thrust::device_vector<V> &v) {
+        return v.size() * sizeof(V); // element count times element size
+    }
+};
+
+template < typename V >
+struct bytes_impl< solver::cuda_skyline_lu<V> > { // bytes_impl specialization for solver::cuda_skyline_lu<V>
+    static size_t get(const solver::cuda_skyline_lu<V> &s) {
+        return s.bytes(); // forwards to the member function
+    }
+};
+
 template < typename Alpha, typename Beta, typename V >
 struct spmv_impl<
     Alpha, cuda_hyb_matrix<V>, thrust::device_vector<V>,

diff --git a/amgcl/backend/interface.hpp b/amgcl/backend/interface.hpp
@@ -93,6 +93,15 @@ struct cols_impl {
     typedef typename Matrix::COLS_NOT_IMPLEMENTED type;
 };
 
+/// Implementation for function returning number of bytes allocated for a matrix/vector.
+/** \note Used in bytes(). This unspecialized version reports 0 for any type. */
+template <class T, class Enable = void>
+struct bytes_impl {
+    static size_t get(const T&) {
+        return 0; // the argument is ignored
+    }
+};
+
 template <class Matrix, class Enable = void>
 struct ptr_data_impl {
     typedef typename Matrix::PTR_DATA_NOT_IMPLEMENTED type;
@@ -213,6 +222,11 @@ size_t cols(const Matrix &matrix) {
     return cols_impl<Matrix>::get(matrix);
 }
 
+/// Returns number of bytes allocated for the container (matrix / vector), as reported by bytes_impl<T>::get().
+template <class T>
+size_t bytes(const T &t) {
+    return bytes_impl<T>::get(t); // the specialization is chosen from the static type T
+}
 template <class Matrix>
 typename ptr_data_impl<Matrix>::type
 ptr_data(const Matrix &matrix) {

diff --git a/amgcl/backend/vexcl.hpp b/amgcl/backend/vexcl.hpp
@@ -70,6 +70,13 @@ struct vexcl_skyline_lu : solver::skyline_lu<value_type> {
         static_cast<const Base*>(this)->operator()(_rhs, _x);
         vex::copy(_x, x);
     }
+
+    size_t bytes() const { // Bytes reported for the Base subobject plus the _rhs and _x members.
+        return
+            backend::bytes(*static_cast<const Base*>(this)) + // the cast makes backend::bytes() see this object as Base
+            backend::bytes(_rhs) +
+            backend::bytes(_x);
+    }
 };
 
 }
@@ -274,6 +281,30 @@ struct nonzeros_impl< vex::sparse::distributed<vex::sparse::matrix<V,C,P>> > {
     }
 };
 
+template < typename V, typename C, typename P >
+struct bytes_impl< vex::sparse::distributed<vex::sparse::matrix<V,C,P> > > { // byte count computed from rows() and nonzeros()
+    static size_t get(const vex::sparse::distributed<vex::sparse::matrix<V,C,P> > &A) {
+        return
+            sizeof(P) * (A.rows() + 1) + // (rows + 1) items of P
+            sizeof(C) * A.nonzeros() + // one C per nonzero
+            sizeof(V) * A.nonzeros(); // one V per nonzero
+    }
+};
+
+template < typename V >
+struct bytes_impl< vex::vector<V> > { // bytes_impl specialization for vex::vector<V>
+    static size_t get(const vex::vector<V> &v) {
+        return v.size() * sizeof(V); // element count times element size
+    }
+};
+
+template < typename V >
+struct bytes_impl< solver::vexcl_skyline_lu<V> > { // bytes_impl specialization for solver::vexcl_skyline_lu<V>
+    static size_t get(const solver::vexcl_skyline_lu<V> &s) {
+        return s.bytes(); // forwards to the member function
+    }
+};
+
 template < typename Alpha, typename Beta, typename V, typename C, typename P >
 struct spmv_impl<
     Alpha, vex::sparse::distributed<vex::sparse::matrix<V,C,P>>, vex::vector<V>,

diff --git a/amgcl/make_solver.hpp b/amgcl/make_solver.hpp
@@ -203,8 +203,14 @@ class make_solver {
             return n;
         }
 
+        size_t bytes() const { // Combined backend::bytes() of the S and P members.
+            return backend::bytes(S) + backend::bytes(P);
+        }
+
         friend std::ostream& operator<<(std::ostream &os, const make_solver &p) {
-            return os << p.S << std::endl << p.P;
+            return os
+                << "Solver\n======\n" << p.S << std::endl
+                << "Preconditioner\n==============\n" << p.P;
         }
     private:
         size_t           n;
@@ -377,7 +383,16 @@ class make_scaling_solver {
         std::shared_ptr<vector> t;
 };
 
+namespace backend {
 
+template <class P, class S>
+struct bytes_impl< make_solver<P, S> > { // bytes_impl specialization for make_solver<P, S>
+    static size_t get(const make_solver<P, S> &s) {
+        return s.bytes(); // forwards to the member function
+    }
+};
+
+} // namespace backend
 } // namespace amgcl
 
 #endif
diff --git a/amgcl/relaxation/as_preconditioner.hpp b/amgcl/relaxation/as_preconditioner.hpp
@@ -87,23 +87,31 @@ class as_preconditioner {
         std::shared_ptr<matrix> system_matrix_ptr() const {
             return A;
         }
+
+        size_t bytes() const { // Sum of backend::bytes() over the A and S members.
+            size_t b = 0;
+
+            if (A) b += backend::bytes(*A); // a member that is not set contributes nothing
+            if (S) b += backend::bytes(*S);
+
+            return b;
+        }
     private:
         params prm;
 
         std::shared_ptr<matrix>   A;
         std::shared_ptr<smoother> S;
-        std::shared_ptr<vector> tmp;
 
         void init(std::shared_ptr<build_matrix> M, const backend_params &bprm) {
             A = Backend::copy_matrix(M, bprm);
             S = std::make_shared<smoother>(*M, prm, bprm);
-            tmp = Backend::create_vector(backend::rows(*M), bprm);
         }
 
         friend std::ostream& operator<<(std::ostream &os, const as_preconditioner &p) {
             os << "Relaxation as preconditioner" << std::endl;
             os << "  unknowns: " << backend::rows(p.system_matrix()) << std::endl;
             os << "  nonzeros: " << backend::nonzeros(p.system_matrix()) << std::endl;
+            os << "  memory:   " << human_readable_memory(p.bytes()) << std::endl;
 
             return os;
         }

diff --git a/amgcl/relaxation/chebyshev.hpp b/amgcl/relaxation/chebyshev.hpp
@@ -170,6 +170,13 @@ class chebyshev {
             solve(A, rhs, x);
         }
 
+        size_t bytes() const { // Sum of backend::bytes() over C and the objects behind p and q.
+            return // NOTE(review): p and q are dereferenced without a null check; confirm they are always set
+                backend::bytes(C) +
+                backend::bytes(*p) +
+                backend::bytes(*q);
+        }
+
     private:
         std::vector<scalar_type> C;
         mutable std::shared_ptr<vector> p, q;
@@ -191,8 +198,17 @@ class chebyshev {
 };
 
 } // namespace relaxation
-} // namespace amgcl
 
+namespace backend {
 
+template <class Backend>
+struct bytes_impl< relaxation::chebyshev<Backend> > { // bytes_impl specialization for relaxation::chebyshev<Backend>
+    static size_t get(const relaxation::chebyshev<Backend> &R) {
+        return R.bytes(); // forwards to the member function
+    }
+};
+
+} // namespace backend
+} // namespace amgcl
 
 #endif
diff --git a/amgcl/relaxation/cusparse_ilu0.hpp b/amgcl/relaxation/cusparse_ilu0.hpp
@@ -253,6 +253,16 @@ struct ilu0< backend::cuda<real> > {
         solve(x);
     }
 
+    size_t bytes() const {
+        // This is incomplete, as cusparse structs are opaque: only the ptr, col, val, y and buf members are counted.
+        return // sum of backend::bytes() over those five members
+            backend::bytes(ptr) +
+            backend::bytes(col) +
+            backend::bytes(val) +
+            backend::bytes(y) +
+            backend::bytes(buf);
+    }
+
     private:
         static const cusparseSolvePolicy_t policy_M = CUSPARSE_SOLVE_POLICY_NO_LEVEL;
         static const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL;

diff --git a/amgcl/relaxation/damped_jacobi.hpp b/amgcl/relaxation/damped_jacobi.hpp
@@ -134,6 +134,17 @@ struct damped_jacobi {
 };
 
 } // namespace relaxation
+
+namespace backend {
+
+template <class Backend>
+struct bytes_impl< relaxation::damped_jacobi<Backend> > { // bytes_impl specialization for relaxation::damped_jacobi<Backend>
+    static size_t get(const relaxation::damped_jacobi<Backend> &R) {
+        return backend::bytes(*R.dia); // only the object R.dia points to is counted; NOTE(review): dereferenced without a null check, confirm dia is always set
+    }
+};
+
+} // namespace backend
 } // namespace amgcl
 
 #endif