From b024aa2a974a2e69d6fd11ee0818b1af143d3f9a Mon Sep 17 00:00:00 2001
From: Alexandre Boulch
Date: Mon, 17 Dec 2018 11:19:13 +0100
Subject: [PATCH] Update: fix compilation

---
 {include => python}/Normals.h | 19 +-
 python/normEstHough.cxx | 2 +-
 python/setup.py | 11 +-
 third_party_includes/Eigen/Cholesky | 5 +
 third_party_includes/Eigen/Core | 27 +-
 third_party_includes/Eigen/Eigenvalues | 4 +
 third_party_includes/Eigen/LU | 4 +
 third_party_includes/Eigen/QR | 4 +
 third_party_includes/Eigen/QtAlignedMalloc | 2 +-
 third_party_includes/Eigen/SVD | 4 +
 .../Eigen/src/Cholesky/LDLT.h | 14 +-
 third_party_includes/Eigen/src/Cholesky/LLT.h | 26 +-
 third_party_includes/Eigen/src/Core/Array.h | 2 -
 .../Eigen/src/Core/AssignEvaluator.h | 4 +-
 .../Eigen/src/Core/Assign_MKL.h | 6 +-
 .../Eigen/src/Core/ConditionEstimator.h | 2 +-
 .../Eigen/src/Core/CoreEvaluators.h | 39 +-
 .../Eigen/src/Core/Diagonal.h | 5 +-
 third_party_includes/Eigen/src/Core/Dot.h | 17 +-
 .../Eigen/src/Core/GeneralProduct.h | 21 +-
 third_party_includes/Eigen/src/Core/Map.h | 17 +-
 third_party_includes/Eigen/src/Core/MapBase.h | 6 +-
 .../Eigen/src/Core/MathFunctions.h | 64 +-
 .../Eigen/src/Core/MathFunctionsImpl.h | 23 +
 third_party_includes/Eigen/src/Core/Matrix.h | 2 -
 .../Eigen/src/Core/MatrixBase.h | 35 +-
 .../Eigen/src/Core/PlainObjectBase.h | 4 +
 third_party_includes/Eigen/src/Core/Product.h | 10 +-
 .../Eigen/src/Core/ProductEvaluators.h | 31 +-
 third_party_includes/Eigen/src/Core/Redux.h | 2 +-
 third_party_includes/Eigen/src/Core/Ref.h | 2 +
 .../Eigen/src/Core/SelfAdjointView.h | 6 +-
 .../Eigen/src/Core/SelfCwiseBinaryOp.h | 4 -
 .../Eigen/src/Core/SolveTriangular.h | 3 +
 .../Eigen/src/Core/StableNorm.h | 2 +-
 .../Eigen/src/Core/Transpositions.h | 2 +-
 .../Eigen/src/Core/arch/AVX/Complex.h | 36 +-
 .../Eigen/src/Core/arch/AVX/PacketMath.h | 24 +-
 .../src/Core/arch/AVX512/MathFunctions.h | 33 +-
 .../Eigen/src/Core/arch/AVX512/PacketMath.h | 12 +-
 .../Eigen/src/Core/arch/AltiVec/Complex.h | 35 +-
 .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 44 +-
 .../Eigen/src/Core/arch/CUDA/Half.h | 253 +-
 .../Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 5 +-
 .../Eigen/src/Core/arch/Default/ConjHelper.h | 29 +
 .../Eigen/src/Core/arch/NEON/Complex.h | 12 +-
 .../Eigen/src/Core/arch/NEON/PacketMath.h | 31 +
 .../Eigen/src/Core/arch/SSE/Complex.h | 40 +-
 .../Eigen/src/Core/arch/SSE/PacketMath.h | 24 +-
 .../Eigen/src/Core/arch/SSE/TypeCasting.h | 28 +-
 .../Eigen/src/Core/arch/ZVector/Complex.h | 3 +
 .../Eigen/src/Core/functors/BinaryFunctors.h | 25 +-
 .../Eigen/src/Core/functors/StlFunctors.h | 4 +
 .../Core/products/GeneralBlockPanelKernel.h | 15 +-
 .../GeneralMatrixMatrixTriangular_BLAS.h | 10 +-
 .../Core/products/GeneralMatrixMatrix_BLAS.h | 19 +-
 .../src/Core/products/GeneralMatrixVector.h | 8 +-
 .../Core/products/GeneralMatrixVector_BLAS.h | 19 +-
 .../products/SelfadjointMatrixMatrix_BLAS.h | 48 +-
 .../products/SelfadjointMatrixVector_BLAS.h | 9 +-
 .../Core/products/TriangularMatrixMatrix.h | 35 +-
 .../products/TriangularMatrixMatrix_BLAS.h | 39 +-
 .../Core/products/TriangularMatrixVector.h | 22 +-
 .../products/TriangularMatrixVector_BLAS.h | 46 +-
 .../products/TriangularSolverMatrix_BLAS.h | 40 +-
 .../src/Core/util/DisableStupidWarnings.h | 14 +-
 .../Eigen/src/Core/util/MKL_support.h | 10 +-
 .../Eigen/src/Core/util/Macros.h | 19 +-
 .../Eigen/src/Core/util/Memory.h | 24 +-
 .../Eigen/src/Core/util/Meta.h | 42 +
 .../src/Core/util/ReenableStupidWarnings.h | 2 +-
 .../Eigen/src/Core/util/StaticAssert.h | 120 +-
.../src/Eigenvalues/GeneralizedEigenSolver.h | 5 +- .../src/Eigenvalues/MatrixBaseEigenvalues.h | 2 - .../Eigen/src/Eigenvalues/RealSchur.h | 4 +- .../SelfAdjointEigenSolver_LAPACKE.h | 23 +- .../Eigen/src/Geometry/AngleAxis.h | 2 +- .../Eigen/src/Geometry/Quaternion.h | 31 +- .../BasicPreconditioners.h | 2 +- .../ConjugateGradient.h | 5 +- .../Eigen/src/Jacobi/Jacobi.h | 253 +- .../Eigen/src/LU/InverseImpl.h | 2 +- .../Eigen/src/PaStiXSupport/PaStiXSupport.h | 8 +- third_party_includes/Eigen/src/SVD/BDCSVD.h | 33 +- .../Eigen/src/SVD/JacobiSVD_LAPACKE.h | 5 +- third_party_includes/Eigen/src/SVD/SVDBase.h | 4 +- .../Eigen/src/SparseCore/AmbiVector.h | 2 +- .../ConservativeSparseSparseProduct.h | 67 +- .../Eigen/src/SparseCore/SparseMatrix.h | 6 +- .../src/SparseCore/SparseSelfAdjointView.h | 8 +- .../SparseSparseProductWithPruning.h | 22 +- .../Eigen/src/SparseLU/SparseLU.h | 2 - .../Eigen/src/SparseQR/SparseQR.h | 26 +- .../Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 +- third_party_includes/nanoflann.hpp | 3931 +++++++++-------- 95 files changed, 3258 insertions(+), 2799 deletions(-) rename {include => python}/Normals.h (98%) create mode 100644 third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h diff --git a/include/Normals.h b/python/Normals.h similarity index 98% rename from include/Normals.h rename to python/Normals.h index b2b8fa2..a616ec7 100644 --- a/include/Normals.h +++ b/python/Normals.h @@ -30,8 +30,8 @@ #include #include #include -#include "Eigen/Dense" -#include "nanoflann.hpp" +#include +#include #ifdef _OPENMP #include @@ -91,7 +91,6 @@ class Eigen_Normal_Estimator{ typedef nanoflann::KDTreeEigenMatrixAdaptor< Eigen::MatrixX3d > kd_tree; //a row is a point - // constructor Eigen_Normal_Estimator(const Eigen::MatrixX3d& points, Eigen::MatrixX3d& normals): pts(points),nls(normals){ @@ -128,7 +127,7 @@ class Eigen_Normal_Estimator{ } istr.close(); pts.resize(points.size(),3); - for(int i=0; i vecInt(1000000); - for(int i=0; ibuildIndex(); //create the density estimation for each point @@ -218,7 +217,7 @@ class Eigen_Normal_Estimator{ //knn for k_density+1 because the point is itself include in the search tree tree.index->knnSearch(&pt_query[0], k_density+1, &pointIdxSearch[0], &pointSquaredDistance[0]); double d =0; - for(int i=0; i elems, double d){ unsigned int i1 = 0; unsigned int i2 = elems.size()-1; - unsigned int i3; + unsigned int i3 = (i1+i2)/2; while(i2 > i1){ i3 = (i1+i2)/2; if(elems[i3] == d){break;} @@ -391,13 +390,13 @@ class Eigen_Normal_Estimator{ { std::vector dists; double sum=0; - for(int i=0; i= 9) +#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) +#define EIGEN_CUDACC_VER __CUDACC_VER__ +#else +#define EIGEN_CUDACC_VER 0 +#endif + // Handle NVCC/CUDA/SYCL #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) // Do not try asserts on CUDA and SYCL! 
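The EIGEN_CUDACC_VER block above normalizes the CUDA compiler version into a single integer: CUDA 9 removed the old __CUDACC_VER__ macro in favor of __CUDACC_VER_MAJOR__/__CUDACC_VER_MINOR__, so major*10000 + minor*100 reconstructs the old encoding on newer toolchains. A minimal illustration (not part of the patch) of how later guards consume it:

    // With nvcc 9.1: __CUDACC_VER_MAJOR__ == 9 and __CUDACC_VER_MINOR__ == 1,
    // so EIGEN_CUDACC_VER expands to 9 * 10000 + 1 * 100 == 90100.
    // Version guards elsewhere in the patch compare against the same encoding:
    #if EIGEN_CUDACC_VER >= 70500   // true for any CUDA >= 7.5
    #define EIGEN_HAS_CUDA_FP16
    #endif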
@@ -37,9 +53,9 @@ #endif #define EIGEN_DEVICE_FUNC __host__ __device__ - // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro + // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro // works properly on the device side - #include + #include #else #define EIGEN_DEVICE_FUNC #endif @@ -155,6 +171,9 @@ #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif + #ifdef __AVX512ER__ + #define EIGEN_VECTORIZE_AVX512ER + #endif #endif // include files @@ -229,7 +248,7 @@ #if defined __CUDACC__ #define EIGEN_VECTORIZE_CUDA #include - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #if EIGEN_CUDACC_VER >= 70500 #define EIGEN_HAS_CUDA_FP16 #endif #endif @@ -352,6 +371,7 @@ using std::ptrdiff_t; #include "src/Core/MathFunctions.h" #include "src/Core/GenericPacketMath.h" #include "src/Core/MathFunctionsImpl.h" +#include "src/Core/arch/Default/ConjHelper.h" #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" @@ -367,6 +387,7 @@ using std::ptrdiff_t; #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX/TypeCasting.h" + #include "src/Core/arch/SSE/TypeCasting.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" diff --git a/third_party_includes/Eigen/Eigenvalues b/third_party_includes/Eigen/Eigenvalues index 009e529..f3f661b 100644 --- a/third_party_includes/Eigen/Eigenvalues +++ b/third_party_includes/Eigen/Eigenvalues @@ -45,7 +45,11 @@ #include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/Eigenvalues/RealSchur_LAPACKE.h" #include "src/Eigenvalues/ComplexSchur_LAPACKE.h" #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" diff --git a/third_party_includes/Eigen/LU b/third_party_includes/Eigen/LU index 6f6c556..6418a86 100644 --- a/third_party_includes/Eigen/LU +++ b/third_party_includes/Eigen/LU @@ -28,7 +28,11 @@ #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" diff --git a/third_party_includes/Eigen/QR b/third_party_includes/Eigen/QR index 80838e3..c7e9144 100644 --- a/third_party_includes/Eigen/QR +++ b/third_party_includes/Eigen/QR @@ -36,7 +36,11 @@ #include "src/QR/ColPivHouseholderQR.h" #include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif diff --git a/third_party_includes/Eigen/QtAlignedMalloc b/third_party_includes/Eigen/QtAlignedMalloc index c6571f1..4f07df0 100644 --- a/third_party_includes/Eigen/QtAlignedMalloc +++ b/third_party_includes/Eigen/QtAlignedMalloc @@ -27,7 +27,7 @@ void qFree(void *ptr) void *qRealloc(void *ptr, std::size_t size) { void* newPtr = Eigen::internal::aligned_malloc(size); - memcpy(newPtr, ptr, size); + std::memcpy(newPtr, ptr, size); Eigen::internal::aligned_free(ptr); return newPtr; } diff --git a/third_party_includes/Eigen/SVD b/third_party_includes/Eigen/SVD index 86143c2..5d0e75f 100644 --- a/third_party_includes/Eigen/SVD +++ 
b/third_party_includes/Eigen/SVD @@ -37,7 +37,11 @@ #include "src/SVD/JacobiSVD.h" #include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/SVD/JacobiSVD_LAPACKE.h" #endif diff --git a/third_party_includes/Eigen/src/Cholesky/LDLT.h b/third_party_includes/Eigen/src/Cholesky/LDLT.h index fcee7b2..15ccf24 100644 --- a/third_party_includes/Eigen/src/Cholesky/LDLT.h +++ b/third_party_includes/Eigen/src/Cholesky/LDLT.h @@ -248,7 +248,7 @@ template class LDLT /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if the factorization failed because of a zero pivot. */ ComputationInfo info() const { @@ -305,7 +305,8 @@ template<> struct ldlt_inplace if (size <= 1) { transpositions.setIdentity(); - if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; + if(size==0) sign = ZeroSign; + else if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; else if (numext::real(mat.coeff(0,0)) < static_cast(0)) sign = NegativeSemiDef; else sign = ZeroSign; return true; @@ -376,6 +377,8 @@ template<> struct ldlt_inplace if((rs>0) && pivot_is_valid) A21 /= realAkk; + else if(rs>0) + ret = ret && (A21.array()==Scalar(0)).all(); if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed else if(!pivot_is_valid) found_zero_pivot = true; @@ -568,13 +571,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons // more precisely, use pseudo-inverse of D (see bug 241) using std::abs; const typename Diagonal::RealReturnType vecD(vectorD()); - // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon - // as motivated by LAPACK's xGELSS: + // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min()) + // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS: // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest // diagonal element is not well justified and leads to numerical issues in some cases. // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. - RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + // Using numeric_limits::min() gives us more robustness to denormals. + RealScalar tolerance = (std::numeric_limits::min)(); for (Index i = 0; i < vecD.size(); ++i) { diff --git a/third_party_includes/Eigen/src/Cholesky/LLT.h b/third_party_includes/Eigen/src/Cholesky/LLT.h index 87ca8d4..e1624d2 100644 --- a/third_party_includes/Eigen/src/Cholesky/LLT.h +++ b/third_party_includes/Eigen/src/Cholesky/LLT.h @@ -24,7 +24,7 @@ template struct LLT_Traits; * * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. - * The other triangular part won't be read. + * The other triangular part won't be read. * * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite * matrix A such that A = LL^* = U^*U, where L is lower triangular. 
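The LDLT hunk above also changes the solve tolerance from 1/NumTraits::highest() to numeric_limits::min(), the smallest positive normalized value, so that denormal entries of the diagonal factor D are zeroed rather than inverted when applying its pseudo-inverse. A minimal sketch of the implied thresholding, not the library's verbatim code:

    #include <cmath>
    #include <limits>

    // Diagonal entries below the smallest normalized magnitude are treated as
    // zero pivots: they map to 0 instead of being inverted (pseudo-inverse of D).
    template <typename RealScalar>
    RealScalar pseudo_inverse_entry(RealScalar d)
    {
        const RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
        return std::abs(d) > tolerance ? RealScalar(1) / d : RealScalar(0);
    }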
@@ -41,14 +41,18 @@ template struct LLT_Traits; * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out * + * \b Performance: for best performance, it is recommended to use a column-major storage format + * with the Lower triangular part (the default), or, equivalently, a row-major storage format + * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization + * step, and rank-updates can be up to 3 times slower. + * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * + * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered. + * Therefore, the strict lower part does not have to store correct values. + * * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ - /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) - * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, - * the strict lower part does not have to store correct values. - */ template class LLT { public: @@ -146,7 +150,7 @@ template class LLT } template - void solveInPlace(MatrixBase &bAndX) const; + void solveInPlace(const MatrixBase &bAndX) const; template LLT& compute(const EigenBase& matrix); @@ -177,7 +181,7 @@ template class LLT /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if the matrix.appears not to be positive definite. */ ComputationInfo info() const { @@ -425,7 +429,8 @@ LLT& LLT::compute(const EigenBase eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); - m_matrix = a.derived(); + if (!internal::is_same_dense(m_matrix, a.derived())) + m_matrix = a.derived(); // Compute matrix L1 norm = max abs column sum. m_l1_norm = RealScalar(0); @@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const * * This version avoids a copy when the right hand side matrix b is not needed anymore. * + * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. + * This function will const_cast it, so constness isn't honored here. 
+ * * \sa LLT::solve(), MatrixBase::llt() */ template template -void LLT::solveInPlace(MatrixBase &bAndX) const +void LLT::solveInPlace(const MatrixBase &bAndX) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==bAndX.rows()); diff --git a/third_party_includes/Eigen/src/Core/Array.h b/third_party_includes/Eigen/src/Core/Array.h index e10020d..16770fc 100644 --- a/third_party_includes/Eigen/src/Core/Array.h +++ b/third_party_includes/Eigen/src/Core/Array.h @@ -153,8 +153,6 @@ class Array : Base(std::move(other)) { Base::_check_template_params(); - if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) - Base::_set_noalias(other); } EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) diff --git a/third_party_includes/Eigen/src/Core/AssignEvaluator.h b/third_party_includes/Eigen/src/Core/AssignEvaluator.h index b0ec7b7..dbe435d 100644 --- a/third_party_includes/Eigen/src/Core/AssignEvaluator.h +++ b/third_party_includes/Eigen/src/Core/AssignEvaluator.h @@ -39,7 +39,7 @@ struct copy_using_evaluator_traits enum { DstAlignment = DstEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment, - DstHasDirectAccess = DstFlags & DirectAccessBit, + DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) }; @@ -83,7 +83,7 @@ struct copy_using_evaluator_traits && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. 
*/ diff --git a/third_party_includes/Eigen/src/Core/Assign_MKL.h b/third_party_includes/Eigen/src/Core/Assign_MKL.h index 6c2ab92..6866095 100755 --- a/third_party_includes/Eigen/src/Core/Assign_MKL.h +++ b/third_party_includes/Eigen/src/Core/Assign_MKL.h @@ -84,7 +84,8 @@ class vml_assign_traits struct Assignment, SrcXprNested>, assign_op, \ Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ if(vml_assign_traits::Traversal==LinearTraversal) { \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ @@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseBinaryOp, SrcXprNested, \ const CwiseNullaryOp,Plain> > SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ if(vml_assign_traits::Traversal==LinearTraversal) \ diff --git a/third_party_includes/Eigen/src/Core/ConditionEstimator.h b/third_party_includes/Eigen/src/Core/ConditionEstimator.h index aa7efdc..51a2e5f 100644 --- a/third_party_includes/Eigen/src/Core/ConditionEstimator.h +++ b/third_party_includes/Eigen/src/Core/ConditionEstimator.h @@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco { typedef typename Decomposition::RealScalar RealScalar; eigen_assert(dec.rows() == dec.cols()); - if (dec.rows() == 0) return RealScalar(1); + if (dec.rows() == 0) return NumTraits::infinity(); if (matrix_norm == RealScalar(0)) return RealScalar(0); if (dec.rows() == 1) return RealScalar(1); const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); diff --git a/third_party_includes/Eigen/src/Core/CoreEvaluators.h b/third_party_includes/Eigen/src/Core/CoreEvaluators.h index f7c1eff..910889e 100644 --- a/third_party_includes/Eigen/src/Core/CoreEvaluators.h +++ b/third_party_includes/Eigen/src/Core/CoreEvaluators.h @@ -977,7 +977,7 @@ struct evaluator > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -987,7 +987,9 @@ struct evaluator > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, PacketAlignment = unpacket_traits::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? 
int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; @@ -1018,14 +1020,16 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), - m_startCol(block.startCol()) + m_startCol(block.startCol()), + m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0) { } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + RowsAtCompileTime = XprType::RowsAtCompileTime, + ForwardLinearAccess = InnerPanel && bool(evaluator::Flags&LinearAccessBit) }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1037,7 +1041,10 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeff(m_linear_offset.value() + index); + else + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1049,7 +1056,10 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeffRef(m_linear_offset.value() + index); + else + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template @@ -1063,8 +1073,11 @@ struct unary_evaluator, IndexBa EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return packet(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.template packet(m_linear_offset.value() + index); + else + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); } template @@ -1078,15 +1091,19 @@ struct unary_evaluator, IndexBa EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return writePacket(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + if (ForwardLinearAccess) + return m_argImpl.template writePacket(m_linear_offset.value() + index, x); + else + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? 
index : 0, + x); } protected: evaluator m_argImpl; const variable_if_dynamic m_startRow; const variable_if_dynamic m_startCol; + const variable_if_dynamic m_linear_offset; }; // TODO: This evaluator does not actually use the child evaluator; diff --git a/third_party_includes/Eigen/src/Core/Diagonal.h b/third_party_includes/Eigen/src/Core/Diagonal.h index 49e7112..afcaf35 100644 --- a/third_party_includes/Eigen/src/Core/Diagonal.h +++ b/third_party_includes/Eigen/src/Core/Diagonal.h @@ -70,7 +70,10 @@ template class Diagonal EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DEVICE_FUNC - explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) + { + eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() ); + } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) diff --git a/third_party_includes/Eigen/src/Core/Dot.h b/third_party_includes/Eigen/src/Core/Dot.h index 06ef18b..1fe7a84 100644 --- a/third_party_includes/Eigen/src/Core/Dot.h +++ b/third_party_includes/Eigen/src/Core/Dot.h @@ -31,7 +31,8 @@ struct dot_nocheck typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.template binaryExpr(b).sum(); } @@ -43,7 +44,8 @@ struct dot_nocheck typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.transpose().template binaryExpr(b).sum(); } @@ -65,6 +67,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { @@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala * \sa lpNorm(), dot(), squaredNorm() */ template -inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +120,7 @@ inline typename NumTraits::Scalar>::Real Matr * \sa norm(), normalize() */ template -inline const typename MatrixBase::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::normalized() const { typedef typename internal::nested_eval::type _Nested; @@ -139,7 +142,7 @@ MatrixBase::normalized() const * \sa norm(), normalized() */ template -inline void MatrixBase::normalize() +EIGEN_STRONG_INLINE void MatrixBase::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +163,7 @@ inline void MatrixBase::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template -inline const typename MatrixBase::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::stableNormalized() const { typedef typename internal::nested_eval::type _Nested; @@ -185,7 +188,7 @@ MatrixBase::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template -inline void 
MatrixBase::stableNormalize() +EIGEN_STRONG_INLINE void MatrixBase::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); diff --git a/third_party_includes/Eigen/src/Core/GeneralProduct.h b/third_party_includes/Eigen/src/Core/GeneralProduct.h index 0f16cd8..6f0cc80 100644 --- a/third_party_includes/Eigen/src/Core/GeneralProduct.h +++ b/third_party_includes/Eigen/src/Core/GeneralProduct.h @@ -24,12 +24,17 @@ template struct product_type_selector; template struct product_size_category { - enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || - (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), - value = is_large ? Large - : Size == 1 ? 1 - : Small + enum { + #ifndef EIGEN_CUDA_ARCH + is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), + #else + is_large = 0, + #endif + value = is_large ? Large + : Size == 1 ? 1 + : Small }; }; @@ -379,8 +384,6 @@ template<> struct gemv_dense_selector * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ -#ifndef __CUDACC__ - template template inline const Product @@ -412,8 +415,6 @@ MatrixBase::operator*(const MatrixBase &other) const return Product(derived(), other.derived()); } -#endif // __CUDACC__ - /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be diff --git a/third_party_includes/Eigen/src/Core/Map.h b/third_party_includes/Eigen/src/Core/Map.h index 06d1967..548bf9a 100644 --- a/third_party_includes/Eigen/src/Core/Map.h +++ b/third_party_includes/Eigen/src/Core/Map.h @@ -20,11 +20,17 @@ struct traits > { typedef traits TraitsBase; enum { + PlainObjectTypeInnerSize = ((traits::Flags&RowMajorBit)==RowMajorBit) + ? PlainObjectType::ColsAtCompileTime + : PlainObjectType::RowsAtCompileTime, + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) : int(StrideType::InnerStrideAtCompileTime), OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 - ? int(PlainObjectType::OuterStrideAtCompileTime) + ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic + ? Dynamic + : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize)) : int(StrideType::OuterStrideAtCompileTime), Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), @@ -107,10 +113,11 @@ template class Ma EIGEN_DEVICE_FUNC inline Index outerStride() const { - return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() - : IsVectorAtCompileTime ? this->size() - : int(Flags)&RowMajorBit ? this->cols() - : this->rows(); + return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer() + : int(internal::traits::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits::OuterStrideAtCompileTime) + : IsVectorAtCompileTime ? (this->size() * innerStride()) + : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride()) + : (this->rows() * innerStride()); } /** Constructor in the fixed-size case. 
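The Map.h hunk above derives the compile-time outer stride as the inner stride times the inner size when only an inner stride is supplied, and makes the runtime outerStride() fall back consistently. A hedged usage sketch of the case this affects (the variable names are illustrative, not from the patch):

    #include <Eigen/Dense>

    int main()
    {
        double data[12];
        for (int i = 0; i < 12; ++i) data[i] = i;
        // Map every other element as a fixed-size 2x3 row-major matrix. With an
        // inner stride of 2, the default outer stride is now 2*3 == 6 scalars, so
        // row 0 reads data[0], data[2], data[4] and row 1 reads data[6], data[8], data[10].
        Eigen::Map<Eigen::Matrix<double, 2, 3, Eigen::RowMajor>, 0,
                   Eigen::InnerStride<2> > m(data);
        return m(1, 2) == 10.0 ? 0 : 1;
    }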
diff --git a/third_party_includes/Eigen/src/Core/MapBase.h b/third_party_includes/Eigen/src/Core/MapBase.h index 020f939..668922f 100644 --- a/third_party_includes/Eigen/src/Core/MapBase.h +++ b/third_party_includes/Eigen/src/Core/MapBase.h @@ -43,6 +43,7 @@ template class MapBase enum { RowsAtCompileTime = internal::traits::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime, + InnerStrideAtCompileTime = internal::traits::InnerStrideAtCompileTime, SizeAtCompileTime = Base::SizeAtCompileTime }; @@ -187,8 +188,11 @@ template class MapBase void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const { #if EIGEN_MAX_ALIGN_BYTES>0 + // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value: + const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime); + EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride); eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits::Alignment) == 0) - || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); + || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); #endif } diff --git a/third_party_includes/Eigen/src/Core/MathFunctions.h b/third_party_includes/Eigen/src/Core/MathFunctions.h index a648aa0..b249ce0 100644 --- a/third_party_includes/Eigen/src/Core/MathFunctions.h +++ b/third_party_includes/Eigen/src/Core/MathFunctions.h @@ -348,31 +348,7 @@ struct norm1_retval * Implementation of hypot * ****************************************************************************/ -template -struct hypot_impl -{ - typedef typename NumTraits::Real RealScalar; - static inline RealScalar run(const Scalar& x, const Scalar& y) - { - EIGEN_USING_STD_MATH(abs); - EIGEN_USING_STD_MATH(sqrt); - RealScalar _x = abs(x); - RealScalar _y = abs(y); - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - if(p==RealScalar(0)) return RealScalar(0); - return p * sqrt(RealScalar(1) + qp*qp); - } -}; +template struct hypot_impl; template struct hypot_retval @@ -495,7 +471,7 @@ namespace std_fallback { typedef typename NumTraits::Real RealScalar; EIGEN_USING_STD_MATH(log); Scalar x1p = RealScalar(1) + x; - return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } } @@ -640,21 +616,28 @@ template struct random_default_impl { static inline Scalar run(const Scalar& x, const Scalar& y) - { - typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; - if(y=x the result converted to an unsigned long is still correct. - std::size_t range = ScalarX(y)-ScalarX(x); - std::size_t offset = 0; - // rejection sampling - std::size_t divisor = 1; - std::size_t multiplier = 1; - if(range::type ScalarU; + // ScalarX is the widest of ScalarU and unsigned int. + // We'll deal only with ScalarX and unsigned int below thus avoiding signed + // types and arithmetic and signed overflows (which are undefined behavior). + typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX; + // The following difference doesn't overflow, provided our integer types are two's + // complement and have the same number of padding bits in signed and unsigned variants. + // This is the case in most modern implementations of C++. 
+ ScalarX range = ScalarX(y) - ScalarX(x); + ScalarX offset = 0; + ScalarX divisor = 1; + ScalarX multiplier = 1; + const unsigned rand_max = RAND_MAX; + if (range <= rand_max) divisor = (rand_max + 1) / (range + 1); + else multiplier = 1 + range / (rand_max + 1); + // Rejection sampling. do { - offset = (std::size_t(std::rand()) * multiplier) / divisor; + offset = (unsigned(std::rand()) * multiplier) / divisor; } while (offset > range); return Scalar(ScalarX(x) + offset); } @@ -1030,7 +1013,8 @@ inline int log2(int x) /** \returns the square root of \a x. * - * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * It is essentially equivalent to + * \code using std::sqrt; return sqrt(x); \endcode * but slightly faster for float/double and some compilers (e.g., gcc), thanks to * specializations when SSE is enabled. * diff --git a/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h b/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h index 3c9ef22..9c1ceb0 100644 --- a/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h +++ b/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h @@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x) return pdiv(p, q); } +template +EIGEN_STRONG_INLINE +RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) +{ + EIGEN_USING_STD_MATH(sqrt); + RealScalar p, qp; + p = numext::maxi(x,y); + if(p==RealScalar(0)) return RealScalar(0); + qp = numext::mini(y,x) / p; + return p * sqrt(RealScalar(1) + qp*qp); +} + +template +struct hypot_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + EIGEN_USING_STD_MATH(abs); + return positive_real_hypot(abs(x), abs(y)); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Core/Matrix.h b/third_party_includes/Eigen/src/Core/Matrix.h index 90c336d..7f4a7af 100644 --- a/third_party_includes/Eigen/src/Core/Matrix.h +++ b/third_party_includes/Eigen/src/Core/Matrix.h @@ -274,8 +274,6 @@ class Matrix : Base(std::move(other)) { Base::_check_template_params(); - if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) - Base::_set_noalias(other); } EIGEN_DEVICE_FUNC Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) diff --git a/third_party_includes/Eigen/src/Core/MatrixBase.h b/third_party_includes/Eigen/src/Core/MatrixBase.h index ce41218..e6c3590 100644 --- a/third_party_includes/Eigen/src/Core/MatrixBase.h +++ b/third_party_includes/Eigen/src/Core/MatrixBase.h @@ -160,20 +160,11 @@ template class MatrixBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase& other); -#ifdef __CUDACC__ template EIGEN_DEVICE_FUNC - const Product - operator*(const MatrixBase &other) const - { return this->lazyProduct(other); } -#else - - template const Product operator*(const MatrixBase &other) const; -#endif - template EIGEN_DEVICE_FUNC const Product @@ -453,16 +444,24 @@ template class MatrixBase ///////// MatrixFunctions module ///////// typedef typename internal::stem_function::type StemFunction; - const MatrixExponentialReturnValue exp() const; +#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \ + /** \returns an expression of the matrix Description of \c *this. \brief This function requires the unsupported MatrixFunctions module. To compute the coefficient-wise Description use ArrayBase::##Name . 
*/ \ + const ReturnType Name() const; +#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \ + /** \returns an expression of the matrix Description of \c *this. \brief This function requires the unsupported MatrixFunctions module. To compute the coefficient-wise Description use ArrayBase::##Name . */ \ + const ReturnType Name(Argument) const; + + EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential) + /** \brief Helper function for the unsupported MatrixFunctions module.*/ const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; - const MatrixFunctionReturnValue cosh() const; - const MatrixFunctionReturnValue sinh() const; - const MatrixFunctionReturnValue cos() const; - const MatrixFunctionReturnValue sin() const; - const MatrixSquareRootReturnValue sqrt() const; - const MatrixLogarithmReturnValue log() const; - const MatrixPowerReturnValue pow(const RealScalar& p) const; - const MatrixComplexPowerReturnValue pow(const std::complex& p) const; + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine) + EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root) + EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm) + EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p) + EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex& p) protected: EIGEN_DEVICE_FUNC MatrixBase() : Base() {} diff --git a/third_party_includes/Eigen/src/Core/PlainObjectBase.h b/third_party_includes/Eigen/src/Core/PlainObjectBase.h index 77f4f60..1dc7e22 100644 --- a/third_party_includes/Eigen/src/Core/PlainObjectBase.h +++ b/third_party_includes/Eigen/src/Core/PlainObjectBase.h @@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned * \a data pointers. 
* + * Here is an example using strides: + * \include Matrix_Map_stride.cpp + * Output: \verbinclude Matrix_Map_stride.out + * * \see class Map */ //@{ diff --git a/third_party_includes/Eigen/src/Core/Product.h b/third_party_includes/Eigen/src/Core/Product.h index ae0c94b..676c480 100644 --- a/third_party_includes/Eigen/src/Core/Product.h +++ b/third_party_includes/Eigen/src/Core/Product.h @@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } @@ -127,7 +127,7 @@ class dense_product_base using Base::derived; typedef typename Base::Scalar Scalar; - operator const Scalar() const + EIGEN_STRONG_INLINE operator const Scalar() const { return internal::evaluator(derived()).coeff(0,0); } @@ -162,7 +162,7 @@ class ProductImpl public: - EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); @@ -170,7 +170,7 @@ class ProductImpl return internal::evaluator(derived()).coeff(row,col); } - EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); diff --git a/third_party_includes/Eigen/src/Core/ProductEvaluators.h b/third_party_includes/Eigen/src/Core/ProductEvaluators.h index c42725d..9b99bd7 100644 --- a/third_party_includes/Eigen/src/Core/ProductEvaluators.h +++ b/third_party_includes/Eigen/src/Core/ProductEvaluators.h @@ -32,7 +32,7 @@ struct evaluator > typedef Product XprType; typedef product_evaluator Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" @@ -55,7 +55,7 @@ struct evaluator, const Product > XprType; typedef evaluator > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -68,7 +68,7 @@ struct evaluator, DiagIndex> > typedef Diagonal, DiagIndex> XprType; typedef evaluator, DiagIndex> > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(Diagonal, DiagIndex>( Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index() )) @@ -246,19 +246,19 @@ template struct generic_product_impl { template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = 
(lhs.transpose().cwiseProduct(rhs)).sum(); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template - static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; @@ -312,25 +312,25 @@ struct generic_product_impl }; template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); } template - static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); } template - static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); } @@ -785,7 +785,11 @@ struct diagonal_product_evaluator_base _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? 
PacketAccessBit : 0), - Alignment = evaluator::Alignment + Alignment = evaluator::Alignment, + + AsScalarProduct = (DiagonalType::SizeAtCompileTime==1) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -797,7 +801,10 @@ struct diagonal_product_evaluator_base EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { - return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + if(AsScalarProduct) + return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); + else + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } protected: diff --git a/third_party_includes/Eigen/src/Core/Redux.h b/third_party_includes/Eigen/src/Core/Redux.h index b6e8f88..760e9f8 100644 --- a/third_party_includes/Eigen/src/Core/Redux.h +++ b/third_party_includes/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ class redux_evaluator */ template template -typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); diff --git a/third_party_includes/Eigen/src/Core/Ref.h b/third_party_includes/Eigen/src/Core/Ref.h index bdf24f5..9c6e3c5 100644 --- a/third_party_includes/Eigen/src/Core/Ref.h +++ b/third_party_includes/Eigen/src/Core/Ref.h @@ -95,6 +95,8 @@ template class RefBase template EIGEN_DEVICE_FUNC void construct(Expression& expr) { + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression); + if(PlainObjectType::RowsAtCompileTime==1) { eigen_assert(expr.rows()==1 || expr.cols()==1); diff --git a/third_party_includes/Eigen/src/Core/SelfAdjointView.h b/third_party_includes/Eigen/src/Core/SelfAdjointView.h index 504c98f..b2e51f3 100644 --- a/third_party_includes/Eigen/src/Core/SelfAdjointView.h +++ b/third_party_includes/Eigen/src/Core/SelfAdjointView.h @@ -71,7 +71,9 @@ template class SelfAdjointView EIGEN_DEVICE_FUNC explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) - {} + { + EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY); + } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } @@ -189,7 +191,7 @@ template class SelfAdjointView TriangularView >::type(tmp2); } - typedef SelfAdjointView ConjugateReturnType; + typedef SelfAdjointView ConjugateReturnType; /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const diff --git a/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h b/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h index 50099df..7c89c2e 100644 --- a/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -17,7 +17,6 @@ namespace Eigen { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; 
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/third_party_includes/Eigen/src/Core/SolveTriangular.h b/third_party_includes/Eigen/src/Core/SolveTriangular.h index 049890b..4652e2e 100644 --- a/third_party_includes/Eigen/src/Core/SolveTriangular.h +++ b/third_party_includes/Eigen/src/Core/SolveTriangular.h @@ -169,6 +169,9 @@ void TriangularViewImpl::solveInPlace(const MatrixBase::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1}; typedef typename internal::conditional::stableNorm() const typedef typename internal::nested_eval::type DerivedCopy; typedef typename internal::remove_all::type DerivedCopyClean; - DerivedCopy copy(derived()); + const DerivedCopy copy(derived()); enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) diff --git a/third_party_includes/Eigen/src/Core/Transpositions.h b/third_party_includes/Eigen/src/Core/Transpositions.h index 19c17bb..86da5af 100644 --- a/third_party_includes/Eigen/src/Core/Transpositions.h +++ b/third_party_includes/Eigen/src/Core/Transpositions.h @@ -384,7 +384,7 @@ class Transpose > const Product operator*(const MatrixBase& matrix, const Transpose& trt) { - return Product(matrix.derived(), trt.derived()); + return Product(matrix.derived(), trt); } /** \returns the \a matrix with the inverse transpositions applied to the rows. 
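The Transpositions.h hunk above passes the Transpose wrapper itself (trt) rather than trt.derived() when building the product, matching the Product type's template arguments. A hypothetical usage sketch of the expression that exercises this operator:

    #include <Eigen/Dense>

    int main()
    {
        Eigen::Matrix3d m = Eigen::Matrix3d::Identity();
        Eigen::Transpositions<3> tr;
        tr.setIdentity();
        // Exercises operator*(const MatrixBase&, const Transpose<TranspositionsBase<...> >&)
        Eigen::Matrix3d n = m * tr.transpose();
        return n.isApprox(m) ? 0 : 1;
    }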
diff --git a/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h b/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h index 99439c8..7fa6196 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h @@ -204,23 +204,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const - { return Packet4cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const - { return Packet4cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f) template<> EIGEN_STRONG_INLINE Packet4cf pdiv(const Packet4cf& a, const Packet4cf& b) { @@ -400,23 +384,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const - { return Packet2cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const - { return Packet2cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d) template<> EIGEN_STRONG_INLINE Packet2cd pdiv(const Packet2cd& a, const Packet2cd& b) { diff --git a/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h index 195d40f..923a124 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h @@ -159,11 +159,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv(const Packet8i& /*a*/, co #ifdef __FMA__ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) - // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, - // and gcc stupidly generates a vfmadd132ps instruction, - // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate - // the result of the product. +#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) + // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, + // and even register spilling with clang>=6.0 (bug 1637). + // Gcc stupidly generates a vfmadd132ps instruction. + // So let's enforce it to generate a vfmadd231ps instruction since the most common use + // case is to accumulate the result of the product. 
Packet8f res = c; __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); return res; @@ -172,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) +#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) // see above Packet4d res = c; __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); @@ -308,9 +309,9 @@ template<> EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) } #ifndef EIGEN_VECTORIZE_AVX512 -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif template<> EIGEN_STRONG_INLINE float pfirst(const Packet8f& a) { @@ -333,9 +334,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a) { __m256d tmp = _mm256_shuffle_pd(a,a,5); return _mm256_permute2f128_pd(tmp, tmp, 1); - + #if 0 + // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd + // exhibit the same latency/throughput, but it is here for future reference/benchmarking... __m256d swap_halves = _mm256_permute2f128_pd(a,a,1); return _mm256_permute_pd(swap_halves,5); + #endif } // pabs should be ok diff --git a/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h b/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h index 399be0e..9c1717f 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -88,9 +88,9 @@ plog(const Packet16f& _x) { // x = x + x - 1.0; // } else { x = x - 1.0; } __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ); - Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps()); + Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x); x = psub(x, p16f_1); - e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps())); + e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1)); x = padd(x, tmp); Packet16f x2 = pmul(x, x); @@ -119,8 +119,9 @@ plog(const Packet16f& _x) { x = padd(x, y2); // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. - return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf, - _mm512_mask_blend_ps(invalid_mask, p16f_nan, x)); + return _mm512_mask_blend_ps(iszero_mask, + _mm512_mask_blend_ps(invalid_mask, x, p16f_nan), + p16f_minus_inf); } #endif @@ -266,8 +267,7 @@ psqrt(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). 
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ); - Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x), - _mm512_setzero_ps()); + Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x)); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); @@ -289,8 +289,7 @@ psqrt(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ); - Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x), - _mm512_setzero_pd()); + Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x)); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -333,20 +332,18 @@ prsqrt(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ); - Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), - _mm512_rsqrt14_ps(_x)); + Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps()); // Fill in NaNs and Infs for the negative/zero entries. __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ); Packet16f infs_and_nans = _mm512_mask_blend_ps( - neg_mask, p16f_nan, - _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps())); + neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); // Insert NaNs and Infs in all the right places. - return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans); } template <> @@ -363,14 +360,12 @@ prsqrt(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ); - Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), - _mm512_rsqrt14_pd(_x)); + Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd()); // Fill in NaNs and Infs for the negative/zero entries. __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ); Packet8d infs_and_nans = _mm512_mask_blend_pd( - neg_mask, p8d_nan, - _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd())); + neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -379,9 +374,9 @@ prsqrt(const Packet8d& _x) { x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); // Insert NaNs and Infs in all the right places. 
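// The "Newton's iteration" lines refine the 14-bit rsqrt14 estimate:
// for f(x) = x^-2 - a, Newton-Raphson gives
//
//   x' = x * (1.5 - 0.5 * a * x * x)
//
// which is exactly pmul(x, pmadd(neg_half, pmul(x, x), one_point_five))
// with neg_half = -0.5 * a. Each step roughly doubles the number of
// correct bits, so one step suffices for float and two are used for
// double.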
- return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans); } -#else +#elif defined(EIGEN_VECTORIZE_AVX512ER) template <> EIGEN_STRONG_INLINE Packet16f prsqrt(const Packet16f& x) { return _mm512_rsqrt28_ps(x); diff --git a/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h index f6500a1..5adddc7 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) { pstore(to, pa); } -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template <> EIGEN_STRONG_INLINE float pfirst(const Packet16f& a) { @@ -648,13 +648,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a) template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) { // _mm512_abs_ps intrinsic not found, so hack around it - return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff)); + return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff))); } template <> EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { // _mm512_abs_ps intrinsic not found, so hack around it - return (__m512d)_mm512_and_si512((__m512i)a, - _mm512_set1_epi64(0x7fffffffffffffff)); + return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a), + _mm512_set1_epi64(0x7fffffffffffffff))); } #ifdef EIGEN_VECTORIZE_AVX512DQ diff --git a/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h b/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h index 67db2f8..3e66573 100644 --- a/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h @@ -224,23 +224,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { @@ -416,23 +400,8 @@ template<> struct conj_helper return pconj(internal::pmul(a, b)); } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { 
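// On the pabs hunks above: C-style casts between __m512 and __m512i
// compile under GCC's vector extensions but are rejected by stricter
// front ends; the _mm512_cast* intrinsics are the portable spelling of
// the same zero-instruction bitcast. The idiom is the usual sign-bit
// clear, sketched in scalar form:
//
//   abs_bits(f) = bits(f) & 0x7fffffff   // clear bit 31, keep the rest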
return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(internal::pmul(x, y.v)); } -}; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { diff --git a/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h index b3f1ea1..08a27d1 100755 --- a/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; #else -static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; +static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; @@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, co template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_min(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_max(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } @@ -764,7 +782,7 @@ typedef __vector __bool long Packet2bl; static Packet2l p2l_ONE = { 1, 1 }; static Packet2l p2l_ZERO = 
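// Reading the VSX min/max asm above and below (a scalar model, assuming
// xxsel's usual select-second-under-mask behaviour):
//
//   pmin: mask = (a >= b); ret = mask ? b : a;
//   pmax: mask = (b >  a); ret = mask ? b : a;
//
// Both compares are false on NaN, so a NaN in `a` propagates while a
// NaN in `b` is ignored, matching Eigen's scalar min/max semantics,
// which plain vec_min/vec_max do not guarantee here.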
reinterpret_cast(p4i_ZERO); -static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO = reinterpret_cast(p4f_ZERO); static Packet2d p2d_MZERO = { -0.0, -0.0 }; @@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + } -template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; +} template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } @@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) Packet2d v[2], sum; v[0] = vecs[0] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[0]), reinterpret_cast(vecs[0]), 8)); v[1] = vecs[1] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[1]), reinterpret_cast(vecs[1]), 8)); - + #ifdef _BIG_ENDIAN sum = reinterpret_cast(vec_sld(reinterpret_cast(v[0]), reinterpret_cast(v[1]), 8)); #else @@ -1022,7 +1050,7 @@ ptranspose(PacketBlock& kernel) { template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; - Packet2bl mask = vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)); + Packet2bl mask = reinterpret_cast( vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)) ); return vec_sel(elsePacket, thenPacket, mask); } #endif // __VSX__ diff --git a/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h b/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h index 294c517..755e620 100644 --- a/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h +++ b/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h @@ -29,7 +29,7 @@ // type Eigen::half (inheriting from CUDA's __half struct) with // operator overloads such that it behaves basically as an arithmetic // type. It will be quite slow on CPUs (so it is recommended to stay -// in fp32 for CPUs, except for simple parameter conversions, I/O +// in float32_bits for CPUs, except for simple parameter conversions, I/O // to disk and the likes), but fast on GPUs. @@ -50,38 +50,45 @@ struct half; namespace half_impl { #if !defined(EIGEN_HAS_CUDA_FP16) - -// Make our own __half definition that is similar to CUDA's. -struct __half { - EIGEN_DEVICE_FUNC __half() {} - explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} +// Make our own __half_raw definition that is similar to CUDA's. 
+struct __half_raw { + EIGEN_DEVICE_FUNC __half_raw() : x(0) {} + explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} unsigned short x; }; - +#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000 +// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw +typedef __half __half_raw; #endif -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x); -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h); -struct half_base : public __half { +struct half_base : public __half_raw { EIGEN_DEVICE_FUNC half_base() {} - EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {} - EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {} + EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {} + EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {} +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000 + EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {} +#endif }; } // namespace half_impl // Class definition. struct half : public half_impl::half_base { - #if !defined(EIGEN_HAS_CUDA_FP16) - typedef half_impl::__half __half; + #if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000) + typedef half_impl::__half_raw __half_raw; #endif EIGEN_DEVICE_FUNC half() {} - EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {} EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {} +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000 + EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} +#endif explicit EIGEN_DEVICE_FUNC half(bool b) : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 
0x3c00 : 0)) {} @@ -138,71 +145,125 @@ struct half : public half_impl::half_base { } }; +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; + +// If std::numeric_limits is specialized, should also specialize +// std::numeric_limits, std::numeric_limits, and +// std::numeric_limits +// https://stackoverflow.com/a/16519653/ +template<> +struct numeric_limits : numeric_limits {}; +template<> +struct numeric_limits : numeric_limits {}; +template<> +struct numeric_limits : numeric_limits {}; +} // end namespace std + +namespace Eigen { + namespace half_impl { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 // Intrinsics for native fp16 support. Note that on current hardware, -// these are no faster than fp32 arithmetic (you need to use the half2 +// these are no faster than float32_bits arithmetic (you need to use the half2 // versions to get the ALU speed increased), but you do save the // conversion steps back and forth. 
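// Sanity notes on the numeric_limits values above (binary16 is 1 sign +
// 5 exponent + 10 mantissa bits, so digits == 11 counting the implicit
// leading one):
//
//   digits10 = floor((digits - 1) * log10(2)) = floor(10 * 0.30103) = 3
//   (min)()  : 0x0400 = 2^-14 ~= 6.104e-05, the smallest normal
//   (max)()  : 0x7bff = (2 - 2^-10) * 2^15 = 65504
//   lowest() : 0xfbff = -65504
//
// A hypothetical standalone check, not part of Eigen:
//
//   #include <cassert>
//   #include <cmath>
//   void check_binary16_limits() {
//     assert(std::ldexp(1.0, -14) == 6.103515625e-05);
//     assert((2.0 - std::ldexp(1.0, -10)) * std::ldexp(1.0, 15) == 65504.0);
//   }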
-__device__ half operator + (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) { return __hadd(a, b); } -__device__ half operator * (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) { return __hmul(a, b); } -__device__ half operator - (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) { return __hsub(a, b); } -__device__ half operator / (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) { float num = __half2float(a); float denom = __half2float(b); return __float2half(num / denom); } -__device__ half operator - (const half& a) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a) { return __hneg(a); } -__device__ half& operator += (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) { a = a + b; return a; } -__device__ half& operator *= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) { a = a * b; return a; } -__device__ half& operator -= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) { a = a - b; return a; } -__device__ half& operator /= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) { a = a / b; return a; } -__device__ bool operator == (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) { return __heq(a, b); } -__device__ bool operator != (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) { return __hne(a, b); } -__device__ bool operator < (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) { return __hlt(a, b); } -__device__ bool operator <= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) { return __hle(a, b); } -__device__ bool operator > (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) { return __hgt(a, b); } -__device__ bool operator >= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) { return __hge(a, b); } #else // Emulate support for half floats // Definitions for CPUs and older CUDA, mostly working through conversion -// to/from fp32. +// to/from float32_bits. EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { return half(float(a) + float(b)); @@ -238,10 +299,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) return a; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { - return float(a) == float(b); + return numext::equal_strict(float(a),float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { - return float(a) != float(b); + return numext::not_equal_strict(float(a), float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { return float(a) < float(b); @@ -269,34 +330,35 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { // these in hardware. 
If we need more performance on older/other CPUs, they are // also possible to vectorize directly. -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) { - __half h; +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) { + __half_raw h; h.x = x; return h; } -union FP32 { +union float32_bits { unsigned int u; float f; }; -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 - return __float2half(ff); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 + __half tmp_ff = __float2half(ff); + return *(__half_raw*)&tmp_ff; #elif defined(EIGEN_HAS_FP16_C) - __half h; + __half_raw h; h.x = _cvtss_sh(ff, 0); return h; #else - FP32 f; f.f = ff; + float32_bits f; f.f = ff; - const FP32 f32infty = { 255 << 23 }; - const FP32 f16max = { (127 + 16) << 23 }; - const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; + const float32_bits f32infty = { 255 << 23 }; + const float32_bits f16max = { (127 + 16) << 23 }; + const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; unsigned int sign_mask = 0x80000000u; - __half o; + __half_raw o; o.x = static_cast(0x0u); unsigned int sign = f.u & sign_mask; @@ -335,17 +397,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { #endif } -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __half2float(h); #elif defined(EIGEN_HAS_FP16_C) return _cvtsh_ss(h.x); #else - const FP32 magic = { 113 << 23 }; + const float32_bits magic = { 113 << 23 }; const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift - FP32 o; + float32_bits o; o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits unsigned int exp = shifted_exp & o.u; // just the exponent @@ -370,7 +432,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) { return (a.x & 0x7fff) == 0x7c00; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hisnan(a); #else return (a.x & 0x7fff) > 0x7c00; @@ -386,11 +448,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { - return half(::expf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hexp(a)); +#else + return half(::expf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return Eigen::half(::hlog(a)); +#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 + return half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -402,7 +468,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } 
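// A minimal round trip through the two helpers above (a sketch;
// half_roundtrip is a hypothetical free function, not part of Eigen):
//
//   inline float half_roundtrip(float f) {
//     __half_raw h = float_to_half_rtne(f);  // round to nearest even
//     return half_to_float(h);  // exact whenever f fits in binary16
//   }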
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { - return half(::sqrtf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hsqrt(a)); +#else + return half(::sqrtf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -420,14 +490,22 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hfloor(a)); +#else return half(::floorf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hceil(a)); +#else return half(::ceilf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hlt(b, a) ? b : a; #else const float f1 = static_cast(a); @@ -436,7 +514,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { #endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hlt(a, b) ? b : a; #else const float f1 = static_cast(a); @@ -474,49 +552,6 @@ template<> struct is_arithmetic { enum { value = true }; }; } // end namespace internal -} // end namespace Eigen - -namespace std { -template<> -struct numeric_limits { - static const bool is_specialized = true; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const bool has_infinity = true; - static const bool has_quiet_NaN = true; - static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = denorm_present; - static const bool has_denorm_loss = false; - static const std::float_round_style round_style = std::round_to_nearest; - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const int digits = 11; - static const int digits10 = 2; - //static const int max_digits10 = ; - static const int radix = 2; - static const int min_exponent = -13; - static const int min_exponent10 = -4; - static const int max_exponent = 16; - static const int max_exponent10 = 4; - static const bool traps = true; - static const bool tinyness_before = false; - - static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } - static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } - static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } - static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } - static Eigen::half round_error() { return Eigen::half(0.5); } - static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } - static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half 
denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } -}; -} - -namespace Eigen { - template<> struct NumTraits : GenericNumTraits { @@ -557,7 +592,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return Eigen::half(::hlog(a)); #else return Eigen::half(::logf(float(a))); @@ -591,14 +626,18 @@ struct hash { // Add the missing shfl_xor intrinsic -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { + #if EIGEN_CUDACC_VER < 90000 return static_cast(__shfl_xor(static_cast(var), laneMask, width)); + #else + return static_cast(__shfl_xor_sync(0xFFFFFFFF, static_cast(var), laneMask, width)); + #endif } #endif -// ldg() has an overload for __half, but we also need one for Eigen::half. -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +// ldg() has an overload for __half_raw, but we also need one for Eigen::half. +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { return Eigen::half_impl::raw_uint16_to_half( __ldg(reinterpret_cast(ptr))); @@ -606,7 +645,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) #endif -#if defined(__CUDA_ARCH__) +#if defined(EIGEN_CUDA_ARCH) namespace Eigen { namespace numext { diff --git a/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225..c66d384 100644 --- a/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -99,7 +99,8 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& template<> __device__ EIGEN_STRONG_INLINE half2 pabs(const half2& a) { half2 result; - result.x = a.x & 0x7FFF7FFF; + unsigned temp = *(reinterpret_cast(&(a))); + *(reinterpret_cast(&(result))) = temp & 0x7FFF7FFF; return result; } @@ -275,7 +276,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return __floats2half2_rn(r1, r2); } -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 template<> __device__ EIGEN_STRONG_INLINE half2 plog(const half2& a) { diff --git a/third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h b/third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h new file mode 100644 index 0000000..4cfe34e --- /dev/null +++ b/third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h @@ -0,0 +1,29 @@ + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
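// The macro below replaces the hand-written pairs of mixed real/complex
// conj_helper specializations that each SIMD backend (SSE, AVX, AltiVec,
// NEON, ZVector) previously duplicated, i.e. the blocks deleted
// elsewhere in this patch. One invocation per packet pair suffices:
//
//   EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f)
//
// expands to conj_helper<Packet8f, Packet4cf, false, false> and
// conj_helper<Packet4cf, Packet8f, false, false>, whose pmul/pmadd
// forward to the real packet's pmul against the complex packet's .v
// member (no conjugation is needed for a real operand).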
+
+#ifndef EIGEN_ARCH_CONJ_HELPER_H
+#define EIGEN_ARCH_CONJ_HELPER_H
+
+#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)                                                \
+  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false, false> {                                         \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); }                                                                                \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const                        \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); }                                           \
+  };                                                                                                              \
+                                                                                                                  \
+  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false, false> {                                         \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); }                                                                                \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const                        \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); }                                           \
+  };
+
+#endif // EIGEN_ARCH_CONJ_HELPER_H
diff --git a/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h b/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h
index 57e9b43..306a309 100644
--- a/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h
+++ b/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h
@@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
 {
   float32x2_t r64;
-  r64 = vld1_f32((float *)&from);
+  r64 = vld1_f32((const float *)&from);
 
   return Packet2cf(vcombine_f32(r64, r64));
 }
@@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
   to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
 }
 
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
 
 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
 {
@@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
   }
 };
 
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
   // TODO optimize it for NEON
@@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
   s = vmulq_f32(b.v, b.v);
   rev_s = vrev64q_f32(s);
 
-  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
 }
 
 EIGEN_DEVICE_FUNC inline void
@@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
 
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
 
 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
 {
@@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
   }
 };
 
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
   // TODO optimize it for NEON
diff --git a/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h
index 836fbc0..3d5ed0d 100644
---
a/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h @@ -36,12 +36,43 @@ namespace internal { #endif #endif +#if EIGEN_COMP_MSVC + +// In MSVC's arm_neon.h header file, all NEON vector types +// are aliases to the same underlying type __n128. +// We thus have to wrap them to make them different C++ types. +// (See also bug 1428) + +template +struct eigen_packet_wrapper +{ + operator T&() { return m_val; } + operator const T&() const { return m_val; } + eigen_packet_wrapper() {} + eigen_packet_wrapper(const T &v) : m_val(v) {} + eigen_packet_wrapper& operator=(const T &v) { + m_val = v; + return *this; + } + + T m_val; +}; +typedef eigen_packet_wrapper Packet2f; +typedef eigen_packet_wrapper Packet4f; +typedef eigen_packet_wrapper Packet4i; +typedef eigen_packet_wrapper Packet2i; +typedef eigen_packet_wrapper Packet4ui; + +#else + typedef float32x2_t Packet2f; typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; typedef int32x2_t Packet2i; typedef uint32x4_t Packet4ui; +#endif // EIGEN_COMP_MSVC + #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ const Packet4f p4f_##NAME = pset1(X) diff --git a/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h b/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h index 5607fe0..d075043 100644 --- a/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h @@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3))); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -229,23 +229,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { @@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { @@ -430,23 +414,7 @@ template<> struct conj_helper } }; -template<> 
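// On the MSVC wrapper above: arm_neon.h under MSVC defines float32x4_t,
// int32x4_t, uint32x4_t, ... as typedefs of one builtin type (__n128),
// so overloads taking different packet types collapse into identical
// signatures. The wrapper is therefore tagged with a small integer so
// each packet gets a distinct C++ type (a sketch following the Eigen
// 3.3.5 source; treat the exact ids as an assumption):
//
//   template <typename T, int unique_id> struct eigen_packet_wrapper { ... };
//   typedef eigen_packet_wrapper<float32x4_t, 1> Packet4f;
//   typedef eigen_packet_wrapper<int32x4_t,   2> Packet4i;
//
// and the implicit operator T& conversions let wrapped values flow
// straight into the NEON intrinsics.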
struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { diff --git a/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h index 3832de1..60e2517 100755 --- a/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h @@ -28,7 +28,7 @@ namespace internal { #endif #endif -#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004) +#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. // One solution is to increase ABI version using -fabi-version=4 (or greater). @@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1(double* to, const double& pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); } +#if EIGEN_COMP_PGI +typedef const void * SsePrefetchPtrType; +#else +typedef const char * SsePrefetchPtrType; +#endif + #ifndef EIGEN_VECTORIZE_AVX -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 @@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co } // end namespace Eigen +#if EIGEN_COMP_PGI +// PGI++ does not define the following intrinsics in C++ mode. 
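// The reinterpret_cast shims that follow stand in for the _mm_cast*
// intrinsics PGI omits in C++ mode: each is a pure bitcast between SSE
// register types, zero instructions with the bits unchanged, which is
// exactly what the real intrinsics do elsewhere. SsePrefetchPtrType
// above plays the same role for _mm_prefetch, whose first parameter PGI
// declares as const void* where other toolchains use const char*.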
+static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); } +static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); } +static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); } +static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); } +#endif + #endif // EIGEN_PACKET_MATH_SSE_H diff --git a/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h b/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h index c848932..c6ca8c7 100644 --- a/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -14,6 +14,7 @@ namespace Eigen { namespace internal { +#ifndef EIGEN_VECTORIZE_AVX template <> struct type_casting_traits { enum { @@ -23,11 +24,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { - return _mm_cvttps_epi32(a); -} - - template <> struct type_casting_traits { enum { @@ -37,11 +33,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { - return _mm_cvtepi32_ps(a); -} - - template <> struct type_casting_traits { enum { @@ -51,10 +42,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { - return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); -} - template <> struct type_casting_traits { enum { @@ -63,6 +50,19 @@ struct type_casting_traits { TgtCoeffRatio = 2 }; }; +#endif + +template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { + return _mm_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { + return _mm_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { + return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); +} template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f& a) { // Simply discard the second half of the input diff --git a/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h b/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h index d39d2d1..1bfb733 100644 --- a/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h @@ -336,6 +336,9 @@ template<> struct conj_helper } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) + template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec diff --git a/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h b/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h index 96747ba..3eae6b8 100644 --- a/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h +++ b/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h @@ -255,7 +255,7 @@ struct scalar_cmp_op : binary_op_base struct scalar_hypot_op : binary_op_base { EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar 
&y) const { - EIGEN_USING_STD_MATH(sqrt) - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - return p * sqrt(Scalar(1) + qp*qp); + // This functor is used by hypotNorm only for which it is faster to first apply abs + // on all coefficients prior to reduction through hypot. + // This way we avoid calling abs on positive and real entries, and this also permits + // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes + // through the same functor... + return internal::positive_real_hypot(x,y); } }; template diff --git a/third_party_includes/Eigen/src/Core/functors/StlFunctors.h b/third_party_includes/Eigen/src/Core/functors/StlFunctors.h index 6df3fa5..9c1d758 100644 --- a/third_party_includes/Eigen/src/Core/functors/StlFunctors.h +++ b/third_party_includes/Eigen/src/Core/functors/StlFunctors.h @@ -83,13 +83,17 @@ struct functor_traits > { enum { Cost = functor_traits::Cost, PacketAccess = false }; }; #endif +#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910) +// std::unary_negate is deprecated since c++17 and will be removed in c++20 template struct functor_traits > { enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; +// std::binary_negate is deprecated since c++17 and will be removed in c++20 template struct functor_traits > { enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; +#endif #ifdef EIGEN_STDEXT_SUPPORT diff --git a/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 45230bc..e3980f6 100644 --- a/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1197,10 +1197,16 @@ void gebp_kernel=6 without FMA (bug 1637) + #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1)); + #else + #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND + #endif + #define EIGEN_GEBGP_ONESTEP(K) \ do { \ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \ - EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ @@ -1212,6 +1218,7 @@ void gebp_kernel::half SResPacketHalf; + const int SResPacketHalfSize = unpacket_traits::half>::size; if ((SwappedTraits::LhsProgress % 4) == 0 && (SwappedTraits::LhsProgress <= 8) && - (SwappedTraits::LhsProgress!=8 || unpacket_traits::size==nr)) + (SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr)) { SAccPacket C0, C1, C2, C3; straits.initAcc(C0); diff --git a/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 41e18ff..f6f9ebe 100644 --- a/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product& blocking) \ { \ - if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \ general_matrix_matrix_rankupdate \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ @@ -88,7 +88,7 @@ struct 
general_matrix_matrix_rankupdate(lhsStride), ldc=convert_index(resStride), n=convert_index(size), k=convert_index(depth); \ char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \ EIGTYPE beta(1); \ - BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \ + BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \ } \ }; @@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ }}; -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, double, z) -GEMM_SPECIALIZATION(scomplex, cf, float, c) +#ifdef EIGEN_USE_MKL +GEMM_SPECIALIZATION(double, d, double, dgemm) +GEMM_SPECIALIZATION(float, f, float, sgemm) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm) +#else +GEMM_SPECIALIZATION(double, d, double, dgemm_) +GEMM_SPECIALIZATION(float, f, float, sgemm_) +GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_) +GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_) +#endif } // end namespase internal diff --git a/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h b/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h index 3c1a7fc..a597c1f 100644 --- a/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h @@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product \ struct general_matrix_vector_product_gemv \ { \ @@ -113,14 +113,21 @@ static void run( \ x_ptr=x_tmp.data(); \ incx=1; \ } else x_ptr=rhs; \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; -EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d) -EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s) -EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z) -EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv) +#else +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_) +#endif } // end namespase internal diff --git a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index a45238d..9a53185 100644 --- a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ 
b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -40,7 +40,7 @@ namespace internal { /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -81,13 +81,13 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -144,20 +144,26 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -EIGEN_BLAS_SYMM_L(double, double, d, d) -EIGEN_BLAS_SYMM_L(float, float, f, s) -EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_L(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_L(double, double, d, dsymm) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm) +EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_L(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_) +#endif /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -197,13 +203,13 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } else b = _lhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -259,15 +265,21 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ } \ }; -EIGEN_BLAS_SYMM_R(double, double, d, d) -EIGEN_BLAS_SYMM_R(float, float, f, s) -EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_R(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_R(double, double, d, dsymm) 
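// The pattern in this and the other *_BLAS.h files: the macros now take
// the complete BLAS symbol instead of a one-letter prefix, so a single
// macro body can bind to either ABI. Plain Fortran BLAS exports
// underscore-suffixed symbols (dsymm_, zhemm_, ...) and takes every
// argument by pointer, while MKL's C interface drops the underscore and
// uses MKL_Complex8/MKL_Complex16 for complex data, which is also why
// alpha and beta gained explicit (const BLASTYPE*) casts.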
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm) +EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_R(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_R(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_) +#endif } // end namespace internal } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h index 38f23ac..1238345 100644 --- a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h @@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ } else x_ptr=_rhs; \ - BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) +#else EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_) EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_) +#endif } // end namespace internal diff --git a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h index 6ec5a8a..f784507 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); + // To work around an "error: member reference base type 'Matrix<...> + // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is + // not a structure or union" compilation error in nvcc (tested V8.0.61), + // create a dummy internal::constructor_without_unaligned_array_assert + // object to pass to the Matrix constructor. 
+ internal::constructor_without_unaligned_array_assert a; + Matrix triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); + internal::constructor_without_unaligned_array_assert a; + Matrix triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -393,7 +400,9 @@ struct triangular_product_impl { template static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typedef typename Dest::Scalar Scalar; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar Scalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; @@ -405,8 +414,9 @@ struct triangular_product_impl typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) - * RhsBlasTraits::extractScalarFactor(a_rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); + Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha; typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -431,6 +441,21 @@ struct triangular_product_impl &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); + + // Apply correction if the diagonal is unit and a scalar factor was nested: + if ((Mode&UnitDiag)==UnitDiag) + { + if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); + } + else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + { + Index diagSize = (std::min)(rhs.rows(),rhs.cols()); + dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); + } + } } }; diff --git a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index aecded6..a25197a 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) // implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm \ @@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm struct trmv_selector typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type::type 
actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -274,6 +275,12 @@ template struct trmv_selector else dest = MappedDest(actualDestPtr, dest.size()); } + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; @@ -295,8 +302,9 @@ template struct trmv_selector typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 @@ -326,6 +334,12 @@ template struct trmv_selector actualRhsPtr,1, dest.data(),dest.innerStride(), actualAlpha); + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; diff --git a/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h index 07bf26c..3d47a2b 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h @@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) // implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template \ struct triangular_matrix_vector_product_trmv { \ enum { \ @@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv(size); \ n = convert_index(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_CM(double, double, d, d) -EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_CM(float, float, f, s) -EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_CM(double, double, d, d,) +EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_CM(float, float, f, s,) +EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_CM(double, double, d, d, _) +EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _) +EIGEN_BLAS_TRMV_CM(float, float, f, s, _) +EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _) +#endif // implements row-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_RM(EIGTYPE, 
BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template \ struct triangular_matrix_vector_product_trmv { \ enum { \ @@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv(size); \ n = convert_index(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_RM(double, double, d, d) -EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_RM(float, float, f, s) -EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_RM(double, double, d, d,) +EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_RM(float, float, f, s,) +EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_RM(double, double, d, d,_) +EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_) +EIGEN_BLAS_TRMV_RM(float, float, f, s,_) +EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_) +#endif } // end namespace internal diff --git a/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h index 88c0fb7..f077511 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h @@ -38,7 +38,7 @@ namespace Eigen { namespace internal { // implements LeftSide op(triangular)^-1 * general -#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ +#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \ template \ struct triangular_solve_matrix \ { \ @@ -80,18 +80,24 @@ struct triangular_solve_matrix \ struct triangular_solve_matrix \ { \ @@ -133,16 +139,22 @@ struct triangular_solve_matrix=6 +#elif defined __GNUC__ - #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic push #endif - #pragma GCC diagnostic ignored "-Wignored-attributes" + // g++ warns about local variables shadowing member functions, which is too strict + #pragma GCC diagnostic ignored "-Wshadow" + #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 + // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions: + #pragma GCC diagnostic ignored "-Wtype-limits" + #endif + #if __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif #endif diff --git a/third_party_includes/Eigen/src/Core/util/MKL_support.h b/third_party_includes/Eigen/src/Core/util/MKL_support.h index 26b5966..b7d6ecc 100755 --- a/third_party_includes/Eigen/src/Core/util/MKL_support.h +++ b/third_party_includes/Eigen/src/Core/util/MKL_support.h @@ -49,10 +49,11 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL) #define EIGEN_USE_MKL #endif + #if defined EIGEN_USE_MKL # include /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ @@ -108,6 +109,10 @@ #endif #endif +#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL) +#include "../../misc/blas.h" +#endif + namespace Eigen { typedef std::complex dcomplex; @@ -121,8 +126,5 @@ typedef int
BlasIndex; } // end namespace Eigen -#if defined(EIGEN_USE_BLAS) -#include "../../misc/blas.h" -#endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/third_party_includes/Eigen/src/Core/util/Macros.h b/third_party_includes/Eigen/src/Core/util/Macros.h index 38d6ddb..aa054a0 100644 --- a/third_party_includes/Eigen/src/Core/util/Macros.h +++ b/third_party_includes/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 4 +#define EIGEN_MINOR_VERSION 7 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -399,7 +399,7 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 @@ -413,7 +413,7 @@ #ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ @@ -487,11 +487,13 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. +#ifndef EIGEN_STRONG_INLINE #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline #endif +#endif // EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible // attribute to maximize inlining. This should only be used when really necessary: in particular, @@ -812,7 +814,8 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0) + // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) @@ -986,7 +989,13 @@ namespace Eigen { # define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() -# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# if EIGEN_COMP_MSVC + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway.
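// Background for the branch completed below (editorial note): dynamic
// exception specifications such as throw(X) are deprecated since C++11 and
// were removed in C++17; MSVC never implemented them and emits warning
// C4290, honoring only the empty form. A minimal sketch of the two
// spellings (compiled as C++03/C++11; the demo_* names are hypothetical):
#include <stdexcept>
void demo_may_throw() throw(std::runtime_error); // C4290 under MSVC, spec ignored
void demo_nothrow() throw();                     // accepted and enforced by MSVC
// Mapping EIGEN_EXCEPTION_SPEC(X) to throw() on MSVC therefore silences the
// warning without changing behavior on that compiler.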
+# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif #endif #endif // EIGEN_MACROS_H diff --git a/third_party_includes/Eigen/src/Core/util/Memory.h b/third_party_includes/Eigen/src/Core/util/Memory.h index c634d7e..291383c 100644 --- a/third_party_includes/Eigen/src/Core/util/Memory.h +++ b/third_party_includes/Eigen/src/Core/util/Memory.h @@ -70,7 +70,7 @@ inline void throw_std_bad_alloc() throw std::bad_alloc(); #else std::size_t huge = static_cast(-1); - new int[huge]; + ::operator new(huge); #endif } @@ -493,7 +493,7 @@ template struct smart_copy_helper { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); - memcpy(target, start, size); + std::memcpy(target, start, size); } }; @@ -696,7 +696,15 @@ template void swap(scoped_array &a,scoped_array &b) /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with types requiring a non standard alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in the following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. * * Example: * \code @@ -739,7 +747,15 @@ class aligned_allocator : public std::allocator pointer allocate(size_type num, const void* /*hint*/ = 0) { internal::check_size_for_overflow(num); - return static_cast( internal::aligned_malloc(num * sizeof(T)) ); + size_type size = num * sizeof(T); +#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0) + // workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544 + // It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807 + if(size>=std::size_t((std::numeric_limits::max)())) + return 0; + else +#endif + return static_cast( internal::aligned_malloc(size) ); } void deallocate(pointer p, size_type /*num*/) diff --git a/third_party_includes/Eigen/src/Core/util/Meta.h b/third_party_includes/Eigen/src/Core/util/Meta.h index 7f63707..d31e954 100755 --- a/third_party_includes/Eigen/src/Core/util/Meta.h +++ b/third_party_includes/Eigen/src/Core/util/Meta.h @@ -109,6 +109,28 @@ template<> struct is_integral { enum { value = true }; }; template<> struct is_integral { enum { value = true }; }; template<> struct is_integral { enum { value = true }; }; +#if EIGEN_HAS_CXX11 +using std::make_unsigned; +#else +// TODO: Possibly improve this implementation of make_unsigned. +// It is currently used only by +// template struct random_default_impl.
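// What the C++98 fallback below reproduces (editorial sketch): the C++11
// trait that maps each signed integer type to its unsigned counterpart,
// needed by the random_default_impl mentioned in the comment above. With a
// C++11 compiler the alias simply forwards to the standard trait:
#include <type_traits>
static_assert(std::is_same<std::make_unsigned<long>::type, unsigned long>::value,
              "make_unsigned maps long to unsigned long");
static_assert(std::is_same<std::make_unsigned<signed char>::type, unsigned char>::value,
              "and signed char to unsigned char");
// The table of specializations that follows hand-codes exactly these
// mappings for pre-C++11 builds, including the MSVC-only __int64 pair.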
+template struct make_unsigned; +template<> struct make_unsigned { typedef unsigned char type; }; +template<> struct make_unsigned { typedef unsigned char type; }; +template<> struct make_unsigned { typedef unsigned char type; }; +template<> struct make_unsigned { typedef unsigned short type; }; +template<> struct make_unsigned { typedef unsigned short type; }; +template<> struct make_unsigned { typedef unsigned int type; }; +template<> struct make_unsigned { typedef unsigned int type; }; +template<> struct make_unsigned { typedef unsigned long type; }; +template<> struct make_unsigned { typedef unsigned long type; }; +#if EIGEN_COMP_MSVC +template<> struct make_unsigned { typedef unsigned __int64 type; }; +template<> struct make_unsigned { typedef unsigned __int64 type; }; +#endif +#endif + template struct add_const { typedef const T type; }; template struct add_const { typedef T& type; }; @@ -485,6 +507,26 @@ T div_ceil(const T &a, const T &b) return (a+b-1) / b; } +// The aim of the following functions is to bypass -Wfloat-equal warnings +// when we really want a strict equality comparison on floating points. +template EIGEN_STRONG_INLINE +bool equal_strict(const X& x,const Y& y) { return x == y; } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const float& x,const float& y) { return std::equal_to()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const double& x,const double& y) { return std::equal_to()(x,y); } + +template EIGEN_STRONG_INLINE +bool not_equal_strict(const X& x,const Y& y) { return x != y; } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to()(x,y); } + } // end namespace numext } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h b/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h index 86b60f5..ecc82b7 100644 --- a/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -8,7 +8,7 @@ #pragma warning pop #elif defined __clang__ #pragma clang diagnostic pop - #elif defined __GNUC__ && __GNUC__>=6 + #elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic pop #endif diff --git a/third_party_includes/Eigen/src/Core/util/StaticAssert.h b/third_party_includes/Eigen/src/Core/util/StaticAssert.h index 983361a..500e477 100644 --- a/third_party_includes/Eigen/src/Core/util/StaticAssert.h +++ b/third_party_includes/Eigen/src/Core/util/StaticAssert.h @@ -24,6 +24,7 @@ * */ +#ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) @@ -44,64 +45,65 @@ struct static_assertion { enum { - YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, - YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, - YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, - THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, - OUT_OF_RANGE_ACCESS, - YOU_MADE_A_PROGRAMMING_MISTAKE, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, - EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, - YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, - 
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, - UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, - THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, - NUMERIC_TYPE_MUST_BE_REAL, - COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, - WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, - THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, - INVALID_MATRIX_PRODUCT, - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, - THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, - INVALID_MATRIX_TEMPLATE_PARAMETERS, - INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, - BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, - THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, - YOU_ALREADY_SPECIFIED_THIS_STRIDE, - INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, - THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, - THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, - YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, - YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, - THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, - YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, - THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, - THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, - THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, - YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, - YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, - THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, - IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED, - STORAGE_KIND_MUST_MATCH, - STORAGE_INDEX_MUST_MATCH, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, + THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, + OUT_OF_RANGE_ACCESS=1, + YOU_MADE_A_PROGRAMMING_MISTAKE=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, + YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, + YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, + UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, + THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, + NUMERIC_TYPE_MUST_BE_REAL=1, + COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, + 
WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, + INVALID_MATRIX_PRODUCT=1, + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, + INVALID_MATRIX_TEMPLATE_PARAMETERS=1, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, + BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, + THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, + YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, + THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, + YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, + THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, + THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, + THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, + YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, + YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, + THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, + STORAGE_LAYOUT_DOES_NOT_MATCH=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, + THIS_TYPE_IS_NOT_SUPPORTED=1, + STORAGE_KIND_MUST_MATCH=1, + STORAGE_INDEX_MUST_MATCH=1, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1 }; }; @@ -131,7 +133,7 @@ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #endif // EIGEN_NO_STATIC_ASSERT - +#endif // EIGEN_STATIC_ASSERT // static assertion failing if the type \a TYPE is not a vector type #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ diff --git a/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 36a91df..87d789b 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -311,7 +311,6 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp // Aliases: Map v(reinterpret_cast(m_tmp.data()), size); ComplexVectorType &cv = m_tmp; - const MatrixType &mZ = m_realQZ.matrixZ(); const MatrixType &mS = m_realQZ.matrixS(); const MatrixType &mT = m_realQZ.matrixT(); @@ -351,7 +350,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } } } - m_eivec.col(i).real().noalias() = mZ.transpose() * v; + m_eivec.col(i).real().noalias() = 
m_realQZ.matrixZ().transpose() * v; m_eivec.col(i).real().normalize(); m_eivec.col(i).imag().setConstant(0); } @@ -400,7 +399,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp / (alpha*mT.coeffRef(j,j) - static_cast(beta*mS.coeffRef(j,j))); } } - m_eivec.col(i+1).noalias() = (mZ.transpose() * cv); + m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv); m_eivec.col(i+1).normalize(); m_eivec.col(i) = m_eivec.col(i+1).conjugate(); } diff --git a/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index 4fec8af..e4e4260 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -66,7 +66,6 @@ template inline typename MatrixBase::EigenvaluesReturnType MatrixBase::eigenvalues() const { - typedef typename internal::traits::Scalar Scalar; return internal::eigenvalues_selector::IsComplex>::run(derived()); } @@ -88,7 +87,6 @@ template inline typename SelfAdjointView::EigenvaluesReturnType SelfAdjointView::eigenvalues() const { - typedef typename SelfAdjointView::PlainObject PlainObject; PlainObject thisAsMatrix(*this); return SelfAdjointEigenSolver(thisAsMatrix, false).eigenvalues(); } diff --git a/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h b/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h index f5c8604..17ea903 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h +++ b/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h @@ -303,7 +303,7 @@ RealSchur& RealSchur::computeFromHessenberg(const HessMa Scalar exshift(0); // sum of exceptional shifts Scalar norm = computeNormOfT(); - if(norm!=0) + if(norm!=Scalar(0)) { while (iu >= 0) { @@ -327,7 +327,7 @@ RealSchur& RealSchur::computeFromHessenberg(const HessMa else // No convergence yet { // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG ) - Vector3s firstHouseholderVector(0,0,0), shiftInfo; + Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo; computeShift(iu, iter, exshift, shiftInfo); iter = iter + 1; totalIter = totalIter + 1; diff --git a/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h b/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h index 3891cf8..b0c947d 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +++ b/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h @@ -37,7 +37,7 @@ namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \ +#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \ template<> template inline \ SelfAdjointEigenSolver >& \ SelfAdjointEigenSolver >::compute(const EigenBase& matrix, int options) \ @@ -47,7 +47,7 @@ SelfAdjointEigenSolver >::compute(c && (options&EigVecMask)!=EigVecMask \ && "invalid option parameter"); \ bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ - lapack_int n = internal::convert_index(matrix.cols()), lda, matrix_order, info; \ + lapack_int n = internal::convert_index(matrix.cols()), lda, info; \ m_eivalues.resize(n,1); \ m_subdiag.resize(n-1); \ m_eivec = matrix; \ @@ -63,27 +63,24 @@ SelfAdjointEigenSolver >::compute(c } \ 
\ lda = internal::convert_index(m_eivec.outerStride()); \ - matrix_order=LAPACKE_COLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 'V' : 'N'; \ \ - info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ + info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ m_info = (info==0) ? Success : NoConvergence; \ m_isInitialized = true; \ m_eigenvectorsOk = computeEigenvectors; \ return *this; \ } +#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \ + EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \ + EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor ) -EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR) - -EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev) +EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev) +EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev) +EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev) } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Geometry/AngleAxis.h b/third_party_includes/Eigen/src/Geometry/AngleAxis.h index 0af3c1b..83ee1be 100644 --- a/third_party_includes/Eigen/src/Geometry/AngleAxis.h +++ b/third_party_includes/Eigen/src/Geometry/AngleAxis.h @@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis& AngleAxis::operator=(const Quaterni if (n != Scalar(0)) { m_angle = Scalar(2)*atan2(n, abs(q.w())); - if(q.w() < 0) + if(q.w() < Scalar(0)) n = -n; m_axis = q.vec() / n; } diff --git a/third_party_includes/Eigen/src/Geometry/Quaternion.h b/third_party_includes/Eigen/src/Geometry/Quaternion.h index 3e5a9ba..c3fd8c3 100644 --- a/third_party_includes/Eigen/src/Geometry/Quaternion.h +++ b/third_party_includes/Eigen/src/Geometry/Quaternion.h @@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef typename internal::traits::Coefficients Coefficients; + typedef typename Coefficients::CoeffReturnType CoeffReturnType; + typedef typename internal::conditional::Flags&LvalueBit), + Scalar&, CoeffReturnType>::type NonConstCoeffReturnType; + + enum { Flags = Eigen::internal::traits::Flags }; @@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase /** \returns the \c x coefficient */ - EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); } + EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); } /** \returns the \c y coefficient */ - EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); } + EIGEN_DEVICE_FUNC inline 
CoeffReturnType y() const { return this->derived().coeffs().coeff(1); } /** \returns the \c z coefficient */ - EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); } + EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); } /** \returns the \c w coefficient */ - EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); } - - /** \returns a reference to the \c x coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } + EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); } + + /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); } + /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); } + /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); } + /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ EIGEN_DEVICE_FUNC inline const VectorBlock vec() const { return coeffs().template head<3>(); } diff --git a/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index facdaf8..f66c846 100644 --- a/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -168,7 +168,7 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { for(Index j=0; jRealScalar(0)) m_invdiag(j) = RealScalar(1)/sum; else diff --git a/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 395daa8..f7ce471 100644 --- a/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -50,7 +50,8 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, tol_error = 0; return; } - RealScalar threshold = tol*tol*rhsNorm2; + const RealScalar considerAsZero = (std::numeric_limits::min)(); + RealScalar threshold = numext::maxi(tol*tol*rhsNorm2,considerAsZero); RealScalar residualNorm2 = residual.squaredNorm(); if (residualNorm2 < threshold) { @@ -58,7 +59,7 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, tol_error = sqrt(residualNorm2 / rhsNorm2); return; } - + VectorType p(n); p = precond.solve(residual); // initial search direction diff --git 
a/third_party_includes/Eigen/src/Jacobi/Jacobi.h b/third_party_includes/Eigen/src/Jacobi/Jacobi.h index c30326e..1998c63 100644 --- a/third_party_includes/Eigen/src/Jacobi/Jacobi.h +++ b/third_party_includes/Eigen/src/Jacobi/Jacobi.h @@ -65,11 +65,11 @@ template class JacobiRotation bool makeJacobi(const MatrixBase&, Index p, Index q); bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z); - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0); protected: - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::true_type); - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::false_type); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type); Scalar m_c, m_s; }; @@ -84,7 +84,6 @@ bool JacobiRotation::makeJacobi(const RealScalar& x, const Scalar& y, co { using std::sqrt; using std::abs; - typedef typename NumTraits::Real RealScalar; RealScalar deno = RealScalar(2)*abs(y); if(deno < (std::numeric_limits::min)()) { @@ -133,7 +132,7 @@ inline bool JacobiRotation::makeJacobi(const MatrixBase& m, Ind * \f$ V = \left ( \begin{array}{c} p \\ q \end{array} \right )\f$ yields: * \f$ G^* V = \left ( \begin{array}{c} r \\ 0 \end{array} \right )\f$. * - * The value of \a z is returned if \a z is not null (the default is null). + * The value of \a r is returned if \a r is not null (the default is null). * Also note that G is built such that the cosine is always real. * * Example: \include Jacobi_makeGivens.cpp @@ -146,9 +145,9 @@ inline bool JacobiRotation::makeJacobi(const MatrixBase& m, Ind * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ template -void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar* z) +void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar* r) { - makeGivens(p, q, z, typename internal::conditional::IsComplex, internal::true_type, internal::false_type>::type()); + makeGivens(p, q, r, typename internal::conditional::IsComplex, internal::true_type, internal::false_type>::type()); } @@ -298,61 +297,119 @@ inline void MatrixBase::applyOnTheRight(Index p, Index q, const JacobiR } namespace internal { -template -void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) + +template +struct apply_rotation_in_the_plane_selector { - typedef typename VectorX::Scalar Scalar; - enum { - PacketSize = packet_traits::size, - OtherPacketSize = packet_traits::size - }; - typedef typename packet_traits::type Packet; - typedef typename packet_traits::type OtherPacket; - eigen_assert(xpr_x.size() == xpr_y.size()); - Index size = xpr_x.size(); - Index incrx = xpr_x.derived().innerStride(); - Index incry = xpr_y.derived().innerStride(); + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + for(Index i=0; i +struct apply_rotation_in_the_plane_selector +{ + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + enum { + PacketSize = packet_traits::size, + OtherPacketSize = packet_traits::size + }; + typedef typename packet_traits::type Packet; + typedef typename packet_traits::type OtherPacket; + + /*** dynamic-size vectorized paths ***/ + if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1)) + { 
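// Scalar reference for the vectorized kernel that follows (editorial sketch;
// the demo_* names are illustrative). Element by element, applying the Givens
// rotation with cosine c and sine s in the plane of vectors x and y computes:
#include <complex>
typedef std::complex<float> cf;
void demo_apply_rotation(cf* x, cf* y, long n, float c, cf s) {
  for (long i = 0; i < n; ++i) {
    cf xi = x[i], yi = y[i];
    x[i] = c * xi + std::conj(s) * yi; // padd(pm.pmul(pc,xi), pcj.pmul(ps,yi))
    y[i] = c * yi - s * xi;            // psub(pcj.pmul(pc,yi), pm.pmul(ps,xi)), conj(c)==c here
  }
}
// The packet code below is this loop restated with aligned pload/pstore on y,
// unaligned ploadu/pstoreu on x, and two-packet peeling once y is aligned.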
+ // both vectors are sequentially stored in memory => vectorization + enum { Peeling = 2 }; - /*** dynamic-size vectorized paths ***/ + Index alignedStart = internal::first_default_aligned(y, size); + Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - if(VectorX::SizeAtCompileTime == Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (PacketSize == OtherPacketSize) && - ((incrx==1 && incry==1) || PacketSize == 1)) - { - // both vectors are sequentially stored in memory => vectorization - enum { Peeling = 2 }; + const OtherPacket pc = pset1(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; - Index alignedStart = internal::first_default_aligned(y, size); - Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; + for(Index i=0; i(c); - const OtherPacket ps = pset1(s); - conj_helper::IsComplex,false> pcj; - conj_helper pm; + Scalar* EIGEN_RESTRICT px = x + alignedStart; + Scalar* EIGEN_RESTRICT py = y + alignedStart; - for(Index i=0; i(px); + Packet yi = pload(py); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } + } + else + { + Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); + for(Index i=alignedStart; i(px); + Packet xi1 = ploadu(px+PacketSize); + Packet yi = pload (py); + Packet yi1 = pload (py+PacketSize); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); + px += Peeling*PacketSize; + py += Peeling*PacketSize; + } + if(alignedEnd!=peelingEnd) + { + Packet xi = ploadu(x+peelingEnd); + Packet yi = pload (y+peelingEnd); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + } + } - Scalar* EIGEN_RESTRICT px = x + alignedStart; - Scalar* EIGEN_RESTRICT py = y + alignedStart; + for(Index i=alignedEnd; i0) // FIXME should be compared to the required alignment { - for(Index i=alignedStart; i(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; + Scalar* EIGEN_RESTRICT px = x; + Scalar* EIGEN_RESTRICT py = y; + for(Index i=0; i(px); Packet yi = pload(py); @@ -362,76 +419,40 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x py += PacketSize; } } - else - { - Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); - for(Index i=alignedStart; i(px); - Packet xi1 = ploadu(px+PacketSize); - Packet yi = pload (py); - Packet yi1 = pload (py+PacketSize); - pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); - px += Peeling*PacketSize; - py += Peeling*PacketSize; - } - if(alignedEnd!=peelingEnd) - { - Packet xi = ploadu(x+peelingEnd); - Packet yi = pload (y+peelingEnd); - pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - } - } - for(Index i=alignedEnd; i::run(x,incrx,y,incry,size,c,s); } } +}; - /*** fixed-size vectorized path ***/ - else if(VectorX::SizeAtCompileTime != Dynamic && - (VectorX::Flags & VectorY::Flags & 
PacketAccessBit) && - (PacketSize == OtherPacketSize) && - (EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment)>0)) // FIXME should be compared to the required alignment - { - const OtherPacket pc = pset1(c); - const OtherPacket ps = pset1(s); - conj_helper::IsComplex,false> pcj; - conj_helper pm; - Scalar* EIGEN_RESTRICT px = x; - Scalar* EIGEN_RESTRICT py = y; - for(Index i=0; i(px); - Packet yi = pload(py); - pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - px += PacketSize; - py += PacketSize; - } - } +template +void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) +{ + typedef typename VectorX::Scalar Scalar; + const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit) + && (int(packet_traits::size) == int(packet_traits::size)); - /*** non-vectorized path ***/ - else - { - for(Index i=0; i::Alignment, evaluator::Alignment), + Vectorizable>::run(x,incrx,y,incry,size,c,s); } } // end namespace internal diff --git a/third_party_includes/Eigen/src/LU/InverseImpl.h b/third_party_includes/Eigen/src/LU/InverseImpl.h index 018f99b..f49f233 100644 --- a/third_party_includes/Eigen/src/LU/InverseImpl.h +++ b/third_party_includes/Eigen/src/LU/InverseImpl.h @@ -404,7 +404,7 @@ inline void MatrixBase::computeInverseWithCheck( const RealScalar& absDeterminantThreshold ) const { - RealScalar determinant; + Scalar determinant; // i'd love to put some static assertions there, but SFINAE means that they have no effect... eigen_assert(rows() == cols()); computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); diff --git a/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h b/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h index d2ebfd7..160d8a5 100644 --- a/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -64,28 +64,28 @@ namespace internal typedef typename _MatrixType::StorageIndex StorageIndex; }; - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int 
nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast(vals), perm, invp, reinterpret_cast(x), nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} diff --git a/third_party_includes/Eigen/src/SVD/BDCSVD.h b/third_party_includes/Eigen/src/SVD/BDCSVD.h index d7a4271..1134d66 100644 --- a/third_party_includes/Eigen/src/SVD/BDCSVD.h +++ b/third_party_includes/Eigen/src/SVD/BDCSVD.h @@ -11,7 +11,7 @@ // Copyright (C) 2013 Jean Ceccato // Copyright (C) 2013 Pierre Zoppitelli // Copyright (C) 2013 Jitse Niesen -// Copyright (C) 2014-2016 Gael Guennebaud +// Copyright (C) 2014-2017 Gael Guennebaud // // Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -696,7 +696,9 @@ typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar for(Index i=0; i::computeSingVals(const ArrayRef& col0, const ArrayRef& d { using std::abs; using std::swap; + using std::sqrt; Index n = col0.size(); Index actual_n = n; + // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above + // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value. while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) @@ -732,7 +737,9 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d right = (diag(actual_n-1) + col0.matrix().norm()); else { - // Skip deflated singular values + // Skip deflated singular values, + // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside. + // This should be equivalent to using perm[] Index l = k+1; while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar leftShifted, rightShifted; if (shift == left) { - leftShifted = (std::numeric_limits::min)(); + // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)), + // the factor 2 is to be more conservative + leftShifted = numext::maxi( (std::numeric_limits::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits::max)()) ); + + // check that we did it right: + eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) ); // I don't understand why the case k==0 would be special there: - // if (k == 0) rightShifted = right - left; else - rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe + // if (k == 0) rightShifted = right - left; else + rightShifted = (k==actual_n-1) ? 
right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe } else { - leftShifted = -(right - left) * RealScalar(0.6); - rightShifted = -(std::numeric_limits::min)(); + leftShifted = -(right - left) * RealScalar(0.51); + if(k+1( (std::numeric_limits::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits::max)()) ); + else + rightShifted = -(std::numeric_limits::min)(); } RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); @@ -980,7 +995,7 @@ void BDCSVD::deflation43(Index firstCol, Index shift, Index i, Index Index start = firstCol + shift; RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); - RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); + RealScalar r = numext::hypot(c,s); if (r == Literal(0)) { m_computed(start+i, start+i) = Literal(0); diff --git a/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h index 5027215..ff0516f 100644 --- a/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h +++ b/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h @@ -61,9 +61,10 @@ JacobiSVD, ColPiv u = (LAPACKE_TYPE*)m_matrixU.data(); \ } else { ldu=1; u=&dummy; }\ MatrixType localV; \ - ldvt = (m_computeFullV) ? internal::convert_index(m_cols) : (m_computeThinV) ? internal::convert_index(m_diagSize) : 1; \ + lapack_int vt_rows = (m_computeFullV) ? internal::convert_index(m_cols) : (m_computeThinV) ? internal::convert_index(m_diagSize) : 1; \ if (computeV()) { \ - localV.resize(ldvt, m_cols); \ + localV.resize(vt_rows, m_cols); \ + ldvt = internal::convert_index(localV.outerStride()); \ vt = (LAPACKE_TYPE*)localV.data(); \ } else { ldvt=1; vt=&dummy; }\ Matrix superb; superb.resize(m_diagSize, 1); \ diff --git a/third_party_includes/Eigen/src/SVD/SVDBase.h b/third_party_includes/Eigen/src/SVD/SVDBase.h index cc90a3b..3d1ef37 100644 --- a/third_party_includes/Eigen/src/SVD/SVDBase.h +++ b/third_party_includes/Eigen/src/SVD/SVDBase.h @@ -180,8 +180,10 @@ class SVDBase RealScalar threshold() const { eigen_assert(m_isInitialized || m_usePrescribedThreshold); + // this temporary is needed to workaround a MSVC issue + Index diagSize = (std::max)(1,m_diagSize); return m_usePrescribedThreshold ? 
m_prescribedThreshold - : (std::max)(1,m_diagSize)*NumTraits::epsilon(); + : diagSize*NumTraits::epsilon(); } /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */ diff --git a/third_party_includes/Eigen/src/SparseCore/AmbiVector.h b/third_party_includes/Eigen/src/SparseCore/AmbiVector.h index 8a5cc91..e0295f2 100644 --- a/third_party_includes/Eigen/src/SparseCore/AmbiVector.h +++ b/third_party_includes/Eigen/src/SparseCore/AmbiVector.h @@ -94,7 +94,7 @@ class AmbiVector Index allocSize = m_allocatedElements * sizeof(ListEl); allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); Scalar* newBuffer = new Scalar[allocSize]; - memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); + std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); delete[] m_buffer; m_buffer = newBuffer; } diff --git a/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 492eb0a..9db119b 100644 --- a/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -17,7 +17,9 @@ namespace internal { template static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) { - typedef typename remove_all::type::Scalar Scalar; + typedef typename remove_all::type::Scalar LhsScalar; + typedef typename remove_all::type::Scalar RhsScalar; + typedef typename remove_all::type::Scalar ResScalar; // make sure to call innerSize/outerSize since we fake the storage order. Index rows = lhs.innerSize(); @@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r eigen_assert(lhs.outerSize() == rhs.innerSize()); ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); + ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0); ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); std::memset(mask,0,sizeof(bool)*rows); @@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r Index nnz = 0; for (typename evaluator::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { - Scalar y = rhsIt.value(); + RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); - Scalar x = lhsIt.value(); + LhsScalar x = lhsIt.value(); if(!mask[i]) { mask[i] = true; @@ -166,11 +168,12 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; - RowMajorMatrix rhsRow = rhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); - res = resRow; + typedef SparseMatrix RowMajorRhs; + typedef SparseMatrix RowMajorRes; + RowMajorRhs rhsRow = rhs; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); + res = resRow; } }; @@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; - RowMajorMatrix lhsRow = lhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); + typedef SparseMatrix RowMajorLhs; + typedef SparseMatrix RowMajorRes; + RowMajorLhs lhsRow = lhs; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + 
internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); res = resRow; } }; @@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector ColMajorMatrix; - ColMajorMatrix lhsCol = lhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); + typedef SparseMatrix ColMajorLhs; + typedef SparseMatrix ColMajorRes; + ColMajorLhs lhsCol = lhs; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); res = resCol; } }; @@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector ColMajorMatrix; - ColMajorMatrix rhsCol = rhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); + typedef SparseMatrix ColMajorRhs; + typedef SparseMatrix ColMajorRes; + ColMajorRhs rhsCol = rhs; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); res = resCol; } }; @@ -263,7 +269,8 @@ namespace internal { template static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef typename remove_all::type::Scalar Scalar; + typedef typename remove_all::type::Scalar LhsScalar; + typedef typename remove_all::type::Scalar RhsScalar; Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); @@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, { for (typename evaluator::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { - Scalar y = rhsIt.value(); + RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); - Scalar x = lhsIt.value(); + LhsScalar x = lhsIt.value(); res.coeffRef(i,j) += x * y; } } @@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector ColMajorMatrix; - ColMajorMatrix lhsCol(lhs); - internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); + typedef SparseMatrix ColMajorLhs; + ColMajorLhs lhsCol(lhs); + internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); } }; @@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector ColMajorMatrix; - ColMajorMatrix rhsCol(rhs); - internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); + typedef SparseMatrix ColMajorRhs; + ColMajorRhs rhsCol(rhs); + internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); } }; diff --git a/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h b/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h index 323c232..0a2490b 100644 --- a/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h +++ b/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h @@ -893,7 +893,7 @@ class SparseMatrix Index p = m_outerIndex[outer] + m_innerNonZeros[outer]++; m_data.index(p) = convert_index(inner); - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } private: @@ -1274,7 +1274,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& m_innerNonZeros[outer]++; m_data.index(p) = inner; - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } template @@ -1381,7 +1381,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& } m_data.index(p) = inner; - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } namespace internal { diff --git 
a/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h b/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h index 5ab64f1..65611b3 100644 --- a/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -311,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons while (i && i.index()::type::Scalar Scalar; + typedef typename remove_all::type::Scalar RhsScalar; + typedef typename remove_all::type::Scalar ResScalar; typedef typename remove_all::type::StorageIndex StorageIndex; // make sure to call innerSize/outerSize since we fake the storage order. @@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r eigen_assert(lhs.outerSize() == rhs.innerSize()); // allocate a temporary buffer - AmbiVector tempVector(rows); + AmbiVector tempVector(rows); // mimics a resizeByInnerOuter: if(ResultType::IsRowMajor) @@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r { // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index()) tempVector.restart(); - Scalar x = rhsIt.value(); + RhsScalar x = rhsIt.value(); for (typename evaluator::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt) { tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x; } } res.startVec(j); - for (typename AmbiVector::Iterator it(tempVector,tolerance); it; ++it) + for (typename AmbiVector::Iterator it(tempVector,tolerance); it; ++it) res.insertBackByOuterInner(j,it.index()) = it.value(); } res.finalize(); @@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector; template struct sparse_sparse_product_with_pruning_selector { - typedef typename traits::type>::Scalar Scalar; typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) @@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector ColMajorMatrixLhs; - typedef SparseMatrix ColMajorMatrixRhs; + typedef SparseMatrix ColMajorMatrixLhs; + typedef SparseMatrix ColMajorMatrixRhs; ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixRhs colRhs(rhs); internal::sparse_sparse_product_with_pruning_impl(colLhs, colRhs, res, tolerance); @@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector RowMajorMatrixLhs; + typedef SparseMatrix RowMajorMatrixLhs; RowMajorMatrixLhs rowLhs(lhs); sparse_sparse_product_with_pruning_selector(rowLhs,rhs,res,tolerance); } @@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector RowMajorMatrixRhs; + typedef SparseMatrix RowMajorMatrixRhs; RowMajorMatrixRhs rowRhs(rhs); sparse_sparse_product_with_pruning_selector(lhs,rowRhs,res,tolerance); } @@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector ColMajorMatrixRhs; + typedef SparseMatrix ColMajorMatrixRhs; ColMajorMatrixRhs colRhs(rhs); internal::sparse_sparse_product_with_pruning_impl(lhs, colRhs, res, tolerance); } @@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector ColMajorMatrixLhs; + typedef SparseMatrix ColMajorMatrixLhs; ColMajorMatrixLhs colLhs(lhs); internal::sparse_sparse_product_with_pruning_impl(colLhs, rhs, res, tolerance); } diff --git a/third_party_includes/Eigen/src/SparseLU/SparseLU.h b/third_party_includes/Eigen/src/SparseLU/SparseLU.h index f883ab3..7104831 100644 --- a/third_party_includes/Eigen/src/SparseLU/SparseLU.h +++ 
b/third_party_includes/Eigen/src/SparseLU/SparseLU.h @@ -499,8 +499,6 @@ void SparseLU::factorize(const MatrixType& matrix) eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); - typedef typename IndexVector::Scalar StorageIndex; - m_isInitialized = true; diff --git a/third_party_includes/Eigen/src/SparseQR/SparseQR.h b/third_party_includes/Eigen/src/SparseQR/SparseQR.h index 2d4498b..7409fca 100644 --- a/third_party_includes/Eigen/src/SparseQR/SparseQR.h +++ b/third_party_includes/Eigen/src/SparseQR/SparseQR.h @@ -52,7 +52,7 @@ namespace internal { * rank-revealing permutations. Use colsPermutation() to get it. * * Q is the orthogonal matrix represented as products of Householder reflectors. - * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. + * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint. * You can then apply it to a vector. * * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient. @@ -65,6 +65,7 @@ namespace internal { * \implsparsesolverconcept * * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). + * \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix. * */ template @@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase > Index rank = this->rank(); - // Compute Q^T * b; + // Compute Q^* * b; typename Dest::PlainObject y, b; - y = this->matrixQ().transpose() * B; + y = this->matrixQ().adjoint() * B; b = y; // Solve with the triangular matrix R @@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue @@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase(m_qr,other.derived(),false); } + // To use for operations with the adjoint of Q SparseQRMatrixQTransposeReturnType adjoint() const { return SparseQRMatrixQTransposeReturnType(m_qr); } inline Index rows() const { return m_qr.rows(); } - inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); } - // To use for operations with the transpose of Q + inline Index cols() const { return m_qr.rows(); } + // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment SparseQRMatrixQTransposeReturnType transpose() const { return SparseQRMatrixQTransposeReturnType(m_qr); @@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase struct SparseQRMatrixQTransposeReturnType { @@ -712,7 +718,7 @@ struct Assignment, internal: typedef typename DstXprType::StorageIndex StorageIndex; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { - typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); + typename DstXprType::PlainObject idMat(src.rows(), src.cols()); idMat.setIdentity(); // Sort the sparse householder reflectors if needed const_cast(&src.m_qr)->_sort_matrix_Q(); diff --git a/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h b/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h index 50a69f3..7261c7d 100644 --- a/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat) template MappedSparseMatrix map_superlu(SluMatrix& sluMat) { - eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR - || (Flags&ColMajor)==ColMajor && sluMat.Stype == 
SLU_NC); + eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR) + || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC)); Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow; diff --git a/third_party_includes/nanoflann.hpp b/third_party_includes/nanoflann.hpp index 45c185b..0e22899 100644 --- a/third_party_includes/nanoflann.hpp +++ b/third_party_includes/nanoflann.hpp @@ -31,1960 +31,2009 @@ *************************************************************************/ /** \mainpage nanoflann C++ API documentation - * nanoflann is a C++ header-only library for building KD-Trees, mostly - * optimized for 2D or 3D point clouds. - * - * nanoflann does not require compiling or installing, just an - * #include in your code. - * - * See: - * - C++ API organized by modules - * - Online README - * - Doxygen documentation - */ - -#ifndef NANOFLANN_HPP_ -#define NANOFLANN_HPP_ + * nanoflann is a C++ header-only library for building KD-Trees, mostly + * optimized for 2D or 3D point clouds. + * + * nanoflann does not require compiling or installing, just an + * #include in your code. + * + * See: + * - C++ API organized by modules + * - Online README + * - Doxygen + * documentation + */ + +#ifndef NANOFLANN_HPP_ +#define NANOFLANN_HPP_ -#include -#include #include -#include -#include // for fwrite() -#define _USE_MATH_DEFINES // Required by MSVC to define M_PI,etc. in +#include +#include #include // for abs() +#include // for fwrite() #include // for abs() -#include +#include +#include // std::reference_wrapper +#include +#include + +/** Library version: 0xMmP (M=Major,m=minor,P=patch) */ +#define NANOFLANN_VERSION 0x130 // Avoid conflicting declaration of min/max macros in windows headers -#if !defined(NOMINMAX) && (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) -# define NOMINMAX -# ifdef max -# undef max -# undef min -# endif +#if !defined(NOMINMAX) && \ + (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) +#define NOMINMAX +#ifdef max +#undef max +#undef min +#endif #endif -namespace nanoflann -{ +namespace nanoflann { /** @addtogroup nanoflann_grp nanoflann C++ library for ANN - * @{ */ - - /** Library version: 0xMmP (M=Major,m=minor,P=patch) */ - #define NANOFLANN_VERSION 0x123 - - /** @addtogroup result_sets_grp Result set classes - * @{ */ - template - class KNNResultSet - { - IndexType * indices; - DistanceType* dists; - CountType capacity; - CountType count; - - public: - inline KNNResultSet(CountType capacity_) : indices(0), dists(0), capacity(capacity_), count(0) - { - } - - inline void init(IndexType* indices_, DistanceType* dists_) - { - indices = indices_; - dists = dists_; - count = 0; - if (capacity) - dists[capacity-1] = (std::numeric_limits::max)(); - } - - inline CountType size() const - { - return count; - } - - inline bool full() const - { - return count == capacity; - } - - - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are sufficient - */ - inline bool addPoint(DistanceType dist, IndexType index) - { - CountType i; - for (i = count; i > 0; --i) { -#ifdef NANOFLANN_FIRST_MATCH // If defined and two points have the same distance, the one with the lowest-index will be returned first. 
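A note on the BDCSVD hunk earlier in this patch: deflation43() now computes r = numext::hypot(c, s) instead of sqrt(numext::abs2(c) + numext::abs2(s)). The two are mathematically equal, but squaring first overflows (or underflows) once c or s approaches the limits of the floating-point range, while a hypot-style implementation rescales internally. A minimal standalone sketch using std::hypot, the standard-library analogue of numext::hypot (illustrative only, not Eigen code):

#include <cmath>
#include <cstdio>

int main() {
  const double c = 1e200, s = 1e200;
  const double naive = std::sqrt(c * c + s * s); // c*c overflows: result is inf
  const double safe  = std::hypot(c, s);         // ~1.414e200, the correct norm
  std::printf("naive = %g, hypot = %g\n", naive, safe);
  return 0;
}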
- if ( (dists[i-1] > dist) || ((dist == dists[i-1]) && (indices[i-1] > index)) ) { + * @{ */ + +/** the PI constant (required to avoid MSVC missing symbols) */ +template T pi_const() { + return static_cast(3.14159265358979323846); +} + +/** + * Traits if object is resizable and assignable (typically has a resize | assign + * method) + */ +template struct has_resize : std::false_type {}; + +template +struct has_resize().resize(1), 0)> + : std::true_type {}; + +template struct has_assign : std::false_type {}; + +template +struct has_assign().assign(1, 0), 0)> + : std::true_type {}; + +/** + * Free function to resize a resizable object + */ +template +inline typename std::enable_if::value, void>::type +resize(Container &c, const size_t nElements) { + c.resize(nElements); +} + +/** + * Free function that has no effects on non resizable containers (e.g. + * std::array) It raises an exception if the expected size does not match + */ +template +inline typename std::enable_if::value, void>::type +resize(Container &c, const size_t nElements) { + if (nElements != c.size()) + throw std::logic_error("Try to change the size of a std::array."); +} + +/** + * Free function to assign to a container + */ +template +inline typename std::enable_if::value, void>::type +assign(Container &c, const size_t nElements, const T &value) { + c.assign(nElements, value); +} + +/** + * Free function to assign to a std::array + */ +template +inline typename std::enable_if::value, void>::type +assign(Container &c, const size_t nElements, const T &value) { + for (size_t i = 0; i < nElements; i++) + c[i] = value; +} + +/** @addtogroup result_sets_grp Result set classes + * @{ */ +template +class KNNResultSet { +public: + typedef _DistanceType DistanceType; + typedef _IndexType IndexType; + typedef _CountType CountType; + +private: + IndexType *indices; + DistanceType *dists; + CountType capacity; + CountType count; + +public: + inline KNNResultSet(CountType capacity_) + : indices(0), dists(0), capacity(capacity_), count(0) {} + + inline void init(IndexType *indices_, DistanceType *dists_) { + indices = indices_; + dists = dists_; + count = 0; + if (capacity) + dists[capacity - 1] = (std::numeric_limits::max)(); + } + + inline CountType size() const { return count; } + + inline bool full() const { return count == capacity; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + inline bool addPoint(DistanceType dist, IndexType index) { + CountType i; + for (i = count; i > 0; --i) { +#ifdef NANOFLANN_FIRST_MATCH // If defined and two points have the same + // distance, the one with the lowest-index will be + // returned first. 
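The addPoint() logic reformatted above (and continuing just below) is a bounded insertion sort: each candidate shifts worse entries one slot to the right until the new (dist, index) pair finds its place, the worst entry falls off the end once the set is full, and dists[capacity-1] is what worstDist() reports. A small usage sketch, independent of any kd-tree and assuming the default size_t index type; the candidate values are made up for illustration:

#include "nanoflann.hpp"
#include <cstdio>

int main() {
  const size_t k = 3;
  size_t indices[3];
  double dists[3];
  nanoflann::KNNResultSet<double> rs(k);
  rs.init(indices, dists);
  // Feed (distance, index) pairs in arbitrary order; only the k nearest
  // survive, kept sorted by distance.
  rs.addPoint(4.0, 40);
  rs.addPoint(1.0, 10);
  rs.addPoint(9.0, 90);
  rs.addPoint(2.0, 20); // evicts (9.0, 90)
  for (size_t i = 0; i < rs.size(); ++i)
    std::printf("rank %zu: index=%zu dist=%g\n", i, indices[i], dists[i]);
  return 0;
}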
+ if ((dists[i - 1] > dist) || + ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else - if (dists[i-1] > dist) { + if (dists[i - 1] > dist) { #endif - if (i < capacity) { - dists[i] = dists[i-1]; - indices[i] = indices[i-1]; - } - } - else break; - } - if (i < capacity) { - dists[i] = dist; - indices[i] = index; - } - if (count < capacity) count++; - - // tell caller that the search shall continue - return true; - } - - inline DistanceType worstDist() const - { - return dists[capacity-1]; - } - }; - - /** operator "<" for std::sort() */ - struct IndexDist_Sorter - { - /** PairType will be typically: std::pair */ - template - inline bool operator()(const PairType &p1, const PairType &p2) const { - return p1.second < p2.second; - } - }; - - /** - * A result-set class used when performing a radius based search. - */ - template - class RadiusResultSet - { - public: - const DistanceType radius; - - std::vector > &m_indices_dists; - - inline RadiusResultSet(DistanceType radius_, std::vector > &indices_dists) : radius(radius_), m_indices_dists(indices_dists) - { - init(); - } - - inline void init() { clear(); } - inline void clear() { m_indices_dists.clear(); } - - inline size_t size() const { return m_indices_dists.size(); } - - inline bool full() const { return true; } - - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are sufficient - */ - inline bool addPoint(DistanceType dist, IndexType index) - { - if (dist < radius) - m_indices_dists.push_back(std::make_pair(index, dist)); - return true; - } - - inline DistanceType worstDist() const { return radius; } - - /** - * Find the worst result (furtherest neighbor) without copying or sorting - * Pre-conditions: size() > 0 - */ - std::pair worst_item() const - { - if (m_indices_dists.empty()) throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on an empty list of results."); - typedef typename std::vector >::const_iterator DistIt; - DistIt it = std::max_element(m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); - return *it; - } - }; - - - /** @} */ - - - /** @addtogroup loadsave_grp Load/save auxiliary functions - * @{ */ - template - void save_value(FILE* stream, const T& value, size_t count = 1) - { - fwrite(&value, sizeof(value), count, stream); - } - - template - void save_value(FILE* stream, const std::vector& value) - { - size_t size = value.size(); - fwrite(&size, sizeof(size_t), 1, stream); - fwrite(&value[0], sizeof(T), size, stream); - } - - template - void load_value(FILE* stream, T& value, size_t count = 1) - { - size_t read_cnt = fread(&value, sizeof(value), count, stream); - if (read_cnt != count) { - throw std::runtime_error("Cannot read from file"); - } - } - - - template - void load_value(FILE* stream, std::vector& value) - { - size_t size; - size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); - if (read_cnt != 1) { - throw std::runtime_error("Cannot read from file"); - } - value.resize(size); - read_cnt = fread(&value[0], sizeof(T), size, stream); - if (read_cnt != size) { - throw std::runtime_error("Cannot read from file"); - } - } - /** @} */ - - - /** @addtogroup metric_grp Metric (distance) classes - * @{ */ - - struct Metric - { - }; - - /** Manhattan distance functor (generic version, optimized for high-dimensionality data sets). - * Corresponding distance traits: nanoflann::metric_L1 - * \tparam T Type of the elements (e.g. 
double, float, uint8_t) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. float, double, int64_t) - */ - template - struct L1_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - L1_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const - { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; - - /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) { - const DistanceType diff0 = std::abs(a[0] - data_source.kdtree_get_pt(b_idx,d++)); - const DistanceType diff1 = std::abs(a[1] - data_source.kdtree_get_pt(b_idx,d++)); - const DistanceType diff2 = std::abs(a[2] - data_source.kdtree_get_pt(b_idx,d++)); - const DistanceType diff3 = std::abs(a[3] - data_source.kdtree_get_pt(b_idx,d++)); - result += diff0 + diff1 + diff2 + diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { - return result; - } - } - /* Process last 0-3 components. Not needed for standard vector lengths. */ - while (a < last) { - result += std::abs( *a++ - data_source.kdtree_get_pt(b_idx, d++) ); - } - return result; - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - return std::abs(a-b); - } - }; - - /** Squared Euclidean distance functor (generic version, optimized for high-dimensionality data sets). - * Corresponding distance traits: nanoflann::metric_L2 - * \tparam T Type of the elements (e.g. double, float, uint8_t) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. float, double, int64_t) - */ - template - struct L2_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - L2_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const - { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; - - /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) { - const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx,d++); - const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx,d++); - const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx,d++); - const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx,d++); - result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { - return result; - } - } - /* Process last 0-3 components. Not needed for standard vector lengths. */ - while (a < last) { - const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); - result += diff0 * diff0; - } - return result; - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - return (a - b) * (a - b); - } - }; - - /** Squared Euclidean (L2) distance functor (suitable for low-dimensionality datasets, like 2D or 3D point clouds) - * Corresponding distance traits: nanoflann::metric_L2_Simple - * \tparam T Type of the elements (e.g. double, float, uint8_t) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
float, double, int64_t) - */ - template - struct L2_Simple_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - L2_Simple_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size) const { - DistanceType result = DistanceType(); - for (size_t i = 0; i < size; ++i) { - const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); - result += diff * diff; - } - return result; - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - return (a - b) * (a - b); - } - }; - - /** SO2 distance functor - * Corresponding distance traits: nanoflann::metric_SO2 - * \tparam T Type of the elements (e.g. double, float) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. float, double) - * orientation is constrained to be in [-pi, pi] - */ - template - struct SO2_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - SO2_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size) const { - return accum_dist(a[size-1], data_source.kdtree_get_pt(b_idx, size - 1) , size - 1); - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - DistanceType result = DistanceType(); - result = b - a; - if (result > M_PI) - result -= 2. * M_PI; - else if (result < -M_PI) - result += 2. * M_PI; - return result; - } - }; - - /** SO3 distance functor (Uses L2_Simple) - * Corresponding distance traits: nanoflann::metric_SO3 - * \tparam T Type of the elements (e.g. double, float) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
float, double) - */ - template - struct SO3_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - L2_Simple_Adaptor distance_L2_Simple; - - SO3_Adaptor(const DataSource &_data_source) : distance_L2_Simple(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size) const { - return distance_L2_Simple.evalMetric(a, b_idx, size); - } - - template - inline DistanceType accum_dist(const U a, const V b, int idx) const - { - return distance_L2_Simple.accum_dist(a, b, idx); - } - }; - - /** Metaprogramming helper traits class for the L1 (Manhattan) metric */ - struct metric_L1 : public Metric - { - template - struct traits { - typedef L1_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the L2 (Euclidean) metric */ - struct metric_L2 : public Metric - { - template - struct traits { - typedef L2_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the L2_simple (Euclidean) metric */ - struct metric_L2_Simple : public Metric - { - template - struct traits { - typedef L2_Simple_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ - struct metric_SO2 : public Metric - { - template - struct traits { - typedef SO2_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ - struct metric_SO3 : public Metric - { - template - struct traits { - typedef SO3_Adaptor distance_t; - }; - }; - - /** @} */ - - /** @addtogroup param_grp Parameter structs - * @{ */ - - /** Parameters (see README.md) */ - struct KDTreeSingleIndexAdaptorParams - { - KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10) : - leaf_max_size(_leaf_max_size) - {} - - size_t leaf_max_size; - }; - - /** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ - struct SearchParams - { - /** Note: The first argument (checks_IGNORED_) is ignored, but kept for compatibility with the FLANN interface */ - SearchParams(int checks_IGNORED_ = 32, float eps_ = 0, bool sorted_ = true ) : - checks(checks_IGNORED_), eps(eps_), sorted(sorted_) {} - - int checks; //!< Ignored parameter (Kept for compatibility with the FLANN interface). - float eps; //!< search for eps-approximate neighbours (default: 0) - bool sorted; //!< only for radius search, require neighbours sorted by distance (default: true) - }; - /** @} */ - - - /** @addtogroup memalloc_grp Memory allocation - * @{ */ - - /** - * Allocates (using C's malloc) a generic type T. - * - * Params: - * count = number of instances to allocate. - * Returns: pointer (of type T*) to memory buffer - */ - template - inline T* allocate(size_t count = 1) - { - T* mem = static_cast( ::malloc(sizeof(T)*count)); - return mem; - } - - - /** - * Pooled storage allocator - * - * The following routines allow for the efficient allocation of storage in - * small chunks from a specified pool. Rather than allowing each structure - * to be freed individually, an entire pool of storage is freed at once. - * This method has two advantages over just using malloc() and free(). First, - * it is far more efficient for allocating small objects, as there is - * no overhead for remembering all the information needed to free each - * object or consolidating fragmented memory. Second, the decision about - * how long to keep an object is made at the time of allocation, and there - * is no need to track down all the objects to free them. 
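The SO2_Adaptor removed just above (its clang-formatted replacement later in this hunk swaps the MSVC-problematic M_PI for pi_const<T>()) reduces the angular difference b - a into [-pi, pi], so a distance never takes the long way around the circle. A standalone sketch of that wrap rule; the function name is illustrative:

#include <cstdio>

// Signed angular difference b - a, wrapped into [-pi, pi].
// Assumes the inputs already lie in [-pi, pi], as SO2_Adaptor requires.
static double so2_diff(double a, double b) {
  const double PI = 3.14159265358979323846;
  double r = b - a;
  if (r > PI)
    r -= 2 * PI;
  else if (r < -PI)
    r += 2 * PI;
  return r;
}

int main() {
  // Crossing the +/-pi seam: from 3.0 rad to -3.0 rad the short way is
  // +0.283 rad (through pi), not -6.0 rad.
  std::printf("%f\n", so2_diff(3.0, -3.0));
  return 0;
}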
- * - */ - - const size_t WORDSIZE = 16; - const size_t BLOCKSIZE = 8192; - - class PooledAllocator - { - /* We maintain memory alignment to word boundaries by requiring that all - allocations be in multiples of the machine wordsize. */ - /* Size of machine word in bytes. Must be power of 2. */ - /* Minimum number of bytes requested at a time from the system. Must be multiple of WORDSIZE. */ - - - size_t remaining; /* Number of bytes left in current block of storage. */ - void* base; /* Pointer to base of current block of storage. */ - void* loc; /* Current location in block to next allocate memory. */ - - void internal_init() - { - remaining = 0; - base = NULL; - usedMemory = 0; - wastedMemory = 0; - } - - public: - size_t usedMemory; - size_t wastedMemory; - - /** - Default constructor. Initializes a new pool. - */ - PooledAllocator() { - internal_init(); - } - - /** - * Destructor. Frees all the memory allocated in this pool. - */ - ~PooledAllocator() { - free_all(); - } - - /** Frees all allocated memory chunks */ - void free_all() - { - while (base != NULL) { - void *prev = *(static_cast( base)); /* Get pointer to prev block. */ - ::free(base); - base = prev; - } - internal_init(); - } - - /** - * Returns a pointer to a piece of new memory of the given size in bytes - * allocated from the pool. - */ - void* malloc(const size_t req_size) - { - /* Round size up to a multiple of wordsize. The following expression - only works for WORDSIZE that is a power of 2, by masking last bits of - incremented size to zero. - */ - const size_t size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); - - /* Check whether a new block must be allocated. Note that the first word - of a block is reserved for a pointer to the previous block. - */ - if (size > remaining) { - - wastedMemory += remaining; - - /* Allocate new storage. */ - const size_t blocksize = (size + sizeof(void*) + (WORDSIZE - 1) > BLOCKSIZE) ? - size + sizeof(void*) + (WORDSIZE - 1) : BLOCKSIZE; - - // use the standard C malloc to allocate memory - void* m = ::malloc(blocksize); - if (!m) { - fprintf(stderr, "Failed to allocate memory.\n"); - return NULL; - } - - /* Fill first word of new block with pointer to previous block. */ - static_cast(m)[0] = base; - base = m; - - size_t shift = 0; - //int size_t = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & (WORDSIZE-1))) & (WORDSIZE-1); - - remaining = blocksize - sizeof(void*) - shift; - loc = (static_cast(m) + sizeof(void*) + shift); - } - void* rloc = loc; - loc = static_cast(loc) + size; - remaining -= size; - - usedMemory += size; - - return rloc; - } - - /** - * Allocates (using this pool) a generic type T. - * - * Params: - * count = number of instances to allocate. - * Returns: pointer (of type T*) to memory buffer - */ - template - T* allocate(const size_t count = 1) - { - T* mem = static_cast(this->malloc(sizeof(T)*count)); - return mem; - } - - }; - /** @} */ - - /** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff - * @{ */ - - // ---------------- CArray ------------------------- - /** A STL container (as wrapper) for arrays of constant size defined at compile time (class imported from the MRPT project) - * This code is an adapted version from Boost, modifed for its integration - * within MRPT (JLBC, Dec/2009) (Renamed array -> CArray to avoid possible potential conflicts). - * See - * http://www.josuttis.com/cppcode - * for details and the latest version. - * See - * http://www.boost.org/libs/array for Documentation. - * for documentation. 
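The CArray wrapper being removed here dates from C++98; the patch supersedes it with std::array plus the has_resize/has_assign traits and the free resize()/assign() helpers introduced near the top of the new header, which grow real containers and merely size-check fixed-size ones. A short sketch of that dispatch, assuming the nanoflann 1.3.0 API shown earlier in this diff:

#include "nanoflann.hpp"
#include <array>
#include <vector>

int main() {
  std::vector<double> v;
  nanoflann::resize(v, 8);      // std::vector has resize(): forwarded
  std::array<double, 8> a;
  nanoflann::resize(a, 8);      // fixed-size container: accepted, nothing to do
  // nanoflann::resize(a, 16);  // would throw std::logic_error
  nanoflann::assign(a, 8, 0.0); // fills all 8 slots element by element
  return 0;
}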
- * - * (C) Copyright Nicolai M. Josuttis 2001. - * Permission to copy, use, modify, sell and distribute this software - * is granted provided this copyright notice appears in all copies. - * This software is provided "as is" without express or implied - * warranty, and with no claim as to its suitability for any purpose. - * - * 29 Jan 2004 - minor fixes (Nico Josuttis) - * 04 Dec 2003 - update to synch with library TR1 (Alisdair Meredith) - * 23 Aug 2002 - fix for Non-MSVC compilers combined with MSVC libraries. - * 05 Aug 2001 - minor update (Nico Josuttis) - * 20 Jan 2001 - STLport fix (Beman Dawes) - * 29 Sep 2000 - Initial Revision (Nico Josuttis) - * - * Jan 30, 2004 - */ - template - class CArray { - public: - T elems[N]; // fixed-size array of elements of type T - - public: - // type definitions - typedef T value_type; - typedef T* iterator; - typedef const T* const_iterator; - typedef T& reference; - typedef const T& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - // iterator support - inline iterator begin() { return elems; } - inline const_iterator begin() const { return elems; } - inline iterator end() { return elems+N; } - inline const_iterator end() const { return elems+N; } - - // reverse iterator support -#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !defined(BOOST_MSVC_STD_ITERATOR) && !defined(BOOST_NO_STD_ITERATOR_TRAITS) - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#elif defined(_MSC_VER) && (_MSC_VER == 1300) && defined(BOOST_DINKUMWARE_STDLIB) && (BOOST_DINKUMWARE_STDLIB == 310) - // workaround for broken reverse_iterator in VC7 - typedef std::reverse_iterator > reverse_iterator; - typedef std::reverse_iterator > const_reverse_iterator; -#else - // workaround for broken reverse_iterator implementations - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#endif - - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - // operator[] - inline reference operator[](size_type i) { return elems[i]; } - inline const_reference operator[](size_type i) const { return elems[i]; } - // at() with range check - reference at(size_type i) { rangecheck(i); return elems[i]; } - const_reference at(size_type i) const { rangecheck(i); return elems[i]; } - // front() and back() - reference front() { return elems[0]; } - const_reference front() const { return elems[0]; } - reference back() { return elems[N-1]; } - const_reference back() const { return elems[N-1]; } - // size is constant - static inline size_type size() { return N; } - static bool empty() { return false; } - static size_type max_size() { return N; } - enum { static_size = N }; - /** This method has no effects in this class, but raises an exception if the expected size does not match */ - inline void resize(const size_t nElements) { if (nElements!=N) throw std::logic_error("Try to change the size of a CArray."); } - // swap (note: linear complexity in N, constant for given instantiation) - void swap (CArray& y) { std::swap_ranges(begin(),end(),y.begin()); } - // direct access to data (read-only) - const T* data() const { return elems; } - // use array as C array (direct read/write access to data) - T* data() { return 
elems; } - // assignment with type conversion - template CArray& operator= (const CArray& rhs) { - std::copy(rhs.begin(),rhs.end(), begin()); - return *this; + if (i < capacity) { + dists[i] = dists[i - 1]; + indices[i] = indices[i - 1]; + } + } else + break; + } + if (i < capacity) { + dists[i] = dist; + indices[i] = index; + } + if (count < capacity) + count++; + + // tell caller that the search shall continue + return true; + } + + inline DistanceType worstDist() const { return dists[capacity - 1]; } +}; + +/** operator "<" for std::sort() */ +struct IndexDist_Sorter { + /** PairType will be typically: std::pair */ + template + inline bool operator()(const PairType &p1, const PairType &p2) const { + return p1.second < p2.second; + } +}; + +/** + * A result-set class used when performing a radius based search. + */ +template +class RadiusResultSet { +public: + typedef _DistanceType DistanceType; + typedef _IndexType IndexType; + +public: + const DistanceType radius; + + std::vector> &m_indices_dists; + + inline RadiusResultSet( + DistanceType radius_, + std::vector> &indices_dists) + : radius(radius_), m_indices_dists(indices_dists) { + init(); + } + + inline void init() { clear(); } + inline void clear() { m_indices_dists.clear(); } + + inline size_t size() const { return m_indices_dists.size(); } + + inline bool full() const { return true; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + inline bool addPoint(DistanceType dist, IndexType index) { + if (dist < radius) + m_indices_dists.push_back(std::make_pair(index, dist)); + return true; + } + + inline DistanceType worstDist() const { return radius; } + + /** + * Find the worst result (furtherest neighbor) without copying or sorting + * Pre-conditions: size() > 0 + */ + std::pair worst_item() const { + if (m_indices_dists.empty()) + throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on " + "an empty list of results."); + typedef + typename std::vector>::const_iterator + DistIt; + DistIt it = std::max_element(m_indices_dists.begin(), m_indices_dists.end(), + IndexDist_Sorter()); + return *it; + } +}; + +/** @} */ + +/** @addtogroup loadsave_grp Load/save auxiliary functions + * @{ */ +template +void save_value(FILE *stream, const T &value, size_t count = 1) { + fwrite(&value, sizeof(value), count, stream); +} + +template +void save_value(FILE *stream, const std::vector &value) { + size_t size = value.size(); + fwrite(&size, sizeof(size_t), 1, stream); + fwrite(&value[0], sizeof(T), size, stream); +} + +template +void load_value(FILE *stream, T &value, size_t count = 1) { + size_t read_cnt = fread(&value, sizeof(value), count, stream); + if (read_cnt != count) { + throw std::runtime_error("Cannot read from file"); + } +} + +template void load_value(FILE *stream, std::vector &value) { + size_t size; + size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); + if (read_cnt != 1) { + throw std::runtime_error("Cannot read from file"); + } + value.resize(size); + read_cnt = fread(&value[0], sizeof(T), size, stream); + if (read_cnt != size) { + throw std::runtime_error("Cannot read from file"); + } +} +/** @} */ + +/** @addtogroup metric_grp Metric (distance) classes + * @{ */ + +struct Metric {}; + +/** Manhattan distance functor (generic version, optimized for + * high-dimensionality data sets). Corresponding distance traits: + * nanoflann::metric_L1 \tparam T Type of the elements (e.g. 
double, float, + * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) + * (e.g. float, double, int64_t) + */ +template +struct L1_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + L1_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, + DistanceType worst_dist = -1) const { + DistanceType result = DistanceType(); + const T *last = a + size; + const T *lastgroup = last - 3; + size_t d = 0; + + /* Process 4 items with each loop for efficiency. */ + while (a < lastgroup) { + const DistanceType diff0 = + std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff1 = + std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff2 = + std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff3 = + std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); + result += diff0 + diff1 + diff2 + diff3; + a += 4; + if ((worst_dist > 0) && (result > worst_dist)) { + return result; + } + } + /* Process last 0-3 components. Not needed for standard vector lengths. */ + while (a < last) { + result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); + } + return result; + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + return std::abs(a - b); + } +}; + +/** Squared Euclidean distance functor (generic version, optimized for + * high-dimensionality data sets). Corresponding distance traits: + * nanoflann::metric_L2 \tparam T Type of the elements (e.g. double, float, + * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) + * (e.g. float, double, int64_t) + */ +template +struct L2_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + L2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, + DistanceType worst_dist = -1) const { + DistanceType result = DistanceType(); + const T *last = a + size; + const T *lastgroup = last - 3; + size_t d = 0; + + /* Process 4 items with each loop for efficiency. */ + while (a < lastgroup) { + const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx, d++); + result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; + a += 4; + if ((worst_dist > 0) && (result > worst_dist)) { + return result; + } + } + /* Process last 0-3 components. Not needed for standard vector lengths. */ + while (a < last) { + const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); + result += diff0 * diff0; + } + return result; + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + return (a - b) * (a - b); + } +}; + +/** Squared Euclidean (L2) distance functor (suitable for low-dimensionality + * datasets, like 2D or 3D point clouds) Corresponding distance traits: + * nanoflann::metric_L2_Simple \tparam T Type of the elements (e.g. double, + * float, uint8_t) \tparam _DistanceType Type of distance variables (must be + * signed) (e.g. 
float, double, int64_t) + */ +template +struct L2_Simple_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + L2_Simple_Adaptor(const DataSource &_data_source) + : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, + size_t size) const { + DistanceType result = DistanceType(); + for (size_t i = 0; i < size; ++i) { + const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); + result += diff * diff; + } + return result; + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + return (a - b) * (a - b); + } +}; + +/** SO2 distance functor + * Corresponding distance traits: nanoflann::metric_SO2 + * \tparam T Type of the elements (e.g. double, float) + * \tparam _DistanceType Type of distance variables (must be signed) (e.g. + * float, double) orientation is constrained to be in [-pi, pi] + */ +template +struct SO2_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + SO2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, + size_t size) const { + return accum_dist(a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), + size - 1); + } + + /** Note: this assumes that input angles are already in the range [-pi,pi] */ + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + DistanceType result = DistanceType(), PI = pi_const(); + result = b - a; + if (result > PI) + result -= 2 * PI; + else if (result < -PI) + result += 2 * PI; + return result; + } +}; + +/** SO3 distance functor (Uses L2_Simple) + * Corresponding distance traits: nanoflann::metric_SO3 + * \tparam T Type of the elements (e.g. double, float) + * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
+ * float, double) + */ +template +struct SO3_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + L2_Simple_Adaptor distance_L2_Simple; + + SO3_Adaptor(const DataSource &_data_source) + : distance_L2_Simple(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, + size_t size) const { + return distance_L2_Simple.evalMetric(a, b_idx, size); + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t idx) const { + return distance_L2_Simple.accum_dist(a, b, idx); + } +}; + +/** Metaprogramming helper traits class for the L1 (Manhattan) metric */ +struct metric_L1 : public Metric { + template struct traits { + typedef L1_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the L2 (Euclidean) metric */ +struct metric_L2 : public Metric { + template struct traits { + typedef L2_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the L2_simple (Euclidean) metric */ +struct metric_L2_Simple : public Metric { + template struct traits { + typedef L2_Simple_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ +struct metric_SO2 : public Metric { + template struct traits { + typedef SO2_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ +struct metric_SO3 : public Metric { + template struct traits { + typedef SO3_Adaptor distance_t; + }; +}; + +/** @} */ + +/** @addtogroup param_grp Parameter structs + * @{ */ + +/** Parameters (see README.md) */ +struct KDTreeSingleIndexAdaptorParams { + KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10) + : leaf_max_size(_leaf_max_size) {} + + size_t leaf_max_size; +}; + +/** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ +struct SearchParams { + /** Note: The first argument (checks_IGNORED_) is ignored, but kept for + * compatibility with the FLANN interface */ + SearchParams(int checks_IGNORED_ = 32, float eps_ = 0, bool sorted_ = true) + : checks(checks_IGNORED_), eps(eps_), sorted(sorted_) {} + + int checks; //!< Ignored parameter (Kept for compatibility with the FLANN + //!< interface). + float eps; //!< search for eps-approximate neighbours (default: 0) + bool sorted; //!< only for radius search, require neighbours sorted by + //!< distance (default: true) +}; +/** @} */ + +/** @addtogroup memalloc_grp Memory allocation + * @{ */ + +/** + * Allocates (using C's malloc) a generic type T. + * + * Params: + * count = number of instances to allocate. + * Returns: pointer (of type T*) to memory buffer + */ +template inline T *allocate(size_t count = 1) { + T *mem = static_cast(::malloc(sizeof(T) * count)); + return mem; +} + +/** + * Pooled storage allocator + * + * The following routines allow for the efficient allocation of storage in + * small chunks from a specified pool. Rather than allowing each structure + * to be freed individually, an entire pool of storage is freed at once. + * This method has two advantages over just using malloc() and free(). First, + * it is far more efficient for allocating small objects, as there is + * no overhead for remembering all the information needed to free each + * object or consolidating fragmented memory. Second, the decision about + * how long to keep an object is made at the time of allocation, and there + * is no need to track down all the objects to free them. 
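For reference, how the two parameter structs above are consumed in practice, using the same Eigen matrix adaptor this project relies on: leaf_max_size trades build time against query time, SearchParams::checks is ignored (kept for FLANN compatibility), and sorted only matters for radius searches. A sketch assuming the adaptor's IndexType is MatrixX3d::Index, as in the nanoflann version bundled here, and that the adaptor builds the index in its constructor (an explicit rebuild via tree.index->buildIndex() is also possible); sizes and the query point are illustrative:

#include "nanoflann.hpp"
#include <Eigen/Dense>
#include <vector>

int main() {
  Eigen::MatrixX3d pts = Eigen::MatrixX3d::Random(1000, 3); // one point per row
  typedef nanoflann::KDTreeEigenMatrixAdaptor<Eigen::MatrixX3d> kd_tree;
  kd_tree tree(3, pts, 10 /* leaf_max_size */);

  const double query[3] = {0.0, 0.0, 0.0};
  std::vector<Eigen::MatrixX3d::Index> idx(5);
  std::vector<double> dist2(5); // squared L2 distances
  tree.index->knnSearch(&query[0], 5, &idx[0], &dist2[0]);
  return 0;
}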
+ * + */ + +const size_t WORDSIZE = 16; +const size_t BLOCKSIZE = 8192; + +class PooledAllocator { + /* We maintain memory alignment to word boundaries by requiring that all + allocations be in multiples of the machine wordsize. */ + /* Size of machine word in bytes. Must be power of 2. */ + /* Minimum number of bytes requested at a time from the system. Must be + * multiple of WORDSIZE. */ + + size_t remaining; /* Number of bytes left in current block of storage. */ + void *base; /* Pointer to base of current block of storage. */ + void *loc; /* Current location in block to next allocate memory. */ + + void internal_init() { + remaining = 0; + base = NULL; + usedMemory = 0; + wastedMemory = 0; + } + +public: + size_t usedMemory; + size_t wastedMemory; + + /** + Default constructor. Initializes a new pool. + */ + PooledAllocator() { internal_init(); } + + /** + * Destructor. Frees all the memory allocated in this pool. + */ + ~PooledAllocator() { free_all(); } + + /** Frees all allocated memory chunks */ + void free_all() { + while (base != NULL) { + void *prev = + *(static_cast(base)); /* Get pointer to prev block. */ + ::free(base); + base = prev; + } + internal_init(); + } + + /** + * Returns a pointer to a piece of new memory of the given size in bytes + * allocated from the pool. + */ + void *malloc(const size_t req_size) { + /* Round size up to a multiple of wordsize. The following expression + only works for WORDSIZE that is a power of 2, by masking last bits of + incremented size to zero. + */ + const size_t size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); + + /* Check whether a new block must be allocated. Note that the first word + of a block is reserved for a pointer to the previous block. + */ + if (size > remaining) { + + wastedMemory += remaining; + + /* Allocate new storage. */ + const size_t blocksize = + (size + sizeof(void *) + (WORDSIZE - 1) > BLOCKSIZE) + ? size + sizeof(void *) + (WORDSIZE - 1) + : BLOCKSIZE; + + // use the standard C malloc to allocate memory + void *m = ::malloc(blocksize); + if (!m) { + fprintf(stderr, "Failed to allocate memory.\n"); + return NULL; + } + + /* Fill first word of new block with pointer to previous block. */ + static_cast(m)[0] = base; + base = m; + + size_t shift = 0; + // int size_t = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & + // (WORDSIZE-1))) & (WORDSIZE-1); + + remaining = blocksize - sizeof(void *) - shift; + loc = (static_cast(m) + sizeof(void *) + shift); + } + void *rloc = loc; + loc = static_cast(loc) + size; + remaining -= size; + + usedMemory += size; + + return rloc; + } + + /** + * Allocates (using this pool) a generic type T. + * + * Params: + * count = number of instances to allocate. + * Returns: pointer (of type T*) to memory buffer + */ + template T *allocate(const size_t count = 1) { + T *mem = static_cast(this->malloc(sizeof(T) * count)); + return mem; + } +}; +/** @} */ + +/** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff + * @{ */ + +/** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors + * when DIM=-1. Fixed size version for a generic DIM: + */ +template struct array_or_vector_selector { + typedef std::array container_t; +}; +/** Dynamic size version */ +template struct array_or_vector_selector<-1, T> { + typedef std::vector container_t; +}; + +/** @} */ + +/** kd-tree base-class + * + * Contains the member functions common to the classes KDTreeSingleIndexAdaptor + * and KDTreeSingleIndexDynamicAdaptor_. 
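One detail worth spelling out in PooledAllocator::malloc() above: the expression (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1) rounds the request up to the next multiple of WORDSIZE, and it is correct only because WORDSIZE is a power of two, so the mask clears exactly the low bits. A worked check:

#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t WORDSIZE = 16; // must be a power of two
  const std::size_t requests[] = {1, 15, 16, 17, 40};
  for (std::size_t req : requests) {
    std::size_t rounded = (req + (WORDSIZE - 1)) & ~(WORDSIZE - 1);
    std::printf("%zu -> %zu\n", req, rounded); // 1->16, 15->16, 16->16, 17->32, 40->48
  }
  return 0;
}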
+ * + * \tparam Derived The name of the class which inherits this class. + * \tparam DatasetAdaptor The user-provided adaptor (see comments above). + * \tparam Distance The distance metric to use, these are all classes derived + * from nanoflann::Metric \tparam DIM Dimensionality of data points (e.g. 3 for + * 3D points) \tparam IndexType Will be typically size_t or int + */ + +template +class KDTreeBaseClass { + +public: + /** Frees the previously-built index. Automatically called within + * buildIndex(). */ + void freeIndex(Derived &obj) { + obj.pool.free_all(); + obj.root_node = NULL; + obj.m_size_at_index_build = 0; + } + + typedef typename Distance::ElementType ElementType; + typedef typename Distance::DistanceType DistanceType; + + /*--------------------- Internal Data Structures --------------------------*/ + struct Node { + /** Union used because a node can be either a LEAF node or a non-leaf node, + * so both data fields are never used simultaneously */ + union { + struct leaf { + IndexType left, right; //!< Indices of points in leaf node + } lr; + struct nonleaf { + int divfeat; //!< Dimension used for subdivision. + DistanceType divlow, divhigh; //!< The values used for subdivision. + } sub; + } node_type; + Node *child1, *child2; //!< Child nodes (both=NULL mean its a leaf node) + }; + + typedef Node *NodePtr; + + struct Interval { + ElementType low, high; + }; + + /** + * Array of indices to vectors in the dataset. + */ + std::vector vind; + + NodePtr root_node; + + size_t m_leaf_max_size; + + size_t m_size; //!< Number of current points in the dataset + size_t m_size_at_index_build; //!< Number of points in the dataset when the + //!< index was built + int dim; //!< Dimensionality of each data point + + /** Define "BoundingBox" as a fixed-size or variable-size container depending + * on "DIM" */ + typedef + typename array_or_vector_selector::container_t BoundingBox; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + typedef typename array_or_vector_selector::container_t + distance_vector_t; + + /** The KD-tree used to find neighbours */ + + BoundingBox root_bbox; + + /** + * Pooled memory allocator. + * + * Using a pooled memory allocator is more efficient + * than allocating memory directly when there is a large + * number small of memory allocations. + */ + PooledAllocator pool; + + /** Returns number of points in dataset */ + size_t size(const Derived &obj) const { return obj.m_size; } + + /** Returns the length of each point in the dataset */ + size_t veclen(const Derived &obj) { + return static_cast(DIM > 0 ? 
DIM : obj.dim); + } + + /// Helper accessor to the dataset points: + inline ElementType dataset_get(const Derived &obj, size_t idx, + int component) const { + return obj.dataset.kdtree_get_pt(idx, component); + } + + /** + * Computes the inde memory usage + * Returns: memory used by the index + */ + size_t usedMemory(Derived &obj) { + return obj.pool.usedMemory + obj.pool.wastedMemory + + obj.dataset.kdtree_get_point_count() * + sizeof(IndexType); // pool memory and vind array memory + } + + void computeMinMax(const Derived &obj, IndexType *ind, IndexType count, + int element, ElementType &min_elem, + ElementType &max_elem) { + min_elem = dataset_get(obj, ind[0], element); + max_elem = dataset_get(obj, ind[0], element); + for (IndexType i = 1; i < count; ++i) { + ElementType val = dataset_get(obj, ind[i], element); + if (val < min_elem) + min_elem = val; + if (val > max_elem) + max_elem = val; + } + } + + /** + * Create a tree node that subdivides the list of vecs from vind[first] + * to vind[last]. The routine is called recursively on each sublist. + * + * @param left index of the first vector + * @param right index of the last vector + */ + NodePtr divideTree(Derived &obj, const IndexType left, const IndexType right, + BoundingBox &bbox) { + NodePtr node = obj.pool.template allocate(); // allocate memory + + /* If too few exemplars remain, then make this a leaf node. */ + if ((right - left) <= static_cast(obj.m_leaf_max_size)) { + node->child1 = node->child2 = NULL; /* Mark as leaf node. */ + node->node_type.lr.left = left; + node->node_type.lr.right = right; + + // compute bounding-box of leaf points + for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { + bbox[i].low = dataset_get(obj, obj.vind[left], i); + bbox[i].high = dataset_get(obj, obj.vind[left], i); + } + for (IndexType k = left + 1; k < right; ++k) { + for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { + if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) + bbox[i].low = dataset_get(obj, obj.vind[k], i); + if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) + bbox[i].high = dataset_get(obj, obj.vind[k], i); + } + } + } else { + IndexType idx; + int cutfeat; + DistanceType cutval; + middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, + bbox); + + node->node_type.sub.divfeat = cutfeat; + + BoundingBox left_bbox(bbox); + left_bbox[cutfeat].high = cutval; + node->child1 = divideTree(obj, left, left + idx, left_bbox); + + BoundingBox right_bbox(bbox); + right_bbox[cutfeat].low = cutval; + node->child2 = divideTree(obj, left + idx, right, right_bbox); + + node->node_type.sub.divlow = left_bbox[cutfeat].high; + node->node_type.sub.divhigh = right_bbox[cutfeat].low; + + for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { + bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); + bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); + } + } + + return node; + } + + void middleSplit_(Derived &obj, IndexType *ind, IndexType count, + IndexType &index, int &cutfeat, DistanceType &cutval, + const BoundingBox &bbox) { + const DistanceType EPS = static_cast(0.00001); + ElementType max_span = bbox[0].high - bbox[0].low; + for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) { + ElementType span = bbox[i].high - bbox[i].low; + if (span > max_span) { + max_span = span; + } + } + ElementType max_spread = -1; + cutfeat = 0; + for (int i = 0; i < (DIM > 0 ? 
+      ElementType span = bbox[i].high - bbox[i].low;
+      if (span > (1 - EPS) * max_span) {
+        ElementType min_elem, max_elem;
+        computeMinMax(obj, ind, count, i, min_elem, max_elem);
+        ElementType spread = max_elem - min_elem;
+        if (spread > max_spread) {
+          cutfeat = i;
+          max_spread = spread;
+        }
+      }
+    }
+    // split in the middle
+    DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2;
+    ElementType min_elem, max_elem;
+    computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem);
+
+    if (split_val < min_elem)
+      cutval = min_elem;
+    else if (split_val > max_elem)
+      cutval = max_elem;
+    else
+      cutval = split_val;
+
+    IndexType lim1, lim2;
+    planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2);
+
+    if (lim1 > count / 2)
+      index = lim1;
+    else if (lim2 < count / 2)
+      index = lim2;
+    else
+      index = count / 2;
+  }
+
+  /**
+   * Subdivide the list of points by a plane perpendicular to the axis
+   * corresponding to the 'cutfeat' dimension at 'cutval' position.
+   *
+   * On return:
+   * dataset[ind[0..lim1-1]][cutfeat] < cutval
+   * dataset[ind[lim1..lim2-1]][cutfeat] == cutval
+   * dataset[ind[lim2..count]][cutfeat] > cutval
+   */
+  void planeSplit(Derived &obj, IndexType *ind, const IndexType count,
+                  int cutfeat, DistanceType &cutval, IndexType &lim1,
+                  IndexType &lim2) {
+    /* Move vector indices for left subtree to front of list. */
+    IndexType left = 0;
+    IndexType right = count - 1;
+    for (;;) {
+      while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval)
+        ++left;
+      while (right && left <= right &&
+             dataset_get(obj, ind[right], cutfeat) >= cutval)
+        --right;
+      if (left > right || !right)
+        break; // "!right" was added to support unsigned Index types
+      std::swap(ind[left], ind[right]);
+      ++left;
+      --right;
+    }
+    /* If either list is empty, it means that all remaining features
+     * are identical. Split in the middle to maintain a balanced tree.
+     */
+    lim1 = left;
+    right = count - 1;
+    for (;;) {
+      while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval)
+        ++left;
+      while (right && left <= right &&
+             dataset_get(obj, ind[right], cutfeat) > cutval)
+        --right;
+      if (left > right || !right)
+        break; // "!right" was added to support unsigned Index types
+      std::swap(ind[left], ind[right]);
+      ++left;
+      --right;
+    }
+    lim2 = left;
+  }
+
+  DistanceType computeInitialDistances(const Derived &obj,
+                                       const ElementType *vec,
+                                       distance_vector_t &dists) const {
+    assert(vec);
+    DistanceType distsq = DistanceType();
+
+    for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
+      if (vec[i] < obj.root_bbox[i].low) {
+        dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i);
+        distsq += dists[i];
+      }
+      if (vec[i] > obj.root_bbox[i].high) {
+        dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i);
+        distsq += dists[i];
+      }
+    }
+    return distsq;
+  }
+
+  void save_tree(Derived &obj, FILE *stream, NodePtr tree) {
+    save_value(stream, *tree);
+    if (tree->child1 != NULL) {
+      save_tree(obj, stream, tree->child1);
+    }
+    if (tree->child2 != NULL) {
+      save_tree(obj, stream, tree->child2);
+    }
+  }
+
+  void load_tree(Derived &obj, FILE *stream, NodePtr &tree) {
+    tree = obj.pool.template allocate<Node>();
+    load_value(stream, *tree);
+    if (tree->child1 != NULL) {
+      load_tree(obj, stream, tree->child1);
+    }
+    if (tree->child2 != NULL) {
+      load_tree(obj, stream, tree->child2);
+    }
+  }
+
+  /** Stores the index in a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when
+   * loading the index object it must be constructed associated to the same
+   * source of data points used while building it.
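+   * A minimal save/load round-trip sketch (the file name and the `index`
+   * variable are illustrative only, not part of the API):
+   * \code
+   * FILE *f = fopen("index.bin", "wb");
+   * index.saveIndex(f);  // public wrapper defined in the derived classes
+   * fclose(f);
+   * // ... later, with a fresh index constructed over the SAME points:
+   * f = fopen("index.bin", "rb");
+   * index.loadIndex(f);
+   * fclose(f);
+   * \endcode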
+   * See the example:
+   * examples/saveload_example.cpp \sa loadIndex */
+  void saveIndex_(Derived &obj, FILE *stream) {
+    save_value(stream, obj.m_size);
+    save_value(stream, obj.dim);
+    save_value(stream, obj.root_bbox);
+    save_value(stream, obj.m_leaf_max_size);
+    save_value(stream, obj.vind);
+    save_tree(obj, stream, obj.root_node);
+  }
+
+  /** Loads a previous index from a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the
+   * index object must be constructed associated to the same source of data
+   * points used while building the index. See the example:
+   * examples/saveload_example.cpp \sa loadIndex */
+  void loadIndex_(Derived &obj, FILE *stream) {
+    load_value(stream, obj.m_size);
+    load_value(stream, obj.dim);
+    load_value(stream, obj.root_bbox);
+    load_value(stream, obj.m_leaf_max_size);
+    load_value(stream, obj.vind);
+    load_tree(obj, stream, obj.root_node);
+  }
+};
+
+/** @addtogroup kdtrees_grp KD-tree classes and adaptors
+ * @{ */
+
+/** kd-tree static index
+ *
+ * Contains the k-d trees and other information for indexing a set of points
+ * for nearest-neighbor matching.
+ *
+ * The class "DatasetAdaptor" must provide the following interface (can be
+ * non-virtual, inlined methods):
+ *
+ * \code
+ * // Must return the number of data points
+ * inline size_t kdtree_get_point_count() const { ... }
+ *
+ *
+ * // Must return the dim'th component of the idx'th point in the class:
+ * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
+ *
+ * // Optional bounding-box computation: return false to default to a standard
+ * bbox computation loop.
+ * // Return true if the BBOX was already computed by the class and returned
+ * in "bb" so it can be avoided to redo it again.
+ * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
+ * for point clouds) template <class BBOX> bool kdtree_get_bbox(BBOX &bb) const
+ * {
+ *   bb[0].low = ...; bb[0].high = ...; // 0th dimension limits
+ *   bb[1].low = ...; bb[1].high = ...; // 1st dimension limits
+ *   ...
+ *   return true;
+ * }
+ *
+ * \endcode
+ *
+ * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM
+ * Dimensionality of data points (e.g.
3 for 3D points) \tparam IndexType Will + * be typically size_t or int + */ +template +class KDTreeSingleIndexAdaptor + : public KDTreeBaseClass< + KDTreeSingleIndexAdaptor, + Distance, DatasetAdaptor, DIM, IndexType> { +public: + /** Deleted copy constructor*/ + KDTreeSingleIndexAdaptor( + const KDTreeSingleIndexAdaptor + &) = delete; + + /** + * The dataset used by this index + */ + const DatasetAdaptor &dataset; //!< The source of our data + + const KDTreeSingleIndexAdaptorParams index_params; + + Distance distance; + + typedef typename nanoflann::KDTreeBaseClass< + nanoflann::KDTreeSingleIndexAdaptor, + Distance, DatasetAdaptor, DIM, IndexType> + BaseClassRef; + + typedef typename BaseClassRef::ElementType ElementType; + typedef typename BaseClassRef::DistanceType DistanceType; + + typedef typename BaseClassRef::Node Node; + typedef Node *NodePtr; + + typedef typename BaseClassRef::Interval Interval; + /** Define "BoundingBox" as a fixed-size or variable-size container depending + * on "DIM" */ + typedef typename BaseClassRef::BoundingBox BoundingBox; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + typedef typename BaseClassRef::distance_vector_t distance_vector_t; + + /** + * KDTree constructor + * + * Refer to docs in README.md or online in + * https://github.com/jlblancoc/nanoflann + * + * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 + * for 3D points) is determined by means of: + * - The \a DIM template parameter if >0 (highest priority) + * - Otherwise, the \a dimensionality parameter of this constructor. + * + * @param inputData Dataset with the input features + * @param params Basically, the maximum leaf node size + */ + KDTreeSingleIndexAdaptor(const int dimensionality, + const DatasetAdaptor &inputData, + const KDTreeSingleIndexAdaptorParams ¶ms = + KDTreeSingleIndexAdaptorParams()) + : dataset(inputData), index_params(params), distance(inputData) { + BaseClassRef::root_node = NULL; + BaseClassRef::m_size = dataset.kdtree_get_point_count(); + BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; + BaseClassRef::dim = dimensionality; + if (DIM > 0) + BaseClassRef::dim = DIM; + BaseClassRef::m_leaf_max_size = params.leaf_max_size; + + // Create a permutable array of indices to the input vectors. + init_vind(); + } + + /** + * Builds the index + */ + void buildIndex() { + BaseClassRef::m_size = dataset.kdtree_get_point_count(); + BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; + init_vind(); + this->freeIndex(*this); + BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; + if (BaseClassRef::m_size == 0) + return; + computeBoundingBox(BaseClassRef::root_bbox); + BaseClassRef::root_node = + this->divideTree(*this, 0, BaseClassRef::m_size, + BaseClassRef::root_bbox); // construct the tree + } + + /** \name Query methods + * @{ */ + + /** + * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored + * inside the result object. + * + * Params: + * result = the result object in which the indices of the + * nearest-neighbors are stored vec = the vector for which to search the + * nearest neighbors + * + * \tparam RESULTSET Should be any ResultSet + * \return True if the requested neighbors could be found. 
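+   * A minimal usage sketch, mirroring what knnSearch() does internally
+   * (assumes a built index over float data and a pointer `query` to at least
+   * `dim` valid floats; the names are illustrative):
+   * \code
+   * size_t ret_index[10];
+   * float out_dist_sqr[10];
+   * nanoflann::KNNResultSet<float> resultSet(10);
+   * resultSet.init(ret_index, out_dist_sqr);
+   * index.findNeighbors(resultSet, query, nanoflann::SearchParams());
+   * \endcode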
+ * \sa knnSearch, radiusSearch + */ + template + bool findNeighbors(RESULTSET &result, const ElementType *vec, + const SearchParams &searchParams) const { + assert(vec); + if (this->size(*this) == 0) + return false; + if (!BaseClassRef::root_node) + throw std::runtime_error( + "[nanoflann] findNeighbors() called before building the index."); + float epsError = 1 + searchParams.eps; + + distance_vector_t + dists; // fixed or variable-sized container (depending on DIM) + auto zero = static_cast(0); + assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), + zero); // Fill it with zeros. + DistanceType distsq = this->computeInitialDistances(*this, vec, dists); + searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, + epsError); // "count_leaf" parameter removed since was neither + // used nor returned to the user. + return result.full(); + } + + /** + * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. + * Their indices are stored inside the result object. \sa radiusSearch, + * findNeighbors \note nChecks_IGNORED is ignored but kept for compatibility + * with the original FLANN interface. \return Number `N` of valid points in + * the result set. Only the first `N` entries in `out_indices` and + * `out_distances_sq` will be valid. Return may be less than `num_closest` + * only if the number of elements in the tree is less than `num_closest`. + */ + size_t knnSearch(const ElementType *query_point, const size_t num_closest, + IndexType *out_indices, DistanceType *out_distances_sq, + const int /* nChecks_IGNORED */ = 10) const { + nanoflann::KNNResultSet resultSet(num_closest); + resultSet.init(out_indices, out_distances_sq); + this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); + return resultSet.size(); + } + + /** + * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. + * The output is given as a vector of pairs, of which the first element is a + * point index and the second the corresponding distance. Previous contents of + * \a IndicesDists are cleared. + * + * If searchParams.sorted==true, the output list is sorted by ascending + * distances. + * + * For a better performance, it is advisable to do a .reserve() on the vector + * if you have any wild guess about the number of expected matches. + * + * \sa knnSearch, findNeighbors, radiusSearchCustomCallback + * \return The number of points within the given radius (i.e. indices.size() + * or dists.size() ) + */ + size_t + radiusSearch(const ElementType *query_point, const DistanceType &radius, + std::vector> &IndicesDists, + const SearchParams &searchParams) const { + RadiusResultSet resultSet(radius, IndicesDists); + const size_t nFound = + radiusSearchCustomCallback(query_point, resultSet, searchParams); + if (searchParams.sorted) + std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); + return nFound; + } + + /** + * Just like radiusSearch() but with a custom callback class for each point + * found in the radius of the query. See the source of RadiusResultSet<> as a + * start point for your own classes. \sa radiusSearch + */ + template + size_t radiusSearchCustomCallback( + const ElementType *query_point, SEARCH_CALLBACK &resultSet, + const SearchParams &searchParams = SearchParams()) const { + this->findNeighbors(resultSet, query_point, searchParams); + return resultSet.size(); + } + + /** @} */ + +public: + /** Make sure the auxiliary list \a vind has the same size than the current + * dataset, and re-generate if size has changed. 
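+   * The list is (re)initialized to the identity permutation 0..m_size-1;
+   * divideTree() then reorders it in place while the tree is built.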
*/ + void init_vind() { + // Create a permutable array of indices to the input vectors. + BaseClassRef::m_size = dataset.kdtree_get_point_count(); + if (BaseClassRef::vind.size() != BaseClassRef::m_size) + BaseClassRef::vind.resize(BaseClassRef::m_size); + for (size_t i = 0; i < BaseClassRef::m_size; i++) + BaseClassRef::vind[i] = i; + } + + void computeBoundingBox(BoundingBox &bbox) { + resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim)); + if (dataset.kdtree_get_bbox(bbox)) { + // Done! It was implemented in derived class + } else { + const size_t N = dataset.kdtree_get_point_count(); + if (!N) + throw std::runtime_error("[nanoflann] computeBoundingBox() called but " + "no data points found."); + for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { + bbox[i].low = bbox[i].high = this->dataset_get(*this, 0, i); + } + for (size_t k = 1; k < N; ++k) { + for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { + if (this->dataset_get(*this, k, i) < bbox[i].low) + bbox[i].low = this->dataset_get(*this, k, i); + if (this->dataset_get(*this, k, i) > bbox[i].high) + bbox[i].high = this->dataset_get(*this, k, i); } - // assign one value to all elements - inline void assign (const T& value) { for (size_t i=0;i= size()) { throw std::out_of_range("CArray<>: index out of range"); } } - }; // end of CArray - - /** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors when DIM=-1. - * Fixed size version for a generic DIM: - */ - template - struct array_or_vector_selector - { - typedef CArray container_t; - }; - /** Dynamic size version */ - template - struct array_or_vector_selector<-1, T> { - typedef std::vector container_t; - }; - - /** @} */ - - /** kd-tree base-class - * - * Contains the member functions common to the classes KDTreeSingleIndexAdaptor and KDTreeSingleIndexDynamicAdaptor_. - * - * \tparam Derived The name of the class which inherits this class. - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use, these are all classes derived from nanoflann::Metric - * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - - template - class KDTreeBaseClass - { - - public: - /** Frees the previously-built index. Automatically called within buildIndex(). */ - void freeIndex(Derived &obj) - { - obj.pool.free_all(); - obj.root_node = NULL; - obj.m_size_at_index_build = 0; - } - - typedef typename Distance::ElementType ElementType; - typedef typename Distance::DistanceType DistanceType; - - /*--------------------- Internal Data Structures --------------------------*/ - struct Node - { - /** Union used because a node can be either a LEAF node or a non-leaf node, so both data fields are never used simultaneously */ - union { - struct leaf - { - IndexType left, right; //!< Indices of points in leaf node - } lr; - struct nonleaf - { - int divfeat; //!< Dimension used for subdivision. - DistanceType divlow, divhigh; //!< The values used for subdivision. - } sub; - } node_type; - Node *child1, *child2; //!< Child nodes (both=NULL mean its a leaf node) - }; - - typedef Node* NodePtr; - - struct Interval - { - ElementType low, high; - }; - - /** - * Array of indices to vectors in the dataset. 
- */ - std::vector vind; - - NodePtr root_node; - - size_t m_leaf_max_size; - - size_t m_size; //!< Number of current points in the dataset - size_t m_size_at_index_build; //!< Number of points in the dataset when the index was built - int dim; //!< Dimensionality of each data point - - /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename array_or_vector_selector::container_t BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename array_or_vector_selector::container_t distance_vector_t; - - /** The KD-tree used to find neighbours */ - - BoundingBox root_bbox; - - /** - * Pooled memory allocator. - * - * Using a pooled memory allocator is more efficient - * than allocating memory directly when there is a large - * number small of memory allocations. - */ - PooledAllocator pool; - - /** Returns number of points in dataset */ - size_t size(const Derived &obj) const { return obj.m_size; } - - /** Returns the length of each point in the dataset */ - size_t veclen(const Derived &obj) { - return static_cast(DIM>0 ? DIM : obj.dim); - } - - /// Helper accessor to the dataset points: - inline ElementType dataset_get(const Derived &obj, size_t idx, int component) const{ - return obj.dataset.kdtree_get_pt(idx, component); - } - - /** - * Computes the inde memory usage - * Returns: memory used by the index - */ - size_t usedMemory(Derived &obj) - { - return obj.pool.usedMemory + obj.pool.wastedMemory + obj.dataset.kdtree_get_point_count() * sizeof(IndexType); // pool memory and vind array memory - } - - void computeMinMax(const Derived &obj, IndexType* ind, IndexType count, int element, ElementType& min_elem, ElementType& max_elem) - { - min_elem = dataset_get(obj, ind[0],element); - max_elem = dataset_get(obj, ind[0],element); - for (IndexType i = 1; i < count; ++i) { - ElementType val = dataset_get(obj, ind[i], element); - if (val < min_elem) min_elem = val; - if (val > max_elem) max_elem = val; - } - } - - /** - * Create a tree node that subdivides the list of vecs from vind[first] - * to vind[last]. The routine is called recursively on each sublist. - * - * @param left index of the first vector - * @param right index of the last vector - */ - NodePtr divideTree(Derived &obj, const IndexType left, const IndexType right, BoundingBox& bbox) - { - NodePtr node = obj.pool.template allocate(); // allocate memory - - /* If too few exemplars remain, then make this a leaf node. */ - if ( (right - left) <= static_cast(obj.m_leaf_max_size) ) { - node->child1 = node->child2 = NULL; /* Mark as leaf node. */ - node->node_type.lr.left = left; - node->node_type.lr.right = right; - - // compute bounding-box of leaf points - for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { - bbox[i].low = dataset_get(obj, obj.vind[left], i); - bbox[i].high = dataset_get(obj, obj.vind[left], i); - } - for (IndexType k = left + 1; k < right; ++k) { - for (int i = 0; i < (DIM > 0 ? 
DIM : obj.dim); ++i) { - if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) bbox[i].low = dataset_get(obj, obj.vind[k], i); - if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) bbox[i].high = dataset_get(obj, obj.vind[k], i); - } - } - } - else { - IndexType idx; - int cutfeat; - DistanceType cutval; - middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, bbox); - - node->node_type.sub.divfeat = cutfeat; - - BoundingBox left_bbox(bbox); - left_bbox[cutfeat].high = cutval; - node->child1 = divideTree(obj, left, left + idx, left_bbox); - - BoundingBox right_bbox(bbox); - right_bbox[cutfeat].low = cutval; - node->child2 = divideTree(obj, left + idx, right, right_bbox); - - node->node_type.sub.divlow = left_bbox[cutfeat].high; - node->node_type.sub.divhigh = right_bbox[cutfeat].low; - - for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { - bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); - bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); - } - } - - return node; - } - - void middleSplit_(Derived &obj, IndexType* ind, IndexType count, IndexType& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) - { - const DistanceType EPS = static_cast(0.00001); - ElementType max_span = bbox[0].high-bbox[0].low; - for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) { - ElementType span = bbox[i].high - bbox[i].low; - if (span > max_span) { - max_span = span; - } - } - ElementType max_spread = -1; - cutfeat = 0; - for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { - ElementType span = bbox[i].high-bbox[i].low; - if (span > (1 - EPS) * max_span) { - ElementType min_elem, max_elem; - computeMinMax(obj, ind, count, i, min_elem, max_elem); - ElementType spread = max_elem - min_elem;; - if (spread > max_spread) { - cutfeat = i; - max_spread = spread; - } - } - } - // split in the middle - DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; - ElementType min_elem, max_elem; - computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem); - - if (split_val < min_elem) cutval = min_elem; - else if (split_val > max_elem) cutval = max_elem; - else cutval = split_val; - - IndexType lim1, lim2; - planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); - - if (lim1 > count / 2) index = lim1; - else if (lim2 < count / 2) index = lim2; - else index = count/2; - } - - /** - * Subdivide the list of points by a plane perpendicular on axe corresponding - * to the 'cutfeat' dimension at 'cutval' position. - * - * On return: - * dataset[ind[0..lim1-1]][cutfeat]cutval - */ - void planeSplit(Derived &obj, IndexType* ind, const IndexType count, int cutfeat, DistanceType &cutval, IndexType& lim1, IndexType& lim2) - { - /* Move vector indices for left subtree to front of list. */ - IndexType left = 0; - IndexType right = count-1; - for (;; ) { - while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval) ++left; - while (right && left <= right && dataset_get(obj, ind[right], cutfeat) >= cutval) --right; - if (left > right || !right) break; // "!right" was added to support unsigned Index types - std::swap(ind[left], ind[right]); - ++left; - --right; - } - /* If either list is empty, it means that all remaining features - * are identical. Split in the middle to maintain a balanced tree. 
- */ - lim1 = left; - right = count-1; - for (;; ) { - while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval) ++left; - while (right && left <= right && dataset_get(obj, ind[right], cutfeat) > cutval) --right; - if (left > right || !right) break; // "!right" was added to support unsigned Index types - std::swap(ind[left], ind[right]); - ++left; - --right; - } - lim2 = left; - } - - DistanceType computeInitialDistances(const Derived &obj, const ElementType* vec, distance_vector_t& dists) const - { - assert(vec); - DistanceType distsq = DistanceType(); - - for (int i = 0; i < (DIM>0 ? DIM : obj.dim); ++i) { - if (vec[i] < obj.root_bbox[i].low) { - dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i); - distsq += dists[i]; - } - if (vec[i] > obj.root_bbox[i].high) { - dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i); - distsq += dists[i]; - } - } - return distsq; - } - - void save_tree(Derived &obj, FILE* stream, NodePtr tree) - { - save_value(stream, *tree); - if (tree->child1 != NULL) { - save_tree(obj, stream, tree->child1); - } - if (tree->child2 != NULL) { - save_tree(obj, stream, tree->child2); - } - } - - - void load_tree(Derived &obj, FILE* stream, NodePtr& tree) - { - tree = obj.pool.template allocate(); - load_value(stream, *tree); - if (tree->child1 != NULL) { - load_tree(obj, stream, tree->child1); - } - if (tree->child2 != NULL) { - load_tree(obj, stream, tree->child2); - } - } - - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when loading the index object it must be constructed associated to the same source of data points used while building it. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void saveIndex_(Derived &obj, FILE* stream) - { - save_value(stream, obj.m_size); - save_value(stream, obj.dim); - save_value(stream, obj.root_bbox); - save_value(stream, obj.m_leaf_max_size); - save_value(stream, obj.vind); - save_tree(obj, stream, obj.root_node); - } - - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the index object must be constructed associated to the same source of data points used while building the index. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void loadIndex_(Derived &obj, FILE* stream) - { - load_value(stream, obj.m_size); - load_value(stream, obj.dim); - load_value(stream, obj.root_bbox); - load_value(stream, obj.m_leaf_max_size); - load_value(stream, obj.vind); - load_tree(obj, stream, obj.root_node); - } - - }; - - - /** @addtogroup kdtrees_grp KD-tree classes and adaptors - * @{ */ - - /** kd-tree static index - * - * Contains the k-d trees and other information for indexing a set of points - * for nearest-neighbor matching. - * - * The class "DatasetAdaptor" must provide the following interface (can be non-virtual, inlined methods): - * - * \code - * // Must return the number of data poins - * inline size_t kdtree_get_point_count() const { ... } - * - * - * // Must return the dim'th component of the idx'th point in the class: - * inline T kdtree_get_pt(const size_t idx, int dim) const { ... } - * - * // Optional bounding-box computation: return false to default to a standard bbox computation loop. - * // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again. - * // Look at bb.size() to find out the expected dimensionality (e.g. 
2 or 3 for point clouds) - * template - * bool kdtree_get_bbox(BBOX &bb) const - * { - * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits - * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits - * ... - * return true; - * } - * - * \endcode - * - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - template - class KDTreeSingleIndexAdaptor : public KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> - { - public: - /** Deleted copy constructor*/ - KDTreeSingleIndexAdaptor(const KDTreeSingleIndexAdaptor&) = delete; - - /** - * The dataset used by this index - */ - const DatasetAdaptor &dataset; //!< The source of our data - - const KDTreeSingleIndexAdaptorParams index_params; - - Distance distance; - - typedef typename nanoflann::KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; - - typedef typename BaseClassRef::ElementType ElementType; - typedef typename BaseClassRef::DistanceType DistanceType; - - typedef typename BaseClassRef::Node Node; - typedef Node* NodePtr; - - typedef typename BaseClassRef::Interval Interval; - /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::BoundingBox BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::distance_vector_t distance_vector_t; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 for 3D points) - * is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexAdaptor(const int dimensionality, const DatasetAdaptor& inputData, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams() ) : - dataset(inputData), index_params(params), distance(inputData) - { - BaseClassRef::root_node = NULL; - BaseClassRef::m_size = dataset.kdtree_get_point_count(); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - BaseClassRef::dim = dimensionality; - if (DIM>0) BaseClassRef::dim = DIM; - BaseClassRef::m_leaf_max_size = params.leaf_max_size; - - // Create a permutable array of indices to the input vectors. - init_vind(); - } - - /** - * Builds the index - */ - void buildIndex() - { - BaseClassRef::m_size = dataset.kdtree_get_point_count(); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - init_vind(); - this->freeIndex(*this); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - if(BaseClassRef::m_size == 0) return; - computeBoundingBox(BaseClassRef::root_bbox); - BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox ); // construct the tree - } - - /** \name Query methods - * @{ */ - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored inside - * the result object. 
- * - * Params: - * result = the result object in which the indices of the nearest-neighbors are stored - * vec = the vector for which to search the nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - */ - template - bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const - { - assert(vec); - if (this->size(*this) == 0) - return false; - if (!BaseClassRef::root_node) - throw std::runtime_error("[nanoflann] findNeighbors() called before building the index."); - float epsError = 1 + searchParams.eps; - - distance_vector_t dists; // fixed or variable-sized container (depending on DIM) - dists.assign((DIM > 0 ? DIM : BaseClassRef::dim), 0); // Fill it with zeros. - DistanceType distsq = this->computeInitialDistances(*this, vec, dists); - searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither used nor returned to the user. - return result.full(); - } - - /** - * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. Their indices are stored inside - * the result object. - * \sa radiusSearch, findNeighbors - * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface. - * \return Number `N` of valid points in the result set. Only the first `N` entries in `out_indices` and `out_distances_sq` will be valid. - * Return may be less than `num_closest` only if the number of elements in the tree is less than `num_closest`. - */ - size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances_sq); - this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); - return resultSet.size(); - } - - /** - * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. - * The output is given as a vector of pairs, of which the first element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * If searchParams.sorted==true, the output list is sorted by ascending distances. - * - * For a better performance, it is advisable to do a .reserve() on the vector if you have any wild guess about the number of expected matches. - * - * \sa knnSearch, findNeighbors, radiusSearchCustomCallback - * \return The number of points within the given radius (i.e. indices.size() or dists.size() ) - */ - size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector >& IndicesDists, const SearchParams& searchParams) const - { - RadiusResultSet resultSet(radius, IndicesDists); - const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter() ); - return nFound; - } - - /** - * Just like radiusSearch() but with a custom callback class for each point found in the radius of the query. - * See the source of RadiusResultSet<> as a start point for your own classes. 
- * \sa radiusSearch - */ - template - size_t radiusSearchCustomCallback(const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams& searchParams = SearchParams() ) const - { - this->findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** @} */ - - public: - /** Make sure the auxiliary list \a vind has the same size than the current dataset, and re-generate if size has changed. */ - void init_vind() - { - // Create a permutable array of indices to the input vectors. - BaseClassRef::m_size = dataset.kdtree_get_point_count(); - if (BaseClassRef::vind.size() != BaseClassRef::m_size) BaseClassRef::vind.resize(BaseClassRef::m_size); - for (size_t i = 0; i < BaseClassRef::m_size; i++) BaseClassRef::vind[i] = i; - } - - void computeBoundingBox(BoundingBox& bbox) - { - bbox.resize((DIM > 0 ? DIM : BaseClassRef::dim)); - if (dataset.kdtree_get_bbox(bbox)) - { - // Done! It was implemented in derived class - } - else - { - const size_t N = dataset.kdtree_get_point_count(); - if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but no data points found."); - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - bbox[i].low = - bbox[i].high = this->dataset_get(*this, 0, i); - } - for (size_t k = 1; k < N; ++k) { - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - if (this->dataset_get(*this, k, i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, k, i); - if (this->dataset_get(*this, k, i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, k, i); - } - } - } - } - - /** - * Performs an exact search in the tree starting from a node. - * \tparam RESULTSET Should be any ResultSet - * \return true if the search should be continued, false if the results are sufficient - */ - template - bool searchLevel(RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, - distance_vector_t& dists, const float epsError) const - { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == NULL) && (node->child2 == NULL)) { - //count_leaf += (node->lr.right-node->lr.left); // Removed since was neither used nor returned to the user. - DistanceType worst_dist = result_set.worstDist(); - for (IndexType i = node->node_type.lr.left; inode_type.lr.right; ++i) { - const IndexType index = BaseClassRef::vind[i];// reorder... : i; - DistanceType dist = distance.evalMetric(vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); - if (dist < worst_dist) { - if(!result_set.addPoint(dist, BaseClassRef::vind[i])) { - // the resultset doesn't want to receive any more points, we're done searching! - return false; - } - } - } - return true; - } - - /* Which child branch should be taken first? */ - int idx = node->node_type.sub.divfeat; - ElementType val = vec[idx]; - DistanceType diff1 = val - node->node_type.sub.divlow; - DistanceType diff2 = val - node->node_type.sub.divhigh; - - NodePtr bestChild; - NodePtr otherChild; - DistanceType cut_dist; - if ((diff1 + diff2) < 0) { - bestChild = node->child1; - otherChild = node->child2; - cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); - } - else { - bestChild = node->child2; - otherChild = node->child1; - cut_dist = distance.accum_dist( val, node->node_type.sub.divlow, idx); - } - - /* Call recursively to search next level down. */ - if(!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) { - // the resultset doesn't want to receive any more points, we're done searching! 
- return false; - } - - DistanceType dst = dists[idx]; - mindistsq = mindistsq + cut_dist - dst; - dists[idx] = cut_dist; - if (mindistsq*epsError <= result_set.worstDist()) { - if(!searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError)) { - // the resultset doesn't want to receive any more points, we're done searching! - return false; - } - } - dists[idx] = dst; - return true; - } - - public: - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when loading the index object it must be constructed associated to the same source of data points used while building it. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void saveIndex(FILE* stream) - { - this->saveIndex_(*this, stream); - } - - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the index object must be constructed associated to the same source of data points used while building the index. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void loadIndex(FILE* stream) - { - this->loadIndex_(*this, stream); - } - - }; // class KDTree - - - /** kd-tree dynamic index - * - * Contains the k-d trees and other information for indexing a set of points - * for nearest-neighbor matching. - * - * The class "DatasetAdaptor" must provide the following interface (can be non-virtual, inlined methods): - * - * \code - * // Must return the number of data poins - * inline size_t kdtree_get_point_count() const { ... } - * - * // Must return the dim'th component of the idx'th point in the class: - * inline T kdtree_get_pt(const size_t idx, int dim) const { ... } - * - * // Optional bounding-box computation: return false to default to a standard bbox computation loop. - * // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again. - * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 for point clouds) - * template - * bool kdtree_get_bbox(BBOX &bb) const - * { - * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits - * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits - * ... - * return true; - * } - * - * \endcode - * - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - * \tparam DIM Dimensionality of data points (e.g. 
3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - template - class KDTreeSingleIndexDynamicAdaptor_ : public KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> - { - public: - - /** - * The dataset used by this index - */ - const DatasetAdaptor &dataset; //!< The source of our data - - KDTreeSingleIndexAdaptorParams index_params; - - std::vector &treeIndex; - - Distance distance; - - typedef typename nanoflann::KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; - - typedef typename BaseClassRef::ElementType ElementType; - typedef typename BaseClassRef::DistanceType DistanceType; - - typedef typename BaseClassRef::Node Node; - typedef Node* NodePtr; - - typedef typename BaseClassRef::Interval Interval; - /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::BoundingBox BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::distance_vector_t distance_vector_t; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 for 3D points) - * is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexDynamicAdaptor_(const int dimensionality, const DatasetAdaptor& inputData, std::vector& treeIndex_, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams()) : - dataset(inputData), index_params(params), treeIndex(treeIndex_), distance(inputData) - { - BaseClassRef::root_node = NULL; - BaseClassRef::m_size = 0; - BaseClassRef::m_size_at_index_build = 0; - BaseClassRef::dim = dimensionality; - if (DIM>0) BaseClassRef::dim = DIM; - BaseClassRef::m_leaf_max_size = params.leaf_max_size; - } - - - /** Assignment operator definiton */ - KDTreeSingleIndexDynamicAdaptor_ operator=( const KDTreeSingleIndexDynamicAdaptor_& rhs ) { - KDTreeSingleIndexDynamicAdaptor_ tmp( rhs ); - std::swap( BaseClassRef::vind, tmp.BaseClassRef::vind ); - std::swap( BaseClassRef::m_leaf_max_size, tmp.BaseClassRef::m_leaf_max_size ); - std::swap( index_params, tmp.index_params ); - std::swap( treeIndex, tmp.treeIndex ); - std::swap( BaseClassRef::m_size, tmp.BaseClassRef::m_size ); - std::swap( BaseClassRef::m_size_at_index_build, tmp.BaseClassRef::m_size_at_index_build ); - std::swap( BaseClassRef::root_node, tmp.BaseClassRef::root_node ); - std::swap( BaseClassRef::root_bbox, tmp.BaseClassRef::root_bbox ); - std::swap( BaseClassRef::pool, tmp.BaseClassRef::pool ); - return *this; - } - - /** - * Builds the index - */ - void buildIndex() - { - BaseClassRef::m_size = BaseClassRef::vind.size(); - this->freeIndex(*this); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - if(BaseClassRef::m_size == 0) return; - computeBoundingBox(BaseClassRef::root_bbox); - BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox ); // construct the tree - } - - /** \name Query methods - * @{ */ - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored inside - * the result object. 
- * - * Params: - * result = the result object in which the indices of the nearest-neighbors are stored - * vec = the vector for which to search the nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - */ - template - bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const - { - assert(vec); - if (this->size(*this) == 0) - return false; - if (!BaseClassRef::root_node) - return false; - float epsError = 1 + searchParams.eps; - - distance_vector_t dists; // fixed or variable-sized container (depending on DIM) - dists.assign((DIM > 0 ? DIM : BaseClassRef::dim) , 0); // Fill it with zeros. - DistanceType distsq = this->computeInitialDistances(*this, vec, dists); - searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither used nor returned to the user. - return result.full(); - } - - /** - * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. Their indices are stored inside - * the result object. - * \sa radiusSearch, findNeighbors - * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface. - * \return Number `N` of valid points in the result set. Only the first `N` entries in `out_indices` and `out_distances_sq` will be valid. - * Return may be less than `num_closest` only if the number of elements in the tree is less than `num_closest`. - */ - size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances_sq); - this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); - return resultSet.size(); - } - - /** - * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. - * The output is given as a vector of pairs, of which the first element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * If searchParams.sorted==true, the output list is sorted by ascending distances. - * - * For a better performance, it is advisable to do a .reserve() on the vector if you have any wild guess about the number of expected matches. - * - * \sa knnSearch, findNeighbors, radiusSearchCustomCallback - * \return The number of points within the given radius (i.e. indices.size() or dists.size() ) - */ - size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector >& IndicesDists, const SearchParams& searchParams) const - { - RadiusResultSet resultSet(radius, IndicesDists); - const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter() ); - return nFound; - } - - /** - * Just like radiusSearch() but with a custom callback class for each point found in the radius of the query. - * See the source of RadiusResultSet<> as a start point for your own classes. 
- * \sa radiusSearch - */ - template - size_t radiusSearchCustomCallback(const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams& searchParams = SearchParams() ) const - { - this->findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** @} */ - - public: - - - void computeBoundingBox(BoundingBox& bbox) - { - bbox.resize((DIM > 0 ? DIM : BaseClassRef::dim)); - if (dataset.kdtree_get_bbox(bbox)) - { - // Done! It was implemented in derived class - } - else - { - const size_t N = BaseClassRef::m_size; - if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but no data points found."); - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - bbox[i].low = - bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[0], i); - } - for (size_t k = 1; k < N; ++k) { - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - if (this->dataset_get(*this, BaseClassRef::vind[k], i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, BaseClassRef::vind[k], i); - if (this->dataset_get(*this, BaseClassRef::vind[k], i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[k], i); - } - } - } - } - - /** - * Performs an exact search in the tree starting from a node. - * \tparam RESULTSET Should be any ResultSet - */ - template - void searchLevel(RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, - distance_vector_t& dists, const float epsError) const - { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == NULL) && (node->child2 == NULL)) { - //count_leaf += (node->lr.right-node->lr.left); // Removed since was neither used nor returned to the user. - DistanceType worst_dist = result_set.worstDist(); - for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { - const IndexType index = BaseClassRef::vind[i];// reorder... : i; - if(treeIndex[index] == -1) - continue; - DistanceType dist = distance.evalMetric(vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); - if (distnode_type.sub.divfeat; - ElementType val = vec[idx]; - DistanceType diff1 = val - node->node_type.sub.divlow; - DistanceType diff2 = val - node->node_type.sub.divhigh; - - NodePtr bestChild; - NodePtr otherChild; - DistanceType cut_dist; - if ((diff1 + diff2) < 0) { - bestChild = node->child1; - otherChild = node->child2; - cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); - } - else { - bestChild = node->child2; - otherChild = node->child1; - cut_dist = distance.accum_dist( val, node->node_type.sub.divlow, idx); - } - - /* Call recursively to search next level down. */ - searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError); - - DistanceType dst = dists[idx]; - mindistsq = mindistsq + cut_dist - dst; - dists[idx] = cut_dist; - if (mindistsq*epsError <= result_set.worstDist()) { - searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError); - } - dists[idx] = dst; - } - - public: - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when loading the index object it must be constructed associated to the same source of data points used while building it. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void saveIndex(FILE* stream) - { - this->saveIndex_(*this, stream); - } - - /** Loads a previous index from a binary file. 
- * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the index object must be constructed associated to the same source of data points used while building the index. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void loadIndex(FILE* stream) - { - this->loadIndex_(*this, stream); - } - - }; - - - /** kd-tree dynaimic index - * - * class to create multiple static index and merge their results to behave as single dynamic index as proposed in Logarithmic Approach. - * - * Example of usage: - * examples/dynamic_pointcloud_example.cpp - * - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - template - class KDTreeSingleIndexDynamicAdaptor - { - public: - typedef typename Distance::ElementType ElementType; - typedef typename Distance::DistanceType DistanceType; - protected: - - size_t m_leaf_max_size; - size_t treeCount; - size_t pointCount; - - /** - * The dataset used by this index - */ - const DatasetAdaptor &dataset; //!< The source of our data - - std::vector treeIndex; //!< treeIndex[idx] is the index of tree in which point at idx is stored. treeIndex[idx]=-1 means that point has been removed. - - KDTreeSingleIndexAdaptorParams index_params; - - int dim; //!< Dimensionality of each data point - - typedef KDTreeSingleIndexDynamicAdaptor_ index_container_t; - std::vector index; - - public: - /** Get a const ref to the internal list of indices; the number of indices is adapted dynamically as - * the dataset grows in size. */ - const std::vector & getAllIndices() const { - return index; - } - - private: - /** finds position of least significant unset bit */ - int First0Bit(IndexType num) - { - int pos = 0; - while(num&1) - { - num = num>>1; - pos++; - } - return pos; - } - - /** Creates multiple empty trees to handle dynamic support */ - void init() - { - typedef KDTreeSingleIndexDynamicAdaptor_ my_kd_tree_t; - std::vector index_(treeCount, my_kd_tree_t(dim /*dim*/, dataset, treeIndex, index_params)); - index=index_; - } - - public: - - Distance distance; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 for 3D points) - * is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. 
- * - * @param inputData Dataset with the input features - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexDynamicAdaptor(const int dimensionality, const DatasetAdaptor& inputData, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams() , const size_t maximumPointCount = 1000000000U) : - dataset(inputData), index_params(params), distance(inputData) - { - treeCount = std::log2(maximumPointCount); - pointCount = 0U; - dim = dimensionality; - treeIndex.clear(); - if (DIM > 0) dim = DIM; - m_leaf_max_size = params.leaf_max_size; - init(); - int num_initial_points = dataset.kdtree_get_point_count(); - if (num_initial_points > 0) { - addPoints(0, num_initial_points - 1); - } - } - - /** Deleted copy constructor*/ - KDTreeSingleIndexDynamicAdaptor(const KDTreeSingleIndexDynamicAdaptor&) = delete; - - - /** Add points to the set, Inserts all points from [start, end] */ - void addPoints(IndexType start, IndexType end) - { - int count = end - start + 1; - treeIndex.resize(treeIndex.size() + count); - for(IndexType idx = start; idx <= end; idx++) { - int pos = First0Bit(pointCount); - index[pos].vind.clear(); - treeIndex[pointCount]=pos; - for(int i = 0; i < pos; i++) { - for(int j = 0; j < static_cast(index[i].vind.size()); j++) { - index[pos].vind.push_back(index[i].vind[j]); - treeIndex[index[i].vind[j]] = pos; - } - index[i].vind.clear(); - index[i].freeIndex(index[i]); - } - index[pos].vind.push_back(idx); - index[pos].buildIndex(); - pointCount++; - } - } - - /** Remove a point from the set (Lazy Deletion) */ - void removePoint(size_t idx) - { - if(idx >= pointCount) - return; - treeIndex[idx] = -1; - } - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored inside - * the result object. - * - * Params: - * result = the result object in which the indices of the nearest-neighbors are stored - * vec = the vector for which to search the nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - */ - template - bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const - { - for(size_t i = 0; i < treeCount; i++) - { - index[i].findNeighbors(result, &vec[0], searchParams); - } - return result.full(); - } - - }; - - /** An L2-metric KD-tree adaptor for working with data directly stored in an Eigen Matrix, without duplicating the data storage. - * Each row in the matrix represents a point in the state space. - * - * Example of usage: - * \code - * Eigen::Matrix mat; - * // Fill out "mat"... - * - * typedef KDTreeEigenMatrixAdaptor< Eigen::Matrix > my_kd_tree_t; - * const int max_leaf = 10; - * my_kd_tree_t mat_index(mat, max_leaf ); - * mat_index.index->buildIndex(); - * mat_index.index->... - * \endcode - * - * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality for the points in the data set, allowing more compiler optimizations. - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - */ - template - struct KDTreeEigenMatrixAdaptor - { - typedef KDTreeEigenMatrixAdaptor self_t; - typedef typename MatrixType::Scalar num_t; - typedef typename MatrixType::Index IndexType; - typedef typename Distance::template traits::distance_t metric_t; - typedef KDTreeSingleIndexAdaptor< metric_t,self_t, MatrixType::ColsAtCompileTime,IndexType> index_t; - - index_t* index; //! 
The kd-tree index for the user to call its methods as usual with any other FLANN index. - - /// Constructor: takes a const ref to the matrix object with the data points - KDTreeEigenMatrixAdaptor(const MatrixType &mat, const int leaf_max_size = 10) : m_data_matrix(mat) - { - const IndexType dims = mat.cols(); - index = new index_t( dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size ) ); - index->buildIndex(); - } - public: - /** Deleted copy constructor */ - KDTreeEigenMatrixAdaptor(const self_t&) = delete; - - ~KDTreeEigenMatrixAdaptor() { - delete index; - } - - const MatrixType &m_data_matrix; - - /** Query for the \a num_closest closest points to a given point (entered as query_point[0:dim-1]). - * Note that this is a short-cut method for index->findNeighbors(). - * The user can also call index->... methods as desired. - * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface. - */ - inline void query(const num_t *query_point, const size_t num_closest, IndexType *out_indices, num_t *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances_sq); - index->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); - } - - /** @name Interface expected by KDTreeSingleIndexAdaptor - * @{ */ - - const self_t & derived() const { - return *this; - } - self_t & derived() { - return *this; - } - - // Must return the number of data points - inline size_t kdtree_get_point_count() const { - return m_data_matrix.rows(); - } - - // Returns the dim'th component of the idx'th point in the class: - inline num_t kdtree_get_pt(const IndexType idx, int dim) const { - return m_data_matrix.coeff(idx, IndexType(dim)); - } - - // Optional bounding-box computation: return false to default to a standard bbox computation loop. - // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again. - // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 for point clouds) - template - bool kdtree_get_bbox(BBOX& /*bb*/) const { - return false; - } - - /** @} */ - - }; // end of KDTreeEigenMatrixAdaptor - /** @} */ + } + } + } + + /** + * Performs an exact search in the tree starting from a node. + * \tparam RESULTSET Should be any ResultSet + * \return true if the search should be continued, false if the results are + * sufficient + */ + template + bool searchLevel(RESULTSET &result_set, const ElementType *vec, + const NodePtr node, DistanceType mindistsq, + distance_vector_t &dists, const float epsError) const { + /* If this is a leaf node, then do check and return. */ + if ((node->child1 == NULL) && (node->child2 == NULL)) { + // count_leaf += (node->lr.right-node->lr.left); // Removed since was + // neither used nor returned to the user. + DistanceType worst_dist = result_set.worstDist(); + for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; + ++i) { + const IndexType index = BaseClassRef::vind[i]; // reorder... : i; + DistanceType dist = distance.evalMetric( + vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); + if (dist < worst_dist) { + if (!result_set.addPoint(dist, BaseClassRef::vind[i])) { + // the resultset doesn't want to receive any more points, we're done + // searching! + return false; + } + } + } + return true; + } + + /* Which child branch should be taken first? 
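(diff1 + diff2 < 0 means the query value lies below the midpoint of [divlow, divhigh], so child1 is visited first; cut_dist, computed just below, is the single-axis distance to the other half-space and feeds the backtracking bound on mindistsq.)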
+    int idx = node->node_type.sub.divfeat;
+    ElementType val = vec[idx];
+    DistanceType diff1 = val - node->node_type.sub.divlow;
+    DistanceType diff2 = val - node->node_type.sub.divhigh;
+
+    NodePtr bestChild;
+    NodePtr otherChild;
+    DistanceType cut_dist;
+    if ((diff1 + diff2) < 0) {
+      bestChild = node->child1;
+      otherChild = node->child2;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx);
+    } else {
+      bestChild = node->child2;
+      otherChild = node->child1;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx);
+    }
+
+    /* Call recursively to search next level down. */
+    if (!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) {
+      // the resultset doesn't want to receive any more points, we're done
+      // searching!
+      return false;
+    }
+
+    DistanceType dst = dists[idx];
+    mindistsq = mindistsq + cut_dist - dst;
+    dists[idx] = cut_dist;
+    if (mindistsq * epsError <= result_set.worstDist()) {
+      if (!searchLevel(result_set, vec, otherChild, mindistsq, dists,
+                       epsError)) {
+        // the resultset doesn't want to receive any more points, we're done
+        // searching!
+        return false;
+      }
+    }
+    dists[idx] = dst;
+    return true;
+  }
+
+public:
+  /** Stores the index in a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when
+   * loading the index object it must be constructed associated to the same
+   * source of data points used while building it. See the example:
+   * examples/saveload_example.cpp
+   * \sa loadIndex */
+  void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); }
+
+  /** Loads a previous index from a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the
+   * index object must be constructed associated to the same source of data
+   * points used while building the index. See the example:
+   * examples/saveload_example.cpp
+   * \sa saveIndex */
+  void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); }
+
+}; // class KDTree
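+
+/* Editor's illustration (not part of the upstream header): a minimal
+ * save/load round trip for the index class closed above. "PointCloud" is a
+ * hypothetical adaptor exposing kdtree_get_point_count(), kdtree_get_pt()
+ * and kdtree_get_bbox(); only the tree topology is written to disk, so the
+ * second index must be constructed over the same data set before loading.
+ * \code
+ * typedef KDTreeSingleIndexAdaptor<
+ *     L2_Simple_Adaptor<double, PointCloud>, PointCloud, 3> my_kd_tree_t;
+ *
+ * PointCloud cloud;            // ... fill with 3-D points ...
+ * my_kd_tree_t index(3, cloud, KDTreeSingleIndexAdaptorParams(10));
+ * index.buildIndex();
+ *
+ * FILE *f = fopen("index.bin", "wb");
+ * index.saveIndex(f);          // tree only, no point coordinates
+ * fclose(f);
+ *
+ * my_kd_tree_t index2(3, cloud, KDTreeSingleIndexAdaptorParams(10));
+ * FILE *g = fopen("index.bin", "rb");
+ * index2.loadIndex(g);         // restores the tree over the same cloud
+ * fclose(g);
+ * \endcode
+ */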
+
+/** kd-tree dynamic index
+ *
+ * Contains the k-d trees and other information for indexing a set of points
+ * for nearest-neighbor matching.
+ *
+ * The class "DatasetAdaptor" must provide the following interface (can be
+ * non-virtual, inlined methods):
+ *
+ * \code
+ * // Must return the number of data points
+ * inline size_t kdtree_get_point_count() const { ... }
+ *
+ * // Must return the dim'th component of the idx'th point in the class:
+ * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
+ *
+ * // Optional bounding-box computation: return false to default to a standard
+ * // bbox computation loop.
+ * // Return true if the BBOX was already computed by the class and returned
+ * // in "bb" so it can be avoided to redo it again.
+ * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
+ * // for point clouds)
+ * template <class BBOX>
+ * bool kdtree_get_bbox(BBOX &bb) const {
+ *   bb[0].low = ...; bb[0].high = ...; // 0th dimension limits
+ *   bb[1].low = ...; bb[1].high = ...; // 1st dimension limits
+ *   ...
+ *   return true;
+ * }
+ * \endcode
+ *
+ * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ *         nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
+ * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
+ * \tparam IndexType Will be typically size_t or int
+ */
+template <typename Distance, class DatasetAdaptor, int DIM = -1,
+          typename IndexType = size_t>
+class KDTreeSingleIndexDynamicAdaptor_
+    : public KDTreeBaseClass<KDTreeSingleIndexDynamicAdaptor_<
+                                 Distance, DatasetAdaptor, DIM, IndexType>,
+                             Distance, DatasetAdaptor, DIM, IndexType> {
+public:
+  /**
+   * The dataset used by this index
+   */
+  const DatasetAdaptor &dataset; //!< The source of our data
+
+  KDTreeSingleIndexAdaptorParams index_params;
+
+  std::vector<int> &treeIndex;
+
+  Distance distance;
+
+  typedef typename nanoflann::KDTreeBaseClass<
+      nanoflann::KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor,
+                                                  DIM, IndexType>,
+      Distance, DatasetAdaptor, DIM, IndexType>
+      BaseClassRef;
+
+  typedef typename BaseClassRef::ElementType ElementType;
+  typedef typename BaseClassRef::DistanceType DistanceType;
+
+  typedef typename BaseClassRef::Node Node;
+  typedef Node *NodePtr;
+
+  typedef typename BaseClassRef::Interval Interval;
+  /** Define "BoundingBox" as a fixed-size or variable-size container depending
+   * on "DIM" */
+  typedef typename BaseClassRef::BoundingBox BoundingBox;
+
+  /** Define "distance_vector_t" as a fixed-size or variable-size container
+   * depending on "DIM" */
+  typedef typename BaseClassRef::distance_vector_t distance_vector_t;
+
+  /**
+   * KDTree constructor
+   *
+   * Refer to docs in README.md or online in
+   * https://github.com/jlblancoc/nanoflann
+   *
+   * The KD-Tree point dimension (the length of each point in the dataset,
+   * e.g. 3 for 3D points) is determined by means of:
+   *  - The \a DIM template parameter if >0 (highest priority)
+   *  - Otherwise, the \a dimensionality parameter of this constructor.
+   *
+   * @param inputData Dataset with the input features
+   * @param params Basically, the maximum leaf node size
+   */
+  KDTreeSingleIndexDynamicAdaptor_(
+      const int dimensionality, const DatasetAdaptor &inputData,
+      std::vector<int> &treeIndex_,
+      const KDTreeSingleIndexAdaptorParams &params =
+          KDTreeSingleIndexAdaptorParams())
+      : dataset(inputData), index_params(params), treeIndex(treeIndex_),
+        distance(inputData) {
+    BaseClassRef::root_node = NULL;
+    BaseClassRef::m_size = 0;
+    BaseClassRef::m_size_at_index_build = 0;
+    BaseClassRef::dim = dimensionality;
+    if (DIM > 0)
+      BaseClassRef::dim = DIM;
+    BaseClassRef::m_leaf_max_size = params.leaf_max_size;
+  }
+
+  /** Assignment operator definition */
+  KDTreeSingleIndexDynamicAdaptor_
+  operator=(const KDTreeSingleIndexDynamicAdaptor_ &rhs) {
+    KDTreeSingleIndexDynamicAdaptor_ tmp(rhs);
+    std::swap(BaseClassRef::vind, tmp.BaseClassRef::vind);
+    std::swap(BaseClassRef::m_leaf_max_size, tmp.BaseClassRef::m_leaf_max_size);
+    std::swap(index_params, tmp.index_params);
+    std::swap(treeIndex, tmp.treeIndex);
+    std::swap(BaseClassRef::m_size, tmp.BaseClassRef::m_size);
+    std::swap(BaseClassRef::m_size_at_index_build,
+              tmp.BaseClassRef::m_size_at_index_build);
+    std::swap(BaseClassRef::root_node, tmp.BaseClassRef::root_node);
+    std::swap(BaseClassRef::root_bbox, tmp.BaseClassRef::root_bbox);
+    std::swap(BaseClassRef::pool, tmp.BaseClassRef::pool);
+    return *this;
+  }
+
+  /**
+   * Builds the index
+   */
+  void buildIndex() {
+    BaseClassRef::m_size = BaseClassRef::vind.size();
+    this->freeIndex(*this);
+    BaseClassRef::m_size_at_index_build = BaseClassRef::m_size;
+    if (BaseClassRef::m_size == 0)
+      return;
+    computeBoundingBox(BaseClassRef::root_bbox);
+    BaseClassRef::root_node =
+        this->divideTree(*this, 0, BaseClassRef::m_size,
+                         BaseClassRef::root_bbox); // construct the tree
+  }
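+
+/* Editor's note: the assignment operator defined above uses the copy-and-swap
+ * idiom: the right-hand side is copied into a temporary and every member is
+ * swapped into *this, so the previous state is released when the temporary
+ * goes out of scope. A minimal stand-alone sketch of the same pattern
+ * (hypothetical "Holder" type, for illustration only):
+ * \code
+ * struct Holder {
+ *   std::vector<int> data;
+ *   Holder &operator=(const Holder &rhs) {
+ *     Holder tmp(rhs);            // the copy may throw; *this is untouched
+ *     std::swap(data, tmp.data);  // swapping never throws
+ *     return *this;               // tmp's destructor frees the old data
+ *   }
+ * };
+ * \endcode
+ */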
+
+  /** \name Query methods
+   * @{ */
+
+  /**
+   * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
+   * inside the result object.
+   *
+   * Params:
+   *   result = the result object in which the indices of the
+   *            nearest-neighbors are stored
+   *   vec    = the vector for which to search the nearest neighbors
+   *
+   * \tparam RESULTSET Should be any ResultSet<DistanceType>
+   * \return True if the requested neighbors could be found.
+   * \sa knnSearch, radiusSearch
+   */
+  template <typename RESULTSET>
+  bool findNeighbors(RESULTSET &result, const ElementType *vec,
+                     const SearchParams &searchParams) const {
+    assert(vec);
+    if (this->size(*this) == 0)
+      return false;
+    if (!BaseClassRef::root_node)
+      return false;
+    float epsError = 1 + searchParams.eps;
+
+    // fixed or variable-sized container (depending on DIM)
+    distance_vector_t dists;
+    // Fill it with zeros.
+    assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim),
+           static_cast<DistanceType>(0));
+    DistanceType distsq = this->computeInitialDistances(*this, vec, dists);
+    searchLevel(result, vec, BaseClassRef::root_node, distsq, dists,
+                epsError); // "count_leaf" parameter removed since was neither
+                           // used nor returned to the user.
+    return result.full();
+  }
+
+  /**
+   * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1].
+   * Their indices are stored inside the result object.
+   * \sa radiusSearch, findNeighbors
+   * \note nChecks_IGNORED is ignored but kept for compatibility with the
+   * original FLANN interface.
+   * \return Number `N` of valid points in the result set. Only the first `N`
+   * entries in `out_indices` and `out_distances_sq` will be valid. Return may
+   * be less than `num_closest` only if the number of elements in the tree is
+   * less than `num_closest`.
+   */
+  size_t knnSearch(const ElementType *query_point, const size_t num_closest,
+                   IndexType *out_indices, DistanceType *out_distances_sq,
+                   const int /* nChecks_IGNORED */ = 10) const {
+    nanoflann::KNNResultSet<DistanceType, IndexType> resultSet(num_closest);
+    resultSet.init(out_indices, out_distances_sq);
+    this->findNeighbors(resultSet, query_point, nanoflann::SearchParams());
+    return resultSet.size();
+  }
+
+  /**
+   * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius.
+   * The output is given as a vector of pairs, of which the first element is a
+   * point index and the second the corresponding distance. Previous contents
+   * of \a IndicesDists are cleared.
+   *
+   * If searchParams.sorted==true, the output list is sorted by ascending
+   * distances.
+   *
+   * For a better performance, it is advisable to do a .reserve() on the vector
+   * if you have any wild guess about the number of expected matches.
+   *
+   * \sa knnSearch, findNeighbors, radiusSearchCustomCallback
+   * \return The number of points within the given radius (i.e. indices.size()
+   * or dists.size() )
+   */
+  size_t
+  radiusSearch(const ElementType *query_point, const DistanceType &radius,
+               std::vector<std::pair<IndexType, DistanceType>> &IndicesDists,
+               const SearchParams &searchParams) const {
+    RadiusResultSet<DistanceType, IndexType> resultSet(radius, IndicesDists);
+    const size_t nFound =
+        radiusSearchCustomCallback(query_point, resultSet, searchParams);
+    if (searchParams.sorted)
+      std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter());
+    return nFound;
+  }
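+
+/* Editor's illustration (not upstream): calling the two query helpers above
+ * on an already-built index over 3-D double points ("index" is hypothetical).
+ * Note that for the L2 metrics the radius is a *squared* distance.
+ * \code
+ * const double query_pt[3] = {0.5, 0.5, 0.5};
+ *
+ * // k-nearest: only the first N returned entries are valid, and N may be
+ * // smaller than num_closest if the tree holds fewer points.
+ * const size_t num_closest = 5;
+ * std::vector<size_t> ret_indices(num_closest);
+ * std::vector<double> out_dists_sqr(num_closest);
+ * const size_t N = index.knnSearch(&query_pt[0], num_closest,
+ *                                  &ret_indices[0], &out_dists_sqr[0]);
+ *
+ * // radius search, sorted by ascending distance by default:
+ * const double search_radius = 0.1 * 0.1; // squared for metric_L2
+ * std::vector<std::pair<size_t, double> > matches;
+ * const size_t n = index.radiusSearch(&query_pt[0], search_radius, matches,
+ *                                     SearchParams());
+ * \endcode
+ */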
+
+  /**
+   * Just like radiusSearch() but with a custom callback class for each point
+   * found in the radius of the query. See the source of RadiusResultSet<> as
+   * a start point for your own classes.
+   * \sa radiusSearch
+   */
+  template <class SEARCH_CALLBACK>
+  size_t radiusSearchCustomCallback(
+      const ElementType *query_point, SEARCH_CALLBACK &resultSet,
+      const SearchParams &searchParams = SearchParams()) const {
+    this->findNeighbors(resultSet, query_point, searchParams);
+    return resultSet.size();
+  }
+
+  /** @} */
+
+public:
+  void computeBoundingBox(BoundingBox &bbox) {
+    resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim));
+
+    if (dataset.kdtree_get_bbox(bbox)) {
+      // Done! It was implemented in derived class
+    } else {
+      const size_t N = BaseClassRef::m_size;
+      if (!N)
+        throw std::runtime_error("[nanoflann] computeBoundingBox() called but "
+                                 "no data points found.");
+      for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) {
+        bbox[i].low = bbox[i].high =
+            this->dataset_get(*this, BaseClassRef::vind[0], i);
+      }
+      for (size_t k = 1; k < N; ++k) {
+        for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) {
+          if (this->dataset_get(*this, BaseClassRef::vind[k], i) < bbox[i].low)
+            bbox[i].low = this->dataset_get(*this, BaseClassRef::vind[k], i);
+          if (this->dataset_get(*this, BaseClassRef::vind[k], i) > bbox[i].high)
+            bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[k], i);
+        }
+      }
+    }
+  }
+
+  /**
+   * Performs an exact search in the tree starting from a node.
+   * \tparam RESULTSET Should be any ResultSet<DistanceType>
+   */
+  template <class RESULTSET>
+  void searchLevel(RESULTSET &result_set, const ElementType *vec,
+                   const NodePtr node, DistanceType mindistsq,
+                   distance_vector_t &dists, const float epsError) const {
+    /* If this is a leaf node, then do check and return. */
+    if ((node->child1 == NULL) && (node->child2 == NULL)) {
+      // count_leaf += (node->lr.right-node->lr.left); // Removed since was
+      // neither used nor returned to the user.
+      DistanceType worst_dist = result_set.worstDist();
+      for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right;
+           ++i) {
+        const IndexType index = BaseClassRef::vind[i]; // reorder... : i;
+        if (treeIndex[index] == -1)
+          continue;
+        DistanceType dist = distance.evalMetric(
+            vec, index, (DIM > 0 ? DIM : BaseClassRef::dim));
+        if (dist < worst_dist) {
+          if (!result_set.addPoint(
+                  static_cast<typename RESULTSET::DistanceType>(dist),
+                  static_cast<typename RESULTSET::IndexType>(
+                      BaseClassRef::vind[i]))) {
+            // the resultset doesn't want to receive any more points, we're
+            // done searching!
+            return; // false;
+          }
+        }
+      }
+      return;
+    }
+
+    /* Which child branch should be taken first? */
+    int idx = node->node_type.sub.divfeat;
+    ElementType val = vec[idx];
+    DistanceType diff1 = val - node->node_type.sub.divlow;
+    DistanceType diff2 = val - node->node_type.sub.divhigh;
+
+    NodePtr bestChild;
+    NodePtr otherChild;
+    DistanceType cut_dist;
+    if ((diff1 + diff2) < 0) {
+      bestChild = node->child1;
+      otherChild = node->child2;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx);
+    } else {
+      bestChild = node->child2;
+      otherChild = node->child1;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx);
+    }
+
+    /* Call recursively to search next level down. */
+    searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError);
+
+    DistanceType dst = dists[idx];
+    mindistsq = mindistsq + cut_dist - dst;
+    dists[idx] = cut_dist;
+    if (mindistsq * epsError <= result_set.worstDist()) {
+      searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError);
+    }
+    dists[idx] = dst;
+  }
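+
+/* Editor's note: the "treeIndex[index] == -1" test in the leaf loop above is
+ * where removePoint()'s lazy deletion takes effect: deleted points stay in
+ * the per-tree vind arrays but are skipped at query time, with no rebuild.
+ * Hypothetical sketch ("dyn_index" built as in the wrapper class below):
+ * \code
+ * dyn_index.removePoint(7); // point 7 is now invisible to queries
+ *
+ * const double query_pt[3] = {0.0, 0.0, 0.0};
+ * size_t out_idx;
+ * double out_dist_sqr;
+ * KNNResultSet<double> resultSet(1);
+ * resultSet.init(&out_idx, &out_dist_sqr);
+ * dyn_index.findNeighbors(resultSet, query_pt, SearchParams());
+ * // out_idx should no longer be 7, even if 7 was the true nearest point.
+ * \endcode
+ */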
+
+public:
+  /** Stores the index in a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when
+   * loading the index object it must be constructed associated to the same
+   * source of data points used while building it. See the example:
+   * examples/saveload_example.cpp
+   * \sa loadIndex */
+  void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); }
+
+  /** Loads a previous index from a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the
+   * index object must be constructed associated to the same source of data
+   * points used while building the index. See the example:
+   * examples/saveload_example.cpp
+   * \sa saveIndex */
+  void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); }
+};
+
+/** kd-tree dynamic index
+ *
+ * Class to create multiple static indices and merge their results so that
+ * they behave as a single dynamic index, as proposed in the Logarithmic
+ * Approach.
+ *
+ * Example of usage:
+ * examples/dynamic_pointcloud_example.cpp
+ *
+ * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ *         nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
+ * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
+ * \tparam IndexType Will be typically size_t or int
+ */
+template <typename Distance, class DatasetAdaptor, int DIM = -1,
+          typename IndexType = size_t>
+class KDTreeSingleIndexDynamicAdaptor {
+public:
+  typedef typename Distance::ElementType ElementType;
+  typedef typename Distance::DistanceType DistanceType;
+
+protected:
+  size_t m_leaf_max_size;
+  size_t treeCount;
+  size_t pointCount;
+
+  /**
+   * The dataset used by this index
+   */
+  const DatasetAdaptor &dataset; //!< The source of our data
+
+  std::vector<int> treeIndex; //!< treeIndex[idx] is the index of tree in
+                              //!< which point at idx is stored.
+                              //!< treeIndex[idx]=-1 means that point has
+                              //!< been removed.
+
+  KDTreeSingleIndexAdaptorParams index_params;
+
+  int dim; //!< Dimensionality of each data point
+
+  typedef KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM,
+                                           IndexType>
+      index_container_t;
+  std::vector<index_container_t> index;
+
+public:
+  /** Get a const ref to the internal list of indices; the number of indices
+   * is adapted dynamically as the dataset grows in size. */
+  const std::vector<index_container_t> &getAllIndices() const { return index; }
+
+private:
+  /** finds position of least significant unset bit */
+  int First0Bit(IndexType num) {
+    int pos = 0;
+    while (num & 1) {
+      num = num >> 1;
+      pos++;
+    }
+    return pos;
+  }
+
+  /** Creates multiple empty trees to handle dynamic support */
+  void init() {
+    typedef KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM,
+                                             IndexType>
+        my_kd_tree_t;
+    std::vector<my_kd_tree_t> index_(
+        treeCount, my_kd_tree_t(dim /*dim*/, dataset, treeIndex, index_params));
+    index = index_;
+  }
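+
+/* Editor's note: First0Bit() drives the logarithmic rebuild schedule used by
+ * addPoints() below: the position of the lowest zero bit of the running
+ * point counter selects the tree that absorbs the next insertion together
+ * with all smaller trees, like a carry propagating through a binary counter.
+ * Tree i therefore holds 2^i points when full and is rebuilt every 2^(i+1)
+ * insertions. Stand-alone illustration:
+ * \code
+ * // Insertions n = 0..7 land in trees 0,1,0,2,0,1,0,3 respectively.
+ * for (unsigned n = 0; n < 8; n++)
+ *   printf("point %u -> tree %d\n", n, First0Bit(n));
+ * \endcode
+ */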
+
+public:
+  Distance distance;
+
+  /**
+   * KDTree constructor
+   *
+   * Refer to docs in README.md or online in
+   * https://github.com/jlblancoc/nanoflann
+   *
+   * The KD-Tree point dimension (the length of each point in the dataset,
+   * e.g. 3 for 3D points) is determined by means of:
+   *  - The \a DIM template parameter if >0 (highest priority)
+   *  - Otherwise, the \a dimensionality parameter of this constructor.
+   *
+   * @param inputData Dataset with the input features
+   * @param params Basically, the maximum leaf node size
+   */
+  KDTreeSingleIndexDynamicAdaptor(const int dimensionality,
+                                  const DatasetAdaptor &inputData,
+                                  const KDTreeSingleIndexAdaptorParams &params =
+                                      KDTreeSingleIndexAdaptorParams(),
+                                  const size_t maximumPointCount = 1000000000U)
+      : dataset(inputData), index_params(params), distance(inputData) {
+    treeCount = static_cast<size_t>(std::log2(maximumPointCount));
+    pointCount = 0U;
+    dim = dimensionality;
+    treeIndex.clear();
+    if (DIM > 0)
+      dim = DIM;
+    m_leaf_max_size = params.leaf_max_size;
+    init();
+    const size_t num_initial_points = dataset.kdtree_get_point_count();
+    if (num_initial_points > 0) {
+      addPoints(0, num_initial_points - 1);
+    }
+  }
+
+  /** Deleted copy constructor */
+  KDTreeSingleIndexDynamicAdaptor(
+      const KDTreeSingleIndexDynamicAdaptor &) = delete;
+
+  /** Add points to the set, Inserts all points from [start, end] */
+  void addPoints(IndexType start, IndexType end) {
+    size_t count = end - start + 1;
+    treeIndex.resize(treeIndex.size() + count);
+    for (IndexType idx = start; idx <= end; idx++) {
+      int pos = First0Bit(pointCount);
+      index[pos].vind.clear();
+      treeIndex[pointCount] = pos;
+      for (int i = 0; i < pos; i++) {
+        for (int j = 0; j < static_cast<int>(index[i].vind.size()); j++) {
+          index[pos].vind.push_back(index[i].vind[j]);
+          if (treeIndex[index[i].vind[j]] != -1)
+            treeIndex[index[i].vind[j]] = pos;
+        }
+        index[i].vind.clear();
+        index[i].freeIndex(index[i]);
+      }
+      index[pos].vind.push_back(idx);
+      index[pos].buildIndex();
+      pointCount++;
+    }
+  }
+
+  /** Remove a point from the set (Lazy Deletion) */
+  void removePoint(size_t idx) {
+    if (idx >= pointCount)
+      return;
+    treeIndex[idx] = -1;
+  }
+
+  /**
+   * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
+   * inside the result object.
+   *
+   * Params:
+   *   result = the result object in which the indices of the
+   *            nearest-neighbors are stored
+   *   vec    = the vector for which to search the nearest neighbors
+   *
+   * \tparam RESULTSET Should be any ResultSet<DistanceType>
+   * \return True if the requested neighbors could be found.
+   * \sa knnSearch, radiusSearch
+   */
+  template <typename RESULTSET>
+  bool findNeighbors(RESULTSET &result, const ElementType *vec,
+                     const SearchParams &searchParams) const {
+    for (size_t i = 0; i < treeCount; i++) {
+      index[i].findNeighbors(result, &vec[0], searchParams);
+    }
+    return result.full();
+  }
+};
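+
+/* Editor's illustration (not upstream; mirrors the spirit of
+ * examples/dynamic_pointcloud_example.cpp referenced above): end-to-end use
+ * of the dynamic wrapper with a hypothetical "PointCloud" adaptor.
+ * \code
+ * typedef KDTreeSingleIndexDynamicAdaptor<
+ *     L2_Simple_Adaptor<double, PointCloud>, PointCloud, 3> dyn_kd_tree_t;
+ *
+ * PointCloud cloud; // first chunk of data already filled in
+ * dyn_kd_tree_t dyn_index(3, cloud, KDTreeSingleIndexAdaptorParams(10));
+ * // points present at construction time are indexed automatically
+ *
+ * // ... append new points to "cloud", then register them:
+ * // dyn_index.addPoints(firstNewIdx, lastNewIdx);
+ * // dyn_index.removePoint(42); // lazy deletion
+ *
+ * const double query_pt[3] = {0.0, 0.0, 0.0};
+ * size_t out_idx;
+ * double out_dist_sqr;
+ * KNNResultSet<double> resultSet(1);
+ * resultSet.init(&out_idx, &out_dist_sqr);
+ * dyn_index.findNeighbors(resultSet, query_pt, SearchParams());
+ * \endcode
+ */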
+
+/** An L2-metric KD-tree adaptor for working with data directly stored in an
+ * Eigen Matrix, without duplicating the data storage. Each row in the matrix
+ * represents a point in the state space.
+ *
+ * Example of usage:
+ * \code
+ * Eigen::Matrix<num_t, Dynamic, Dynamic> mat;
+ * // Fill out "mat"...
+ *
+ * typedef KDTreeEigenMatrixAdaptor<Eigen::Matrix<num_t, Dynamic, Dynamic> >
+ *     my_kd_tree_t;
+ * const int max_leaf = 10;
+ * my_kd_tree_t mat_index(mat.cols(), mat, max_leaf);
+ * mat_index.index->buildIndex();
+ * mat_index.index->...
+ * \endcode
+ *
+ * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality
+ *         for the points in the data set, allowing more compiler
+ *         optimizations.
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ *         nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
+ */
+template <class MatrixType, int DIM = -1,
+          class Distance = nanoflann::metric_L2>
+struct KDTreeEigenMatrixAdaptor {
+  typedef KDTreeEigenMatrixAdaptor<MatrixType, DIM, Distance> self_t;
+  typedef typename MatrixType::Scalar num_t;
+  typedef typename MatrixType::Index IndexType;
+  typedef
+      typename Distance::template traits<num_t, self_t>::distance_t metric_t;
+  typedef KDTreeSingleIndexAdaptor<metric_t, self_t,
+                                   MatrixType::ColsAtCompileTime, IndexType>
+      index_t;
+
+  index_t *index; //! The kd-tree index for the user to call its methods as
+                  //! usual with any other FLANN index.
+
+  /// Constructor: takes a const ref to the matrix object with the data points
+  KDTreeEigenMatrixAdaptor(const size_t dimensionality,
+                           const std::reference_wrapper<MatrixType> &mat,
+                           const int leaf_max_size = 10)
+      : m_data_matrix(mat) {
+    const auto dims = mat.get().cols();
+    if (size_t(dims) != dimensionality)
+      throw std::runtime_error(
+          "Error: 'dimensionality' must match column count in data matrix");
+    if (DIM > 0 && int(dims) != DIM)
+      throw std::runtime_error(
+          "Data set dimensionality does not match the 'DIM' template "
+          "argument");
+    index =
+        new index_t(static_cast<int>(dims), *this /* adaptor */,
+                    nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
+    index->buildIndex();
+  }
+
+public:
+  /** Deleted copy constructor */
+  KDTreeEigenMatrixAdaptor(const self_t &) = delete;
+
+  ~KDTreeEigenMatrixAdaptor() { delete index; }
+
+  const std::reference_wrapper<MatrixType> m_data_matrix;
+
+  /** Query for the \a num_closest closest points to a given point (entered as
+   * query_point[0:dim-1]). Note that this is a short-cut method for
+   * index->findNeighbors(). The user can also call index->... methods as
+   * desired.
+   * \note nChecks_IGNORED is ignored but kept for compatibility with the
+   * original FLANN interface.
+   */
+  inline void query(const num_t *query_point, const size_t num_closest,
+                    IndexType *out_indices, num_t *out_distances_sq,
+                    const int /* nChecks_IGNORED */ = 10) const {
+    nanoflann::KNNResultSet<num_t, IndexType> resultSet(num_closest);
+    resultSet.init(out_indices, out_distances_sq);
+    index->findNeighbors(resultSet, query_point, nanoflann::SearchParams());
+  }
+
+  /** @name Interface expected by KDTreeSingleIndexAdaptor
+   * @{ */
+
+  const self_t &derived() const { return *this; }
+  self_t &derived() { return *this; }
+
+  // Must return the number of data points
+  inline size_t kdtree_get_point_count() const {
+    return m_data_matrix.get().rows();
+  }
+
+  // Returns the dim'th component of the idx'th point in the class:
+  inline num_t kdtree_get_pt(const IndexType idx, size_t dim) const {
+    return m_data_matrix.get().coeff(idx, IndexType(dim));
+  }
+
+  // Optional bounding-box computation: return false to default to a standard
+  // bbox computation loop.
+  // Return true if the BBOX was already computed by the class and returned in
+  // "bb" so it can be avoided to redo it again. Look at bb.size() to find out
+  // the expected dimensionality (e.g. 2 or 3 for point clouds)
+  template <class BBOX> bool kdtree_get_bbox(BBOX & /*bb*/) const {
+    return false;
+  }
+
+  /** @} */
+
+}; // end of KDTreeEigenMatrixAdaptor
+/** @} */
 
 /** @} */ // end of grouping
 
-} // end of NS
-
+} // namespace nanoflann
 
 #endif /* NANOFLANN_HPP_ */
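
For reference, a usage sketch of the Eigen adaptor as patched above (editor's illustration, not part of the diff): the matrix binds implicitly to the std::reference_wrapper constructor parameter, and the dimensionality argument must equal mat.cols().

\code
#include <Eigen/Dense>
#include <nanoflann.hpp>
#include <vector>

int main() {
  Eigen::MatrixXd mat(100, 3); // 100 points, one per row
  mat.setRandom();

  typedef nanoflann::KDTreeEigenMatrixAdaptor<Eigen::MatrixXd> my_kd_tree_t;
  my_kd_tree_t mat_index(3 /* = mat.cols() */, mat, 10 /* max leaf */);
  mat_index.index->buildIndex();

  const double query_pt[3] = {0.1, 0.2, 0.3};
  const size_t num_closest = 3;
  std::vector<my_kd_tree_t::IndexType> ret_index(num_closest);
  std::vector<double> out_dist_sqr(num_closest);
  mat_index.query(&query_pt[0], num_closest, &ret_index[0], &out_dist_sqr[0]);
  return 0;
}
\endcode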