From b024aa2a974a2e69d6fd11ee0818b1af143d3f9a Mon Sep 17 00:00:00 2001
From: Alexandre Boulch
Date: Mon, 17 Dec 2018 11:19:13 +0100
Subject: [PATCH] Update: fix compilation

---
 {include => python}/Normals.h | 19 +-
 python/normEstHough.cxx | 2 +-
 python/setup.py | 11 +-
 third_party_includes/Eigen/Cholesky | 5 +
 third_party_includes/Eigen/Core | 27 +-
 third_party_includes/Eigen/Eigenvalues | 4 +
 third_party_includes/Eigen/LU | 4 +
 third_party_includes/Eigen/QR | 4 +
 third_party_includes/Eigen/QtAlignedMalloc | 2 +-
 third_party_includes/Eigen/SVD | 4 +
 .../Eigen/src/Cholesky/LDLT.h | 14 +-
 third_party_includes/Eigen/src/Cholesky/LLT.h | 26 +-
 third_party_includes/Eigen/src/Core/Array.h | 2 -
 .../Eigen/src/Core/AssignEvaluator.h | 4 +-
 .../Eigen/src/Core/Assign_MKL.h | 6 +-
 .../Eigen/src/Core/ConditionEstimator.h | 2 +-
 .../Eigen/src/Core/CoreEvaluators.h | 39 +-
 .../Eigen/src/Core/Diagonal.h | 5 +-
 third_party_includes/Eigen/src/Core/Dot.h | 17 +-
 .../Eigen/src/Core/GeneralProduct.h | 21 +-
 third_party_includes/Eigen/src/Core/Map.h | 17 +-
 third_party_includes/Eigen/src/Core/MapBase.h | 6 +-
 .../Eigen/src/Core/MathFunctions.h | 64 +-
 .../Eigen/src/Core/MathFunctionsImpl.h | 23 +
 third_party_includes/Eigen/src/Core/Matrix.h | 2 -
 .../Eigen/src/Core/MatrixBase.h | 35 +-
 .../Eigen/src/Core/PlainObjectBase.h | 4 +
 third_party_includes/Eigen/src/Core/Product.h | 10 +-
 .../Eigen/src/Core/ProductEvaluators.h | 31 +-
 third_party_includes/Eigen/src/Core/Redux.h | 2 +-
 third_party_includes/Eigen/src/Core/Ref.h | 2 +
 .../Eigen/src/Core/SelfAdjointView.h | 6 +-
 .../Eigen/src/Core/SelfCwiseBinaryOp.h | 4 -
 .../Eigen/src/Core/SolveTriangular.h | 3 +
 .../Eigen/src/Core/StableNorm.h | 2 +-
 .../Eigen/src/Core/Transpositions.h | 2 +-
 .../Eigen/src/Core/arch/AVX/Complex.h | 36 +-
 .../Eigen/src/Core/arch/AVX/PacketMath.h | 24 +-
 .../src/Core/arch/AVX512/MathFunctions.h | 33 +-
 .../Eigen/src/Core/arch/AVX512/PacketMath.h | 12 +-
 .../Eigen/src/Core/arch/AltiVec/Complex.h | 35 +-
 .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 44 +-
 .../Eigen/src/Core/arch/CUDA/Half.h | 253 +-
 .../Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 5 +-
 .../Eigen/src/Core/arch/Default/ConjHelper.h | 29 +
 .../Eigen/src/Core/arch/NEON/Complex.h | 12 +-
 .../Eigen/src/Core/arch/NEON/PacketMath.h | 31 +
 .../Eigen/src/Core/arch/SSE/Complex.h | 40 +-
 .../Eigen/src/Core/arch/SSE/PacketMath.h | 24 +-
 .../Eigen/src/Core/arch/SSE/TypeCasting.h | 28 +-
 .../Eigen/src/Core/arch/ZVector/Complex.h | 3 +
 .../Eigen/src/Core/functors/BinaryFunctors.h | 25 +-
 .../Eigen/src/Core/functors/StlFunctors.h | 4 +
 .../Core/products/GeneralBlockPanelKernel.h | 15 +-
 .../GeneralMatrixMatrixTriangular_BLAS.h | 10 +-
 .../Core/products/GeneralMatrixMatrix_BLAS.h | 19 +-
 .../src/Core/products/GeneralMatrixVector.h | 8 +-
 .../Core/products/GeneralMatrixVector_BLAS.h | 19 +-
 .../products/SelfadjointMatrixMatrix_BLAS.h | 48 +-
 .../products/SelfadjointMatrixVector_BLAS.h | 9 +-
 .../Core/products/TriangularMatrixMatrix.h | 35 +-
 .../products/TriangularMatrixMatrix_BLAS.h | 39 +-
 .../Core/products/TriangularMatrixVector.h | 22 +-
 .../products/TriangularMatrixVector_BLAS.h | 46 +-
 .../products/TriangularSolverMatrix_BLAS.h | 40 +-
 .../src/Core/util/DisableStupidWarnings.h | 14 +-
 .../Eigen/src/Core/util/MKL_support.h | 10 +-
 .../Eigen/src/Core/util/Macros.h | 19 +-
 .../Eigen/src/Core/util/Memory.h | 24 +-
 .../Eigen/src/Core/util/Meta.h | 42 +
 .../src/Core/util/ReenableStupidWarnings.h | 2 +-
 .../Eigen/src/Core/util/StaticAssert.h | 120 +-
.../src/Eigenvalues/GeneralizedEigenSolver.h | 5 +- .../src/Eigenvalues/MatrixBaseEigenvalues.h | 2 - .../Eigen/src/Eigenvalues/RealSchur.h | 4 +- .../SelfAdjointEigenSolver_LAPACKE.h | 23 +- .../Eigen/src/Geometry/AngleAxis.h | 2 +- .../Eigen/src/Geometry/Quaternion.h | 31 +- .../BasicPreconditioners.h | 2 +- .../ConjugateGradient.h | 5 +- .../Eigen/src/Jacobi/Jacobi.h | 253 +- .../Eigen/src/LU/InverseImpl.h | 2 +- .../Eigen/src/PaStiXSupport/PaStiXSupport.h | 8 +- third_party_includes/Eigen/src/SVD/BDCSVD.h | 33 +- .../Eigen/src/SVD/JacobiSVD_LAPACKE.h | 5 +- third_party_includes/Eigen/src/SVD/SVDBase.h | 4 +- .../Eigen/src/SparseCore/AmbiVector.h | 2 +- .../ConservativeSparseSparseProduct.h | 67 +- .../Eigen/src/SparseCore/SparseMatrix.h | 6 +- .../src/SparseCore/SparseSelfAdjointView.h | 8 +- .../SparseSparseProductWithPruning.h | 22 +- .../Eigen/src/SparseLU/SparseLU.h | 2 - .../Eigen/src/SparseQR/SparseQR.h | 26 +- .../Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 +- third_party_includes/nanoflann.hpp | 3931 +++++++++-------- 95 files changed, 3258 insertions(+), 2799 deletions(-) rename {include => python}/Normals.h (98%) create mode 100644 third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h diff --git a/include/Normals.h b/python/Normals.h similarity index 98% rename from include/Normals.h rename to python/Normals.h index b2b8fa2..a616ec7 100644 --- a/include/Normals.h +++ b/python/Normals.h @@ -30,8 +30,8 @@ #include #include #include -#include "Eigen/Dense" -#include "nanoflann.hpp" +#include +#include #ifdef _OPENMP #include @@ -91,7 +91,6 @@ class Eigen_Normal_Estimator{ typedef nanoflann::KDTreeEigenMatrixAdaptor< Eigen::MatrixX3d > kd_tree; //a row is a point - // constructor Eigen_Normal_Estimator(const Eigen::MatrixX3d& points, Eigen::MatrixX3d& normals): pts(points),nls(normals){ @@ -128,7 +127,7 @@ class Eigen_Normal_Estimator{ } istr.close(); pts.resize(points.size(),3); - for(int i=0; i vecInt(1000000); - for(int i=0; ibuildIndex(); //create the density estimation for each point @@ -218,7 +217,7 @@ class Eigen_Normal_Estimator{ //knn for k_density+1 because the point is itself include in the search tree tree.index->knnSearch(&pt_query[0], k_density+1, &pointIdxSearch[0], &pointSquaredDistance[0]); double d =0; - for(int i=0; i elems, double d){ unsigned int i1 = 0; unsigned int i2 = elems.size()-1; - unsigned int i3; + unsigned int i3 = (i1+i2)/2; while(i2 > i1){ i3 = (i1+i2)/2; if(elems[i3] == d){break;} @@ -391,13 +390,13 @@ class Eigen_Normal_Estimator{ { std::vector dists; double sum=0; - for(int i=0; i= 9) +#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) +#define EIGEN_CUDACC_VER __CUDACC_VER__ +#else +#define EIGEN_CUDACC_VER 0 +#endif + // Handle NVCC/CUDA/SYCL #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) // Do not try asserts on CUDA and SYCL! 
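The EIGEN_CUDACC_VER block above normalizes the CUDA compiler version into a single integer: CUDA 9 removed the old __CUDACC_VER__ macro in favor of __CUDACC_VER_MAJOR__/__CUDACC_VER_MINOR__, so major*10000 + minor*100 reconstructs the old encoding on newer toolchains. A minimal illustration (not part of the patch) of how later guards consume it:

    // With nvcc 9.1: __CUDACC_VER_MAJOR__ == 9 and __CUDACC_VER_MINOR__ == 1,
    // so EIGEN_CUDACC_VER expands to 9 * 10000 + 1 * 100 == 90100.
    // Version guards elsewhere in the patch compare against the same encoding:
    #if EIGEN_CUDACC_VER >= 70500   // true for any CUDA >= 7.5
    #define EIGEN_HAS_CUDA_FP16
    #endif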
@@ -37,9 +53,9 @@ #endif #define EIGEN_DEVICE_FUNC __host__ __device__ - // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro + // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro // works properly on the device side - #include + #include #else #define EIGEN_DEVICE_FUNC #endif @@ -155,6 +171,9 @@ #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif + #ifdef __AVX512ER__ + #define EIGEN_VECTORIZE_AVX512ER + #endif #endif // include files @@ -229,7 +248,7 @@ #if defined __CUDACC__ #define EIGEN_VECTORIZE_CUDA #include - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #if EIGEN_CUDACC_VER >= 70500 #define EIGEN_HAS_CUDA_FP16 #endif #endif @@ -352,6 +371,7 @@ using std::ptrdiff_t; #include "src/Core/MathFunctions.h" #include "src/Core/GenericPacketMath.h" #include "src/Core/MathFunctionsImpl.h" +#include "src/Core/arch/Default/ConjHelper.h" #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" @@ -367,6 +387,7 @@ using std::ptrdiff_t; #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX/TypeCasting.h" + #include "src/Core/arch/SSE/TypeCasting.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" diff --git a/third_party_includes/Eigen/Eigenvalues b/third_party_includes/Eigen/Eigenvalues index 009e529..f3f661b 100644 --- a/third_party_includes/Eigen/Eigenvalues +++ b/third_party_includes/Eigen/Eigenvalues @@ -45,7 +45,11 @@ #include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/Eigenvalues/RealSchur_LAPACKE.h" #include "src/Eigenvalues/ComplexSchur_LAPACKE.h" #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" diff --git a/third_party_includes/Eigen/LU b/third_party_includes/Eigen/LU index 6f6c556..6418a86 100644 --- a/third_party_includes/Eigen/LU +++ b/third_party_includes/Eigen/LU @@ -28,7 +28,11 @@ #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" diff --git a/third_party_includes/Eigen/QR b/third_party_includes/Eigen/QR index 80838e3..c7e9144 100644 --- a/third_party_includes/Eigen/QR +++ b/third_party_includes/Eigen/QR @@ -36,7 +36,11 @@ #include "src/QR/ColPivHouseholderQR.h" #include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif diff --git a/third_party_includes/Eigen/QtAlignedMalloc b/third_party_includes/Eigen/QtAlignedMalloc index c6571f1..4f07df0 100644 --- a/third_party_includes/Eigen/QtAlignedMalloc +++ b/third_party_includes/Eigen/QtAlignedMalloc @@ -27,7 +27,7 @@ void qFree(void *ptr) void *qRealloc(void *ptr, std::size_t size) { void* newPtr = Eigen::internal::aligned_malloc(size); - memcpy(newPtr, ptr, size); + std::memcpy(newPtr, ptr, size); Eigen::internal::aligned_free(ptr); return newPtr; } diff --git a/third_party_includes/Eigen/SVD b/third_party_includes/Eigen/SVD index 86143c2..5d0e75f 100644 --- a/third_party_includes/Eigen/SVD +++ 
b/third_party_includes/Eigen/SVD @@ -37,7 +37,11 @@ #include "src/SVD/JacobiSVD.h" #include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/SVD/JacobiSVD_LAPACKE.h" #endif diff --git a/third_party_includes/Eigen/src/Cholesky/LDLT.h b/third_party_includes/Eigen/src/Cholesky/LDLT.h index fcee7b2..15ccf24 100644 --- a/third_party_includes/Eigen/src/Cholesky/LDLT.h +++ b/third_party_includes/Eigen/src/Cholesky/LDLT.h @@ -248,7 +248,7 @@ template class LDLT /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if the factorization failed because of a zero pivot. */ ComputationInfo info() const { @@ -305,7 +305,8 @@ template<> struct ldlt_inplace if (size <= 1) { transpositions.setIdentity(); - if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; + if(size==0) sign = ZeroSign; + else if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; else if (numext::real(mat.coeff(0,0)) < static_cast(0)) sign = NegativeSemiDef; else sign = ZeroSign; return true; @@ -376,6 +377,8 @@ template<> struct ldlt_inplace if((rs>0) && pivot_is_valid) A21 /= realAkk; + else if(rs>0) + ret = ret && (A21.array()==Scalar(0)).all(); if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed else if(!pivot_is_valid) found_zero_pivot = true; @@ -568,13 +571,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons // more precisely, use pseudo-inverse of D (see bug 241) using std::abs; const typename Diagonal::RealReturnType vecD(vectorD()); - // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon - // as motivated by LAPACK's xGELSS: + // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min()) + // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS: // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest // diagonal element is not well justified and leads to numerical issues in some cases. // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. - RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + // Using numeric_limits::min() gives us more robustness to denormals. + RealScalar tolerance = (std::numeric_limits::min)(); for (Index i = 0; i < vecD.size(); ++i) { diff --git a/third_party_includes/Eigen/src/Cholesky/LLT.h b/third_party_includes/Eigen/src/Cholesky/LLT.h index 87ca8d4..e1624d2 100644 --- a/third_party_includes/Eigen/src/Cholesky/LLT.h +++ b/third_party_includes/Eigen/src/Cholesky/LLT.h @@ -24,7 +24,7 @@ template struct LLT_Traits; * * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. - * The other triangular part won't be read. + * The other triangular part won't be read. * * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite * matrix A such that A = LL^* = U^*U, where L is lower triangular. 
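The LDLT hunk above also changes the solve tolerance from 1/NumTraits::highest() to numeric_limits::min(), the smallest positive normalized value, so that denormal entries of the diagonal factor D are zeroed rather than inverted when applying its pseudo-inverse. A minimal sketch of the implied thresholding, not the library's verbatim code:

    #include <cmath>
    #include <limits>

    // Diagonal entries below the smallest normalized magnitude are treated as
    // zero pivots: they map to 0 instead of being inverted (pseudo-inverse of D).
    template <typename RealScalar>
    RealScalar pseudo_inverse_entry(RealScalar d)
    {
        const RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
        return std::abs(d) > tolerance ? RealScalar(1) / d : RealScalar(0);
    }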
@@ -41,14 +41,18 @@ template struct LLT_Traits; * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out * + * \b Performance: for best performance, it is recommended to use a column-major storage format + * with the Lower triangular part (the default), or, equivalently, a row-major storage format + * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization + * step, and rank-updates can be up to 3 times slower. + * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * + * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered. + * Therefore, the strict lower part does not have to store correct values. + * * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ - /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) - * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, - * the strict lower part does not have to store correct values. - */ template class LLT { public: @@ -146,7 +150,7 @@ template class LLT } template - void solveInPlace(MatrixBase &bAndX) const; + void solveInPlace(const MatrixBase &bAndX) const; template LLT& compute(const EigenBase& matrix); @@ -177,7 +181,7 @@ template class LLT /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if the matrix.appears not to be positive definite. */ ComputationInfo info() const { @@ -425,7 +429,8 @@ LLT& LLT::compute(const EigenBase eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); - m_matrix = a.derived(); + if (!internal::is_same_dense(m_matrix, a.derived())) + m_matrix = a.derived(); // Compute matrix L1 norm = max abs column sum. m_l1_norm = RealScalar(0); @@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const * * This version avoids a copy when the right hand side matrix b is not needed anymore. * + * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. + * This function will const_cast it, so constness isn't honored here. 
+ * * \sa LLT::solve(), MatrixBase::llt() */ template template -void LLT::solveInPlace(MatrixBase &bAndX) const +void LLT::solveInPlace(const MatrixBase &bAndX) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==bAndX.rows()); diff --git a/third_party_includes/Eigen/src/Core/Array.h b/third_party_includes/Eigen/src/Core/Array.h index e10020d..16770fc 100644 --- a/third_party_includes/Eigen/src/Core/Array.h +++ b/third_party_includes/Eigen/src/Core/Array.h @@ -153,8 +153,6 @@ class Array : Base(std::move(other)) { Base::_check_template_params(); - if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) - Base::_set_noalias(other); } EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) diff --git a/third_party_includes/Eigen/src/Core/AssignEvaluator.h b/third_party_includes/Eigen/src/Core/AssignEvaluator.h index b0ec7b7..dbe435d 100644 --- a/third_party_includes/Eigen/src/Core/AssignEvaluator.h +++ b/third_party_includes/Eigen/src/Core/AssignEvaluator.h @@ -39,7 +39,7 @@ struct copy_using_evaluator_traits enum { DstAlignment = DstEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment, - DstHasDirectAccess = DstFlags & DirectAccessBit, + DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) }; @@ -83,7 +83,7 @@ struct copy_using_evaluator_traits && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. 
*/ diff --git a/third_party_includes/Eigen/src/Core/Assign_MKL.h b/third_party_includes/Eigen/src/Core/Assign_MKL.h index 6c2ab92..6866095 100755 --- a/third_party_includes/Eigen/src/Core/Assign_MKL.h +++ b/third_party_includes/Eigen/src/Core/Assign_MKL.h @@ -84,7 +84,8 @@ class vml_assign_traits struct Assignment, SrcXprNested>, assign_op, \ Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ if(vml_assign_traits::Traversal==LinearTraversal) { \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ @@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseBinaryOp, SrcXprNested, \ const CwiseNullaryOp,Plain> > SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ if(vml_assign_traits::Traversal==LinearTraversal) \ diff --git a/third_party_includes/Eigen/src/Core/ConditionEstimator.h b/third_party_includes/Eigen/src/Core/ConditionEstimator.h index aa7efdc..51a2e5f 100644 --- a/third_party_includes/Eigen/src/Core/ConditionEstimator.h +++ b/third_party_includes/Eigen/src/Core/ConditionEstimator.h @@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco { typedef typename Decomposition::RealScalar RealScalar; eigen_assert(dec.rows() == dec.cols()); - if (dec.rows() == 0) return RealScalar(1); + if (dec.rows() == 0) return NumTraits::infinity(); if (matrix_norm == RealScalar(0)) return RealScalar(0); if (dec.rows() == 1) return RealScalar(1); const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); diff --git a/third_party_includes/Eigen/src/Core/CoreEvaluators.h b/third_party_includes/Eigen/src/Core/CoreEvaluators.h index f7c1eff..910889e 100644 --- a/third_party_includes/Eigen/src/Core/CoreEvaluators.h +++ b/third_party_includes/Eigen/src/Core/CoreEvaluators.h @@ -977,7 +977,7 @@ struct evaluator > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -987,7 +987,9 @@ struct evaluator > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, PacketAlignment = unpacket_traits::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? 
int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; @@ -1018,14 +1020,16 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), - m_startCol(block.startCol()) + m_startCol(block.startCol()), + m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0) { } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + RowsAtCompileTime = XprType::RowsAtCompileTime, + ForwardLinearAccess = InnerPanel && bool(evaluator::Flags&LinearAccessBit) }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1037,7 +1041,10 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeff(m_linear_offset.value() + index); + else + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1049,7 +1056,10 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeffRef(m_linear_offset.value() + index); + else + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template @@ -1063,8 +1073,11 @@ struct unary_evaluator, IndexBa EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return packet(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.template packet(m_linear_offset.value() + index); + else + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); } template @@ -1078,15 +1091,19 @@ struct unary_evaluator, IndexBa EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return writePacket(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + if (ForwardLinearAccess) + return m_argImpl.template writePacket(m_linear_offset.value() + index, x); + else + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? 
index : 0, + x); } protected: evaluator m_argImpl; const variable_if_dynamic m_startRow; const variable_if_dynamic m_startCol; + const variable_if_dynamic m_linear_offset; }; // TODO: This evaluator does not actually use the child evaluator; diff --git a/third_party_includes/Eigen/src/Core/Diagonal.h b/third_party_includes/Eigen/src/Core/Diagonal.h index 49e7112..afcaf35 100644 --- a/third_party_includes/Eigen/src/Core/Diagonal.h +++ b/third_party_includes/Eigen/src/Core/Diagonal.h @@ -70,7 +70,10 @@ template class Diagonal EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DEVICE_FUNC - explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) + { + eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() ); + } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) diff --git a/third_party_includes/Eigen/src/Core/Dot.h b/third_party_includes/Eigen/src/Core/Dot.h index 06ef18b..1fe7a84 100644 --- a/third_party_includes/Eigen/src/Core/Dot.h +++ b/third_party_includes/Eigen/src/Core/Dot.h @@ -31,7 +31,8 @@ struct dot_nocheck typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.template binaryExpr(b).sum(); } @@ -43,7 +44,8 @@ struct dot_nocheck typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.transpose().template binaryExpr(b).sum(); } @@ -65,6 +67,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { @@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala * \sa lpNorm(), dot(), squaredNorm() */ template -inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +120,7 @@ inline typename NumTraits::Scalar>::Real Matr * \sa norm(), normalize() */ template -inline const typename MatrixBase::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::normalized() const { typedef typename internal::nested_eval::type _Nested; @@ -139,7 +142,7 @@ MatrixBase::normalized() const * \sa norm(), normalized() */ template -inline void MatrixBase::normalize() +EIGEN_STRONG_INLINE void MatrixBase::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +163,7 @@ inline void MatrixBase::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template -inline const typename MatrixBase::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::stableNormalized() const { typedef typename internal::nested_eval::type _Nested; @@ -185,7 +188,7 @@ MatrixBase::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template -inline void 
MatrixBase::stableNormalize() +EIGEN_STRONG_INLINE void MatrixBase::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); diff --git a/third_party_includes/Eigen/src/Core/GeneralProduct.h b/third_party_includes/Eigen/src/Core/GeneralProduct.h index 0f16cd8..6f0cc80 100644 --- a/third_party_includes/Eigen/src/Core/GeneralProduct.h +++ b/third_party_includes/Eigen/src/Core/GeneralProduct.h @@ -24,12 +24,17 @@ template struct product_type_selector; template struct product_size_category { - enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || - (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), - value = is_large ? Large - : Size == 1 ? 1 - : Small + enum { + #ifndef EIGEN_CUDA_ARCH + is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), + #else + is_large = 0, + #endif + value = is_large ? Large + : Size == 1 ? 1 + : Small }; }; @@ -379,8 +384,6 @@ template<> struct gemv_dense_selector * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ -#ifndef __CUDACC__ - template template inline const Product @@ -412,8 +415,6 @@ MatrixBase::operator*(const MatrixBase &other) const return Product(derived(), other.derived()); } -#endif // __CUDACC__ - /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be diff --git a/third_party_includes/Eigen/src/Core/Map.h b/third_party_includes/Eigen/src/Core/Map.h index 06d1967..548bf9a 100644 --- a/third_party_includes/Eigen/src/Core/Map.h +++ b/third_party_includes/Eigen/src/Core/Map.h @@ -20,11 +20,17 @@ struct traits > { typedef traits TraitsBase; enum { + PlainObjectTypeInnerSize = ((traits::Flags&RowMajorBit)==RowMajorBit) + ? PlainObjectType::ColsAtCompileTime + : PlainObjectType::RowsAtCompileTime, + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) : int(StrideType::InnerStrideAtCompileTime), OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 - ? int(PlainObjectType::OuterStrideAtCompileTime) + ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic + ? Dynamic + : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize)) : int(StrideType::OuterStrideAtCompileTime), Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), @@ -107,10 +113,11 @@ template class Ma EIGEN_DEVICE_FUNC inline Index outerStride() const { - return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() - : IsVectorAtCompileTime ? this->size() - : int(Flags)&RowMajorBit ? this->cols() - : this->rows(); + return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer() + : int(internal::traits::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits::OuterStrideAtCompileTime) + : IsVectorAtCompileTime ? (this->size() * innerStride()) + : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride()) + : (this->rows() * innerStride()); } /** Constructor in the fixed-size case. 
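The Map.h hunk above derives the compile-time outer stride as the inner stride times the inner size when only an inner stride is supplied, and makes the runtime outerStride() fall back consistently. A hedged usage sketch of the case this affects (the variable names are illustrative, not from the patch):

    #include <Eigen/Dense>

    int main()
    {
        double data[12];
        for (int i = 0; i < 12; ++i) data[i] = i;
        // Map every other element as a fixed-size 2x3 row-major matrix. With an
        // inner stride of 2, the default outer stride is now 2*3 == 6 scalars, so
        // row 0 reads data[0], data[2], data[4] and row 1 reads data[6], data[8], data[10].
        Eigen::Map<Eigen::Matrix<double, 2, 3, Eigen::RowMajor>, 0,
                   Eigen::InnerStride<2> > m(data);
        return m(1, 2) == 10.0 ? 0 : 1;
    }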
diff --git a/third_party_includes/Eigen/src/Core/MapBase.h b/third_party_includes/Eigen/src/Core/MapBase.h index 020f939..668922f 100644 --- a/third_party_includes/Eigen/src/Core/MapBase.h +++ b/third_party_includes/Eigen/src/Core/MapBase.h @@ -43,6 +43,7 @@ template class MapBase enum { RowsAtCompileTime = internal::traits::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime, + InnerStrideAtCompileTime = internal::traits::InnerStrideAtCompileTime, SizeAtCompileTime = Base::SizeAtCompileTime }; @@ -187,8 +188,11 @@ template class MapBase void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const { #if EIGEN_MAX_ALIGN_BYTES>0 + // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value: + const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime); + EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride); eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits::Alignment) == 0) - || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); + || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); #endif } diff --git a/third_party_includes/Eigen/src/Core/MathFunctions.h b/third_party_includes/Eigen/src/Core/MathFunctions.h index a648aa0..b249ce0 100644 --- a/third_party_includes/Eigen/src/Core/MathFunctions.h +++ b/third_party_includes/Eigen/src/Core/MathFunctions.h @@ -348,31 +348,7 @@ struct norm1_retval * Implementation of hypot * ****************************************************************************/ -template -struct hypot_impl -{ - typedef typename NumTraits::Real RealScalar; - static inline RealScalar run(const Scalar& x, const Scalar& y) - { - EIGEN_USING_STD_MATH(abs); - EIGEN_USING_STD_MATH(sqrt); - RealScalar _x = abs(x); - RealScalar _y = abs(y); - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - if(p==RealScalar(0)) return RealScalar(0); - return p * sqrt(RealScalar(1) + qp*qp); - } -}; +template struct hypot_impl; template struct hypot_retval @@ -495,7 +471,7 @@ namespace std_fallback { typedef typename NumTraits::Real RealScalar; EIGEN_USING_STD_MATH(log); Scalar x1p = RealScalar(1) + x; - return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } } @@ -640,21 +616,28 @@ template struct random_default_impl { static inline Scalar run(const Scalar& x, const Scalar& y) - { - typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; - if(y=x the result converted to an unsigned long is still correct. - std::size_t range = ScalarX(y)-ScalarX(x); - std::size_t offset = 0; - // rejection sampling - std::size_t divisor = 1; - std::size_t multiplier = 1; - if(range::type ScalarU; + // ScalarX is the widest of ScalarU and unsigned int. + // We'll deal only with ScalarX and unsigned int below thus avoiding signed + // types and arithmetic and signed overflows (which are undefined behavior). + typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX; + // The following difference doesn't overflow, provided our integer types are two's + // complement and have the same number of padding bits in signed and unsigned variants. + // This is the case in most modern implementations of C++. 
+ ScalarX range = ScalarX(y) - ScalarX(x); + ScalarX offset = 0; + ScalarX divisor = 1; + ScalarX multiplier = 1; + const unsigned rand_max = RAND_MAX; + if (range <= rand_max) divisor = (rand_max + 1) / (range + 1); + else multiplier = 1 + range / (rand_max + 1); + // Rejection sampling. do { - offset = (std::size_t(std::rand()) * multiplier) / divisor; + offset = (unsigned(std::rand()) * multiplier) / divisor; } while (offset > range); return Scalar(ScalarX(x) + offset); } @@ -1030,7 +1013,8 @@ inline int log2(int x) /** \returns the square root of \a x. * - * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * It is essentially equivalent to + * \code using std::sqrt; return sqrt(x); \endcode * but slightly faster for float/double and some compilers (e.g., gcc), thanks to * specializations when SSE is enabled. * diff --git a/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h b/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h index 3c9ef22..9c1ceb0 100644 --- a/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h +++ b/third_party_includes/Eigen/src/Core/MathFunctionsImpl.h @@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x) return pdiv(p, q); } +template +EIGEN_STRONG_INLINE +RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) +{ + EIGEN_USING_STD_MATH(sqrt); + RealScalar p, qp; + p = numext::maxi(x,y); + if(p==RealScalar(0)) return RealScalar(0); + qp = numext::mini(y,x) / p; + return p * sqrt(RealScalar(1) + qp*qp); +} + +template +struct hypot_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + EIGEN_USING_STD_MATH(abs); + return positive_real_hypot(abs(x), abs(y)); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Core/Matrix.h b/third_party_includes/Eigen/src/Core/Matrix.h index 90c336d..7f4a7af 100644 --- a/third_party_includes/Eigen/src/Core/Matrix.h +++ b/third_party_includes/Eigen/src/Core/Matrix.h @@ -274,8 +274,6 @@ class Matrix : Base(std::move(other)) { Base::_check_template_params(); - if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) - Base::_set_noalias(other); } EIGEN_DEVICE_FUNC Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) diff --git a/third_party_includes/Eigen/src/Core/MatrixBase.h b/third_party_includes/Eigen/src/Core/MatrixBase.h index ce41218..e6c3590 100644 --- a/third_party_includes/Eigen/src/Core/MatrixBase.h +++ b/third_party_includes/Eigen/src/Core/MatrixBase.h @@ -160,20 +160,11 @@ template class MatrixBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase& other); -#ifdef __CUDACC__ template EIGEN_DEVICE_FUNC - const Product - operator*(const MatrixBase &other) const - { return this->lazyProduct(other); } -#else - - template const Product operator*(const MatrixBase &other) const; -#endif - template EIGEN_DEVICE_FUNC const Product @@ -453,16 +444,24 @@ template class MatrixBase ///////// MatrixFunctions module ///////// typedef typename internal::stem_function::type StemFunction; - const MatrixExponentialReturnValue exp() const; +#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \ + /** \returns an expression of the matrix Description of \c *this. \brief This function requires the unsupported MatrixFunctions module. To compute the coefficient-wise Description use ArrayBase::##Name . 
*/ \ + const ReturnType Name() const; +#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \ + /** \returns an expression of the matrix Description of \c *this. \brief This function requires the unsupported MatrixFunctions module. To compute the coefficient-wise Description use ArrayBase::##Name . */ \ + const ReturnType Name(Argument) const; + + EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential) + /** \brief Helper function for the unsupported MatrixFunctions module.*/ const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; - const MatrixFunctionReturnValue cosh() const; - const MatrixFunctionReturnValue sinh() const; - const MatrixFunctionReturnValue cos() const; - const MatrixFunctionReturnValue sin() const; - const MatrixSquareRootReturnValue sqrt() const; - const MatrixLogarithmReturnValue log() const; - const MatrixPowerReturnValue pow(const RealScalar& p) const; - const MatrixComplexPowerReturnValue pow(const std::complex& p) const; + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine) + EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root) + EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm) + EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p) + EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex& p) protected: EIGEN_DEVICE_FUNC MatrixBase() : Base() {} diff --git a/third_party_includes/Eigen/src/Core/PlainObjectBase.h b/third_party_includes/Eigen/src/Core/PlainObjectBase.h index 77f4f60..1dc7e22 100644 --- a/third_party_includes/Eigen/src/Core/PlainObjectBase.h +++ b/third_party_includes/Eigen/src/Core/PlainObjectBase.h @@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned * \a data pointers. 
* + * Here is an example using strides: + * \include Matrix_Map_stride.cpp + * Output: \verbinclude Matrix_Map_stride.out + * * \see class Map */ //@{ diff --git a/third_party_includes/Eigen/src/Core/Product.h b/third_party_includes/Eigen/src/Core/Product.h index ae0c94b..676c480 100644 --- a/third_party_includes/Eigen/src/Core/Product.h +++ b/third_party_includes/Eigen/src/Core/Product.h @@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } @@ -127,7 +127,7 @@ class dense_product_base using Base::derived; typedef typename Base::Scalar Scalar; - operator const Scalar() const + EIGEN_STRONG_INLINE operator const Scalar() const { return internal::evaluator(derived()).coeff(0,0); } @@ -162,7 +162,7 @@ class ProductImpl public: - EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); @@ -170,7 +170,7 @@ class ProductImpl return internal::evaluator(derived()).coeff(row,col); } - EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); diff --git a/third_party_includes/Eigen/src/Core/ProductEvaluators.h b/third_party_includes/Eigen/src/Core/ProductEvaluators.h index c42725d..9b99bd7 100644 --- a/third_party_includes/Eigen/src/Core/ProductEvaluators.h +++ b/third_party_includes/Eigen/src/Core/ProductEvaluators.h @@ -32,7 +32,7 @@ struct evaluator > typedef Product XprType; typedef product_evaluator Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" @@ -55,7 +55,7 @@ struct evaluator, const Product > XprType; typedef evaluator > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -68,7 +68,7 @@ struct evaluator, DiagIndex> > typedef Diagonal, DiagIndex> XprType; typedef evaluator, DiagIndex> > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(Diagonal, DiagIndex>( Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index() )) @@ -246,19 +246,19 @@ template struct generic_product_impl { template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = 
(lhs.transpose().cwiseProduct(rhs)).sum(); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template - static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; @@ -312,25 +312,25 @@ struct generic_product_impl }; template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); } template - static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); } template - static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); } @@ -785,7 +785,11 @@ struct diagonal_product_evaluator_base _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? 
PacketAccessBit : 0), - Alignment = evaluator::Alignment + Alignment = evaluator::Alignment, + + AsScalarProduct = (DiagonalType::SizeAtCompileTime==1) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -797,7 +801,10 @@ struct diagonal_product_evaluator_base EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { - return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + if(AsScalarProduct) + return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); + else + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } protected: diff --git a/third_party_includes/Eigen/src/Core/Redux.h b/third_party_includes/Eigen/src/Core/Redux.h index b6e8f88..760e9f8 100644 --- a/third_party_includes/Eigen/src/Core/Redux.h +++ b/third_party_includes/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ class redux_evaluator */ template template -typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); diff --git a/third_party_includes/Eigen/src/Core/Ref.h b/third_party_includes/Eigen/src/Core/Ref.h index bdf24f5..9c6e3c5 100644 --- a/third_party_includes/Eigen/src/Core/Ref.h +++ b/third_party_includes/Eigen/src/Core/Ref.h @@ -95,6 +95,8 @@ template class RefBase template EIGEN_DEVICE_FUNC void construct(Expression& expr) { + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression); + if(PlainObjectType::RowsAtCompileTime==1) { eigen_assert(expr.rows()==1 || expr.cols()==1); diff --git a/third_party_includes/Eigen/src/Core/SelfAdjointView.h b/third_party_includes/Eigen/src/Core/SelfAdjointView.h index 504c98f..b2e51f3 100644 --- a/third_party_includes/Eigen/src/Core/SelfAdjointView.h +++ b/third_party_includes/Eigen/src/Core/SelfAdjointView.h @@ -71,7 +71,9 @@ template class SelfAdjointView EIGEN_DEVICE_FUNC explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) - {} + { + EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY); + } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } @@ -189,7 +191,7 @@ template class SelfAdjointView TriangularView >::type(tmp2); } - typedef SelfAdjointView ConjugateReturnType; + typedef SelfAdjointView ConjugateReturnType; /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const diff --git a/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h b/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h index 50099df..7c89c2e 100644 --- a/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/third_party_includes/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -17,7 +17,6 @@ namespace Eigen { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; 
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/third_party_includes/Eigen/src/Core/SolveTriangular.h b/third_party_includes/Eigen/src/Core/SolveTriangular.h index 049890b..4652e2e 100644 --- a/third_party_includes/Eigen/src/Core/SolveTriangular.h +++ b/third_party_includes/Eigen/src/Core/SolveTriangular.h @@ -169,6 +169,9 @@ void TriangularViewImpl::solveInPlace(const MatrixBase::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1}; typedef typename internal::conditional::stableNorm() const typedef typename internal::nested_eval::type DerivedCopy; typedef typename internal::remove_all::type DerivedCopyClean; - DerivedCopy copy(derived()); + const DerivedCopy copy(derived()); enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) diff --git a/third_party_includes/Eigen/src/Core/Transpositions.h b/third_party_includes/Eigen/src/Core/Transpositions.h index 19c17bb..86da5af 100644 --- a/third_party_includes/Eigen/src/Core/Transpositions.h +++ b/third_party_includes/Eigen/src/Core/Transpositions.h @@ -384,7 +384,7 @@ class Transpose > const Product operator*(const MatrixBase& matrix, const Transpose& trt) { - return Product(matrix.derived(), trt.derived()); + return Product(matrix.derived(), trt); } /** \returns the \a matrix with the inverse transpositions applied to the rows. 
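The Transpositions.h hunk above passes the Transpose wrapper itself (trt) rather than trt.derived() when building the product, matching the Product type's template arguments. A hypothetical usage sketch of the expression that exercises this operator:

    #include <Eigen/Dense>

    int main()
    {
        Eigen::Matrix3d m = Eigen::Matrix3d::Identity();
        Eigen::Transpositions<3> tr;
        tr.setIdentity();
        // Exercises operator*(const MatrixBase&, const Transpose<TranspositionsBase<...> >&)
        Eigen::Matrix3d n = m * tr.transpose();
        return n.isApprox(m) ? 0 : 1;
    }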
diff --git a/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h b/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h index 99439c8..7fa6196 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX/Complex.h @@ -204,23 +204,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const - { return Packet4cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const - { return Packet4cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f) template<> EIGEN_STRONG_INLINE Packet4cf pdiv(const Packet4cf& a, const Packet4cf& b) { @@ -400,23 +384,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const - { return Packet2cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const - { return Packet2cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d) template<> EIGEN_STRONG_INLINE Packet2cd pdiv(const Packet2cd& a, const Packet2cd& b) { diff --git a/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h index 195d40f..923a124 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX/PacketMath.h @@ -159,11 +159,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv(const Packet8i& /*a*/, co #ifdef __FMA__ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) - // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, - // and gcc stupidly generates a vfmadd132ps instruction, - // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate - // the result of the product. +#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) + // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, + // and even register spilling with clang>=6.0 (bug 1637). + // Gcc stupidly generates a vfmadd132ps instruction. + // So let's enforce it to generate a vfmadd231ps instruction since the most common use + // case is to accumulate the result of the product. 
Packet8f res = c; __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); return res; @@ -172,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) +#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) ) // see above Packet4d res = c; __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); @@ -308,9 +309,9 @@ template<> EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) } #ifndef EIGEN_VECTORIZE_AVX512 -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif template<> EIGEN_STRONG_INLINE float pfirst(const Packet8f& a) { @@ -333,9 +334,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a) { __m256d tmp = _mm256_shuffle_pd(a,a,5); return _mm256_permute2f128_pd(tmp, tmp, 1); - + #if 0 + // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd + // exhibit the same latency/throughput, but it is here for future reference/benchmarking... __m256d swap_halves = _mm256_permute2f128_pd(a,a,1); return _mm256_permute_pd(swap_halves,5); + #endif } // pabs should be ok diff --git a/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h b/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h index 399be0e..9c1717f 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -88,9 +88,9 @@ plog(const Packet16f& _x) { // x = x + x - 1.0; // } else { x = x - 1.0; } __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ); - Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps()); + Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x); x = psub(x, p16f_1); - e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps())); + e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1)); x = padd(x, tmp); Packet16f x2 = pmul(x, x); @@ -119,8 +119,9 @@ plog(const Packet16f& _x) { x = padd(x, y2); // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. - return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf, - _mm512_mask_blend_ps(invalid_mask, p16f_nan, x)); + return _mm512_mask_blend_ps(iszero_mask, + _mm512_mask_blend_ps(invalid_mask, x, p16f_nan), + p16f_minus_inf); } #endif @@ -266,8 +267,7 @@ psqrt(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). 
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ); - Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x), - _mm512_setzero_ps()); + Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x)); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); @@ -289,8 +289,7 @@ psqrt(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ); - Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x), - _mm512_setzero_pd()); + Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x)); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -333,20 +332,18 @@ prsqrt(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ); - Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), - _mm512_rsqrt14_ps(_x)); + Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps()); // Fill in NaNs and Infs for the negative/zero entries. __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ); Packet16f infs_and_nans = _mm512_mask_blend_ps( - neg_mask, p16f_nan, - _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps())); + neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); // Insert NaNs and Infs in all the right places. - return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans); } template <> @@ -363,14 +360,12 @@ prsqrt(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ); - Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), - _mm512_rsqrt14_pd(_x)); + Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd()); // Fill in NaNs and Infs for the negative/zero entries. __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ); Packet8d infs_and_nans = _mm512_mask_blend_pd( - neg_mask, p8d_nan, - _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd())); + neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -379,9 +374,9 @@ prsqrt(const Packet8d& _x) { x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); // Insert NaNs and Infs in all the right places. 
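// The "Newton's iteration" lines refine the 14-bit rsqrt14 estimate:
// for f(x) = x^-2 - a, Newton-Raphson gives
//
//   x' = x * (1.5 - 0.5 * a * x * x)
//
// which is exactly pmul(x, pmadd(neg_half, pmul(x, x), one_point_five))
// with neg_half = -0.5 * a. Each step roughly doubles the number of
// correct bits, so one step suffices for float and two are used for
// double.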
- return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans); } -#else +#elif defined(EIGEN_VECTORIZE_AVX512ER) template <> EIGEN_STRONG_INLINE Packet16f prsqrt(const Packet16f& x) { return _mm512_rsqrt28_ps(x); diff --git a/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h index f6500a1..5adddc7 100644 --- a/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) { pstore(to, pa); } -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template <> EIGEN_STRONG_INLINE float pfirst(const Packet16f& a) { @@ -648,13 +648,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a) template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) { // _mm512_abs_ps intrinsic not found, so hack around it - return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff)); + return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff))); } template <> EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { // _mm512_abs_ps intrinsic not found, so hack around it - return (__m512d)_mm512_and_si512((__m512i)a, - _mm512_set1_epi64(0x7fffffffffffffff)); + return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a), + _mm512_set1_epi64(0x7fffffffffffffff))); } #ifdef EIGEN_VECTORIZE_AVX512DQ diff --git a/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h b/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h index 67db2f8..3e66573 100644 --- a/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/AltiVec/Complex.h @@ -224,23 +224,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { @@ -416,23 +400,8 @@ template<> struct conj_helper return pconj(internal::pmul(a, b)); } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { 
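// On the pabs hunks above: C-style casts between __m512 and __m512i
// compile under GCC's vector extensions but are rejected by stricter
// front ends; the _mm512_cast* intrinsics are the portable spelling of
// the same zero-instruction bitcast. The idiom is the usual sign-bit
// clear, sketched in scalar form:
//
//   abs_bits(f) = bits(f) & 0x7fffffff   // clear bit 31, keep the rest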
return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(internal::pmul(x, y.v)); } -}; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { diff --git a/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h index b3f1ea1..08a27d1 100755 --- a/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; #else -static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; +static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; @@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, co template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_min(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_max(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } @@ -764,7 +782,7 @@ typedef __vector __bool long Packet2bl; static Packet2l p2l_ONE = { 1, 1 }; static Packet2l p2l_ZERO = 
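// Reading the VSX min/max asm above and below (a scalar model, assuming
// xxsel's usual select-second-under-mask behaviour):
//
//   pmin: mask = (a >= b); ret = mask ? b : a;
//   pmax: mask = (b >  a); ret = mask ? b : a;
//
// Both compares are false on NaN, so a NaN in `a` propagates while a
// NaN in `b` is ignored, matching Eigen's scalar min/max semantics,
// which plain vec_min/vec_max do not guarantee here.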
reinterpret_cast(p4i_ZERO); -static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO = reinterpret_cast(p4f_ZERO); static Packet2d p2d_MZERO = { -0.0, -0.0 }; @@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + } -template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; +} template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } @@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) Packet2d v[2], sum; v[0] = vecs[0] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[0]), reinterpret_cast(vecs[0]), 8)); v[1] = vecs[1] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[1]), reinterpret_cast(vecs[1]), 8)); - + #ifdef _BIG_ENDIAN sum = reinterpret_cast(vec_sld(reinterpret_cast(v[0]), reinterpret_cast(v[1]), 8)); #else @@ -1022,7 +1050,7 @@ ptranspose(PacketBlock& kernel) { template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; - Packet2bl mask = vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)); + Packet2bl mask = reinterpret_cast( vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)) ); return vec_sel(elsePacket, thenPacket, mask); } #endif // __VSX__ diff --git a/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h b/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h index 294c517..755e620 100644 --- a/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h +++ b/third_party_includes/Eigen/src/Core/arch/CUDA/Half.h @@ -29,7 +29,7 @@ // type Eigen::half (inheriting from CUDA's __half struct) with // operator overloads such that it behaves basically as an arithmetic // type. It will be quite slow on CPUs (so it is recommended to stay -// in fp32 for CPUs, except for simple parameter conversions, I/O +// in float32_bits for CPUs, except for simple parameter conversions, I/O // to disk and the likes), but fast on GPUs. @@ -50,38 +50,45 @@ struct half; namespace half_impl { #if !defined(EIGEN_HAS_CUDA_FP16) - -// Make our own __half definition that is similar to CUDA's. -struct __half { - EIGEN_DEVICE_FUNC __half() {} - explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} +// Make our own __half_raw definition that is similar to CUDA's. 
+struct __half_raw { + EIGEN_DEVICE_FUNC __half_raw() : x(0) {} + explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} unsigned short x; }; - +#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000 +// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw +typedef __half __half_raw; #endif -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x); -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h); -struct half_base : public __half { +struct half_base : public __half_raw { EIGEN_DEVICE_FUNC half_base() {} - EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {} - EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {} + EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {} + EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {} +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000 + EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {} +#endif }; } // namespace half_impl // Class definition. struct half : public half_impl::half_base { - #if !defined(EIGEN_HAS_CUDA_FP16) - typedef half_impl::__half __half; + #if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000) + typedef half_impl::__half_raw __half_raw; #endif EIGEN_DEVICE_FUNC half() {} - EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {} EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {} +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000 + EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} +#endif explicit EIGEN_DEVICE_FUNC half(bool b) : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 
0x3c00 : 0)) {} @@ -138,71 +145,125 @@ struct half : public half_impl::half_base { } }; +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; + +// If std::numeric_limits is specialized, should also specialize +// std::numeric_limits, std::numeric_limits, and +// std::numeric_limits +// https://stackoverflow.com/a/16519653/ +template<> +struct numeric_limits : numeric_limits {}; +template<> +struct numeric_limits : numeric_limits {}; +template<> +struct numeric_limits : numeric_limits {}; +} // end namespace std + +namespace Eigen { + namespace half_impl { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 // Intrinsics for native fp16 support. Note that on current hardware, -// these are no faster than fp32 arithmetic (you need to use the half2 +// these are no faster than float32_bits arithmetic (you need to use the half2 // versions to get the ALU speed increased), but you do save the // conversion steps back and forth. 
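// Sanity notes on the numeric_limits values above (binary16 is 1 sign +
// 5 exponent + 10 mantissa bits, so digits == 11 counting the implicit
// leading one):
//
//   digits10 = floor((digits - 1) * log10(2)) = floor(10 * 0.30103) = 3
//   (min)()  : 0x0400 = 2^-14 ~= 6.104e-05, the smallest normal
//   (max)()  : 0x7bff = (2 - 2^-10) * 2^15 = 65504
//   lowest() : 0xfbff = -65504
//
// A hypothetical standalone check, not part of Eigen:
//
//   #include <cassert>
//   #include <cmath>
//   void check_binary16_limits() {
//     assert(std::ldexp(1.0, -14) == 6.103515625e-05);
//     assert((2.0 - std::ldexp(1.0, -10)) * std::ldexp(1.0, 15) == 65504.0);
//   }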
-__device__ half operator + (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) { return __hadd(a, b); } -__device__ half operator * (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) { return __hmul(a, b); } -__device__ half operator - (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) { return __hsub(a, b); } -__device__ half operator / (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) { float num = __half2float(a); float denom = __half2float(b); return __float2half(num / denom); } -__device__ half operator - (const half& a) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a) { return __hneg(a); } -__device__ half& operator += (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) { a = a + b; return a; } -__device__ half& operator *= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) { a = a * b; return a; } -__device__ half& operator -= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) { a = a - b; return a; } -__device__ half& operator /= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) { a = a / b; return a; } -__device__ bool operator == (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) { return __heq(a, b); } -__device__ bool operator != (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) { return __hne(a, b); } -__device__ bool operator < (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) { return __hlt(a, b); } -__device__ bool operator <= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) { return __hle(a, b); } -__device__ bool operator > (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) { return __hgt(a, b); } -__device__ bool operator >= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) { return __hge(a, b); } #else // Emulate support for half floats // Definitions for CPUs and older CUDA, mostly working through conversion -// to/from fp32. +// to/from float32_bits. EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) { return half(float(a) + float(b)); @@ -238,10 +299,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) return a; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { - return float(a) == float(b); + return numext::equal_strict(float(a),float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { - return float(a) != float(b); + return numext::not_equal_strict(float(a), float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { return float(a) < float(b); @@ -269,34 +330,35 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) { // these in hardware. 
If we need more performance on older/other CPUs, they are // also possible to vectorize directly. -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) { - __half h; +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) { + __half_raw h; h.x = x; return h; } -union FP32 { +union float32_bits { unsigned int u; float f; }; -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 - return __float2half(ff); +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 + __half tmp_ff = __float2half(ff); + return *(__half_raw*)&tmp_ff; #elif defined(EIGEN_HAS_FP16_C) - __half h; + __half_raw h; h.x = _cvtss_sh(ff, 0); return h; #else - FP32 f; f.f = ff; + float32_bits f; f.f = ff; - const FP32 f32infty = { 255 << 23 }; - const FP32 f16max = { (127 + 16) << 23 }; - const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; + const float32_bits f32infty = { 255 << 23 }; + const float32_bits f16max = { (127 + 16) << 23 }; + const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; unsigned int sign_mask = 0x80000000u; - __half o; + __half_raw o; o.x = static_cast(0x0u); unsigned int sign = f.u & sign_mask; @@ -335,17 +397,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) { #endif } -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 return __half2float(h); #elif defined(EIGEN_HAS_FP16_C) return _cvtsh_ss(h.x); #else - const FP32 magic = { 113 << 23 }; + const float32_bits magic = { 113 << 23 }; const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift - FP32 o; + float32_bits o; o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits unsigned int exp = shifted_exp & o.u; // just the exponent @@ -370,7 +432,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) { return (a.x & 0x7fff) == 0x7c00; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hisnan(a); #else return (a.x & 0x7fff) > 0x7c00; @@ -386,11 +448,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { - return half(::expf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hexp(a)); +#else + return half(::expf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return Eigen::half(::hlog(a)); +#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 + return half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -402,7 +468,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } 
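// A minimal round trip through the two helpers above (a sketch;
// half_roundtrip is a hypothetical free function, not part of Eigen):
//
//   inline float half_roundtrip(float f) {
//     __half_raw h = float_to_half_rtne(f);  // round to nearest even
//     return half_to_float(h);  // exact whenever f fits in binary16
//   }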
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { - return half(::sqrtf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hsqrt(a)); +#else + return half(::sqrtf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -420,14 +490,22 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hfloor(a)); +#else return half(::floorf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hceil(a)); +#else return half(::ceilf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hlt(b, a) ? b : a; #else const float f1 = static_cast(a); @@ -436,7 +514,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { #endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return __hlt(a, b) ? b : a; #else const float f1 = static_cast(a); @@ -474,49 +552,6 @@ template<> struct is_arithmetic { enum { value = true }; }; } // end namespace internal -} // end namespace Eigen - -namespace std { -template<> -struct numeric_limits { - static const bool is_specialized = true; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const bool has_infinity = true; - static const bool has_quiet_NaN = true; - static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = denorm_present; - static const bool has_denorm_loss = false; - static const std::float_round_style round_style = std::round_to_nearest; - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const int digits = 11; - static const int digits10 = 2; - //static const int max_digits10 = ; - static const int radix = 2; - static const int min_exponent = -13; - static const int min_exponent10 = -4; - static const int max_exponent = 16; - static const int max_exponent10 = 4; - static const bool traps = true; - static const bool tinyness_before = false; - - static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } - static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } - static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } - static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } - static Eigen::half round_error() { return Eigen::half(0.5); } - static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } - static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half 
denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } -}; -} - -namespace Eigen { - template<> struct NumTraits : GenericNumTraits { @@ -557,7 +592,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return Eigen::half(::hlog(a)); #else return Eigen::half(::logf(float(a))); @@ -591,14 +626,18 @@ struct hash { // Add the missing shfl_xor intrinsic -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { + #if EIGEN_CUDACC_VER < 90000 return static_cast(__shfl_xor(static_cast(var), laneMask, width)); + #else + return static_cast(__shfl_xor_sync(0xFFFFFFFF, static_cast(var), laneMask, width)); + #endif } #endif -// ldg() has an overload for __half, but we also need one for Eigen::half. -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +// ldg() has an overload for __half_raw, but we also need one for Eigen::half. +#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { return Eigen::half_impl::raw_uint16_to_half( __ldg(reinterpret_cast(ptr))); @@ -606,7 +645,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) #endif -#if defined(__CUDA_ARCH__) +#if defined(EIGEN_CUDA_ARCH) namespace Eigen { namespace numext { diff --git a/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225..c66d384 100644 --- a/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/third_party_includes/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -99,7 +99,8 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& template<> __device__ EIGEN_STRONG_INLINE half2 pabs(const half2& a) { half2 result; - result.x = a.x & 0x7FFF7FFF; + unsigned temp = *(reinterpret_cast(&(a))); + *(reinterpret_cast(&(result))) = temp & 0x7FFF7FFF; return result; } @@ -275,7 +276,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return __floats2half2_rn(r1, r2); } -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 template<> __device__ EIGEN_STRONG_INLINE half2 plog(const half2& a) { diff --git a/third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h b/third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h new file mode 100644 index 0000000..4cfe34e --- /dev/null +++ b/third_party_includes/Eigen/src/Core/arch/Default/ConjHelper.h @@ -0,0 +1,29 @@ + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
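// The macro below replaces the hand-written pairs of mixed real/complex
// conj_helper specializations that each SIMD backend (SSE, AVX, AltiVec,
// NEON, ZVector) previously duplicated, i.e. the blocks deleted
// elsewhere in this patch. One invocation per packet pair suffices:
//
//   EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f)
//
// expands to conj_helper<Packet8f, Packet4cf, false, false> and
// conj_helper<Packet4cf, Packet8f, false, false>, whose pmul/pmadd
// forward to the real packet's pmul against the complex packet's .v
// member (no conjugation is needed for a real operand).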
+
+#ifndef EIGEN_ARCH_CONJ_HELPER_H
+#define EIGEN_ARCH_CONJ_HELPER_H
+
+#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)                                                \
+  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false, false> {                                         \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); }                                                                                \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const                        \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); }                                           \
+  };                                                                                                              \
+                                                                                                                  \
+  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false, false> {                                         \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); }                                                                                \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const                        \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); }                                           \
+  };
+
+#endif // EIGEN_ARCH_CONJ_HELPER_H
diff --git a/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h b/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h
index 57e9b43..306a309 100644
--- a/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h
+++ b/third_party_includes/Eigen/src/Core/arch/NEON/Complex.h
@@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
 {
   float32x2_t r64;
-  r64 = vld1_f32((float *)&from);
+  r64 = vld1_f32((const float *)&from);
 
   return Packet2cf(vcombine_f32(r64, r64));
 }
@@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
   to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
 }
 
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
 
 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
 {
@@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
   }
 };
 
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+
 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
   // TODO optimize it for NEON
@@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
   s = vmulq_f32(b.v, b.v);
   rev_s = vrev64q_f32(s);
 
-  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
 }
 
 EIGEN_DEVICE_FUNC inline void
@@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
 
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
 
 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
 {
@@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
   }
 };
 
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
 {
   // TODO optimize it for NEON
diff --git a/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h
index 836fbc0..3d5ed0d 100644
---
a/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/NEON/PacketMath.h @@ -36,12 +36,43 @@ namespace internal { #endif #endif +#if EIGEN_COMP_MSVC + +// In MSVC's arm_neon.h header file, all NEON vector types +// are aliases to the same underlying type __n128. +// We thus have to wrap them to make them different C++ types. +// (See also bug 1428) + +template +struct eigen_packet_wrapper +{ + operator T&() { return m_val; } + operator const T&() const { return m_val; } + eigen_packet_wrapper() {} + eigen_packet_wrapper(const T &v) : m_val(v) {} + eigen_packet_wrapper& operator=(const T &v) { + m_val = v; + return *this; + } + + T m_val; +}; +typedef eigen_packet_wrapper Packet2f; +typedef eigen_packet_wrapper Packet4f; +typedef eigen_packet_wrapper Packet4i; +typedef eigen_packet_wrapper Packet2i; +typedef eigen_packet_wrapper Packet4ui; + +#else + typedef float32x2_t Packet2f; typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; typedef int32x2_t Packet2i; typedef uint32x4_t Packet4ui; +#endif // EIGEN_COMP_MSVC + #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ const Packet4f p4f_##NAME = pset1(X) diff --git a/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h b/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h index 5607fe0..d075043 100644 --- a/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/SSE/Complex.h @@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3))); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -229,23 +229,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { @@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { @@ -430,23 +414,7 @@ template<> struct conj_helper } }; -template<> 
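// On the MSVC wrapper above: arm_neon.h under MSVC defines float32x4_t,
// int32x4_t, uint32x4_t, ... as typedefs of one builtin type (__n128),
// so overloads taking different packet types collapse into identical
// signatures. The wrapper is therefore tagged with a small integer so
// each packet gets a distinct C++ type (a sketch following the Eigen
// 3.3.5 source; treat the exact ids as an assumption):
//
//   template <typename T, int unique_id> struct eigen_packet_wrapper { ... };
//   typedef eigen_packet_wrapper<float32x4_t, 1> Packet4f;
//   typedef eigen_packet_wrapper<int32x4_t,   2> Packet4i;
//
// and the implicit operator T& conversions let wrapped values flow
// straight into the NEON intrinsics.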
struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { diff --git a/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h b/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h index 3832de1..60e2517 100755 --- a/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/third_party_includes/Eigen/src/Core/arch/SSE/PacketMath.h @@ -28,7 +28,7 @@ namespace internal { #endif #endif -#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004) +#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // have overloads for both types without linking error. // One solution is to increase ABI version using -fabi-version=4 (or greater). @@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1(double* to, const double& pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); } +#if EIGEN_COMP_PGI +typedef const void * SsePrefetchPtrType; +#else +typedef const char * SsePrefetchPtrType; +#endif + #ifndef EIGEN_VECTORIZE_AVX -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 @@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co } // end namespace Eigen +#if EIGEN_COMP_PGI +// PGI++ does not define the following intrinsics in C++ mode. 
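// The reinterpret_cast shims that follow stand in for the _mm_cast*
// intrinsics PGI omits in C++ mode: each is a pure bitcast between SSE
// register types, zero instructions with the bits unchanged, which is
// exactly what the real intrinsics do elsewhere. SsePrefetchPtrType
// above plays the same role for _mm_prefetch, whose first parameter PGI
// declares as const void* where other toolchains use const char*.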
+static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); } +static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); } +static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); } +static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); } +#endif + #endif // EIGEN_PACKET_MATH_SSE_H diff --git a/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h b/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h index c848932..c6ca8c7 100644 --- a/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/third_party_includes/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -14,6 +14,7 @@ namespace Eigen { namespace internal { +#ifndef EIGEN_VECTORIZE_AVX template <> struct type_casting_traits { enum { @@ -23,11 +24,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { - return _mm_cvttps_epi32(a); -} - - template <> struct type_casting_traits { enum { @@ -37,11 +33,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { - return _mm_cvtepi32_ps(a); -} - - template <> struct type_casting_traits { enum { @@ -51,10 +42,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { - return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); -} - template <> struct type_casting_traits { enum { @@ -63,6 +50,19 @@ struct type_casting_traits { TgtCoeffRatio = 2 }; }; +#endif + +template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { + return _mm_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { + return _mm_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { + return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); +} template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f& a) { // Simply discard the second half of the input diff --git a/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h b/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h index d39d2d1..1bfb733 100644 --- a/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h +++ b/third_party_includes/Eigen/src/Core/arch/ZVector/Complex.h @@ -336,6 +336,9 @@ template<> struct conj_helper } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) + template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec diff --git a/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h b/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h index 96747ba..3eae6b8 100644 --- a/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h +++ b/third_party_includes/Eigen/src/Core/functors/BinaryFunctors.h @@ -255,7 +255,7 @@ struct scalar_cmp_op : binary_op_base struct scalar_hypot_op : binary_op_base { EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar 
&y) const { - EIGEN_USING_STD_MATH(sqrt) - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - return p * sqrt(Scalar(1) + qp*qp); + // This functor is used by hypotNorm only for which it is faster to first apply abs + // on all coefficients prior to reduction through hypot. + // This way we avoid calling abs on positive and real entries, and this also permits + // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes + // through the same functor... + return internal::positive_real_hypot(x,y); } }; template diff --git a/third_party_includes/Eigen/src/Core/functors/StlFunctors.h b/third_party_includes/Eigen/src/Core/functors/StlFunctors.h index 6df3fa5..9c1d758 100644 --- a/third_party_includes/Eigen/src/Core/functors/StlFunctors.h +++ b/third_party_includes/Eigen/src/Core/functors/StlFunctors.h @@ -83,13 +83,17 @@ struct functor_traits > { enum { Cost = functor_traits::Cost, PacketAccess = false }; }; #endif +#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910) +// std::unary_negate is deprecated since c++17 and will be removed in c++20 template struct functor_traits > { enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; +// std::binary_negate is deprecated since c++17 and will be removed in c++20 template struct functor_traits > { enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; +#endif #ifdef EIGEN_STDEXT_SUPPORT diff --git a/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 45230bc..e3980f6 100644 --- a/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/third_party_includes/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1197,10 +1197,16 @@ void gebp_kernel=6 without FMA (bug 1637) + #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1)); + #else + #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND + #endif + #define EIGEN_GEBGP_ONESTEP(K) \ do { \ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \ - EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \ traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \ traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \ traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \ @@ -1212,6 +1218,7 @@ void gebp_kernel::half SResPacketHalf; + const int SResPacketHalfSize = unpacket_traits::half>::size; if ((SwappedTraits::LhsProgress % 4) == 0 && (SwappedTraits::LhsProgress <= 8) && - (SwappedTraits::LhsProgress!=8 || unpacket_traits::size==nr)) + (SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr)) { SAccPacket C0, C1, C2, C3; straits.initAcc(C0); diff --git a/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 41e18ff..f6f9ebe 100644 --- a/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product& blocking) \ { \ - if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \ general_matrix_matrix_rankupdate \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ @@ -88,7 +88,7 @@ struct 
general_matrix_matrix_rankupdate(lhsStride), ldc=convert_index(resStride), n=convert_index(size), k=convert_index(depth); \ char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \ EIGTYPE beta(1); \ - BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \ + BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \ } \ }; @@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ }}; -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, double, z) -GEMM_SPECIALIZATION(scomplex, cf, float, c) +#ifdef EIGEN_USE_MKL +GEMM_SPECIALIZATION(double, d, double, dgemm) +GEMM_SPECIALIZATION(float, f, float, sgemm) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm) +#else +GEMM_SPECIALIZATION(double, d, double, dgemm_) +GEMM_SPECIALIZATION(float, f, float, sgemm_) +GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_) +GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_) +#endif } // end namespase internal diff --git a/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h b/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h index 3c1a7fc..a597c1f 100644 --- a/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/third_party_includes/Eigen/src/Core/products/GeneralMatrixVector.h @@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product \ struct general_matrix_vector_product_gemv \ { \ @@ -113,14 +113,21 @@ static void run( \ x_ptr=x_tmp.data(); \ incx=1; \ } else x_ptr=rhs; \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; -EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d) -EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s) -EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z) -EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv) +#else +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_) +#endif } // end namespase internal diff --git a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index a45238d..9a53185 100644 --- a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ 
b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -40,7 +40,7 @@ namespace internal { /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -81,13 +81,13 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -144,20 +144,26 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -EIGEN_BLAS_SYMM_L(double, double, d, d) -EIGEN_BLAS_SYMM_L(float, float, f, s) -EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_L(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_L(double, double, d, dsymm) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm) +EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_L(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_) +#endif /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -197,13 +203,13 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } else b = _lhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -259,15 +265,21 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ } \ }; -EIGEN_BLAS_SYMM_R(double, double, d, d) -EIGEN_BLAS_SYMM_R(float, float, f, s) -EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_R(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_R(double, double, d, dsymm) 
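// The pattern in this and the other *_BLAS.h files: the macros now take
// the complete BLAS symbol instead of a one-letter prefix, so a single
// macro body can bind to either ABI. Plain Fortran BLAS exports
// underscore-suffixed symbols (dsymm_, zhemm_, ...) and takes every
// argument by pointer, while MKL's C interface drops the underscore and
// uses MKL_Complex8/MKL_Complex16 for complex data, which is also why
// alpha and beta gained explicit (const BLASTYPE*) casts.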
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm) +EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_R(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_R(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_) +#endif } // end namespace internal } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h index 38f23ac..1238345 100644 --- a/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h @@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ } else x_ptr=_rhs; \ - BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) +#else EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_) EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_) +#endif } // end namespace internal diff --git a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h index 6ec5a8a..f784507 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); + // To work around an "error: member reference base type 'Matrix<...> + // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is + // not a structure or union" compilation error in nvcc (tested V8.0.61), + // create a dummy internal::constructor_without_unaligned_array_assert + // object to pass to the Matrix constructor. 
+ internal::constructor_without_unaligned_array_assert a; + Matrix triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); + internal::constructor_without_unaligned_array_assert a; + Matrix triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -393,7 +400,9 @@ struct triangular_product_impl { template static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typedef typename Dest::Scalar Scalar; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar Scalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; @@ -405,8 +414,9 @@ struct triangular_product_impl typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) - * RhsBlasTraits::extractScalarFactor(a_rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); + Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha; typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -431,6 +441,21 @@ struct triangular_product_impl &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); + + // Apply correction if the diagonal is unit and a scalar factor was nested: + if ((Mode&UnitDiag)==UnitDiag) + { + if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); + } + else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + { + Index diagSize = (std::min)(rhs.rows(),rhs.cols()); + dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); + } + } } }; diff --git a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index aecded6..a25197a 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) // implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm \ @@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm struct trmv_selector typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type::type 
actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -274,6 +275,12 @@ template struct trmv_selector else dest = MappedDest(actualDestPtr, dest.size()); } + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; @@ -295,8 +302,9 @@ template struct trmv_selector typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 @@ -326,6 +334,12 @@ template struct trmv_selector actualRhsPtr,1, dest.data(),dest.innerStride(), actualAlpha); + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; diff --git a/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h index 07bf26c..3d47a2b 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h @@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) // implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template \ struct triangular_matrix_vector_product_trmv { \ enum { \ @@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv(size); \ n = convert_index(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_CM(double, double, d, d) -EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_CM(float, float, f, s) -EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_CM(double, double, d, d,) +EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_CM(float, float, f, s,) +EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_CM(double, double, d, d, _) +EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _) +EIGEN_BLAS_TRMV_CM(float, float, f, s, _) +EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _) +#endif // implements row-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_RM(EIGTYPE, 
BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template \ struct triangular_matrix_vector_product_trmv { \ enum { \ @@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv(size); \ n = convert_index(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_RM(double, double, d, d) -EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_RM(float, float, f, s) -EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_RM(double, double, d, d,) +EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_RM(float, float, f, s,) +EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_RM(double, double, d, d,_) +EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_) +EIGEN_BLAS_TRMV_RM(float, float, f, s,_) +EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_) +#endif } // end namespace internal diff --git a/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h index 88c0fb7..f077511 100644 --- a/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +++ b/third_party_includes/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h @@ -38,7 +38,7 @@ namespace Eigen { namespace internal { // implements LeftSide op(triangular)^-1 * general -#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ +#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \ template \ struct triangular_solve_matrix \ { \ @@ -80,18 +80,24 @@ struct triangular_solve_matrix \ struct triangular_solve_matrix \ { \ @@ -133,16 +139,22 @@ struct triangular_solve_matrix=6 +#elif defined __GNUC__ - #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic push #endif - #pragma GCC diagnostic ignored "-Wignored-attributes" + // g++ warns about local variables shadowing member functions, which is too strict + #pragma GCC diagnostic ignored "-Wshadow" + #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 + // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions: + #pragma GCC diagnostic ignored "-Wtype-limits" + #endif + #if __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif #endif diff --git a/third_party_includes/Eigen/src/Core/util/MKL_support.h b/third_party_includes/Eigen/src/Core/util/MKL_support.h index 26b5966..b7d6ecc 100755 --- a/third_party_includes/Eigen/src/Core/util/MKL_support.h +++ b/third_party_includes/Eigen/src/Core/util/MKL_support.h @@ -49,10 +49,11 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL) #define EIGEN_USE_MKL #endif + #if defined EIGEN_USE_MKL # include /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ @@ -108,6 +109,10 @@ #endif #endif +#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL) +#include "../../misc/blas.h" +#endif + namespace Eigen { typedef std::complex dcomplex; @@ -121,8 +126,5 @@ typedef int
BlasIndex; } // end namespace Eigen -#if defined(EIGEN_USE_BLAS) -#include "../../misc/blas.h" -#endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/third_party_includes/Eigen/src/Core/util/Macros.h b/third_party_includes/Eigen/src/Core/util/Macros.h index 38d6ddb..aa054a0 100644 --- a/third_party_includes/Eigen/src/Core/util/Macros.h +++ b/third_party_includes/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 4 +#define EIGEN_MINOR_VERSION 7 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -399,7 +399,7 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 @@ -413,7 +413,7 @@ #ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ @@ -487,11 +487,13 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. +#ifndef EIGEN_STRONG_INLINE #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline #endif +#endif // EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible // attribute to maximize inlining. This should only be used when really necessary: in particular, @@ -812,7 +814,8 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0) + // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) @@ -986,7 +989,13 @@ namespace Eigen { # define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() -# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# if EIGEN_COMP_MSVC + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway.
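// Background for the branch completed below (editorial note): dynamic
// exception specifications such as throw(X) are deprecated since C++11 and
// were removed in C++17; MSVC never implemented them and emits warning
// C4290, honoring only the empty form. A minimal sketch of the two
// spellings (compiled as C++03/C++11; the demo_* names are hypothetical):
#include <stdexcept>
void demo_may_throw() throw(std::runtime_error); // C4290 under MSVC, spec ignored
void demo_nothrow() throw();                     // accepted and enforced by MSVC
// Mapping EIGEN_EXCEPTION_SPEC(X) to throw() on MSVC therefore silences the
// warning without changing behavior on that compiler.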
+# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif #endif #endif // EIGEN_MACROS_H diff --git a/third_party_includes/Eigen/src/Core/util/Memory.h b/third_party_includes/Eigen/src/Core/util/Memory.h index c634d7e..291383c 100644 --- a/third_party_includes/Eigen/src/Core/util/Memory.h +++ b/third_party_includes/Eigen/src/Core/util/Memory.h @@ -70,7 +70,7 @@ inline void throw_std_bad_alloc() throw std::bad_alloc(); #else std::size_t huge = static_cast(-1); - new int[huge]; + ::operator new(huge); #endif } @@ -493,7 +493,7 @@ template struct smart_copy_helper { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); - memcpy(target, start, size); + std::memcpy(target, start, size); } }; @@ -696,7 +696,15 @@ template void swap(scoped_array &a,scoped_array &b) /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with types requiring a non standard alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in the following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. * * Example: * \code @@ -739,7 +747,15 @@ class aligned_allocator : public std::allocator pointer allocate(size_type num, const void* /*hint*/ = 0) { internal::check_size_for_overflow(num); - return static_cast( internal::aligned_malloc(num * sizeof(T)) ); + size_type size = num * sizeof(T); +#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0) + // workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544 + // It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807 + if(size>=std::size_t((std::numeric_limits::max)())) + return 0; + else +#endif + return static_cast( internal::aligned_malloc(size) ); } void deallocate(pointer p, size_type /*num*/) diff --git a/third_party_includes/Eigen/src/Core/util/Meta.h b/third_party_includes/Eigen/src/Core/util/Meta.h index 7f63707..d31e954 100755 --- a/third_party_includes/Eigen/src/Core/util/Meta.h +++ b/third_party_includes/Eigen/src/Core/util/Meta.h @@ -109,6 +109,28 @@ template<> struct is_integral { enum { value = true }; }; template<> struct is_integral { enum { value = true }; }; template<> struct is_integral { enum { value = true }; }; +#if EIGEN_HAS_CXX11 +using std::make_unsigned; +#else +// TODO: Possibly improve this implementation of make_unsigned. +// It is currently used only by +// template struct random_default_impl.
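// What the C++98 fallback below reproduces (editorial sketch): the C++11
// trait that maps each signed integer type to its unsigned counterpart,
// needed by the random_default_impl mentioned in the comment above. With a
// C++11 compiler the alias simply forwards to the standard trait:
#include <type_traits>
static_assert(std::is_same<std::make_unsigned<long>::type, unsigned long>::value,
              "make_unsigned maps long to unsigned long");
static_assert(std::is_same<std::make_unsigned<signed char>::type, unsigned char>::value,
              "and signed char to unsigned char");
// The table of specializations that follows hand-codes exactly these
// mappings for pre-C++11 builds, including the MSVC-only __int64 pair.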
+template struct make_unsigned; +template<> struct make_unsigned { typedef unsigned char type; }; +template<> struct make_unsigned { typedef unsigned char type; }; +template<> struct make_unsigned { typedef unsigned char type; }; +template<> struct make_unsigned { typedef unsigned short type; }; +template<> struct make_unsigned { typedef unsigned short type; }; +template<> struct make_unsigned { typedef unsigned int type; }; +template<> struct make_unsigned { typedef unsigned int type; }; +template<> struct make_unsigned { typedef unsigned long type; }; +template<> struct make_unsigned { typedef unsigned long type; }; +#if EIGEN_COMP_MSVC +template<> struct make_unsigned { typedef unsigned __int64 type; }; +template<> struct make_unsigned { typedef unsigned __int64 type; }; +#endif +#endif + template struct add_const { typedef const T type; }; template struct add_const { typedef T& type; }; @@ -485,6 +507,26 @@ T div_ceil(const T &a, const T &b) return (a+b-1) / b; } +// The aim of the following functions is to bypass -Wfloat-equal warnings +// when we really want a strict equality comparison on floating points. +template EIGEN_STRONG_INLINE +bool equal_strict(const X& x,const Y& y) { return x == y; } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const float& x,const float& y) { return std::equal_to()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const double& x,const double& y) { return std::equal_to()(x,y); } + +template EIGEN_STRONG_INLINE +bool not_equal_strict(const X& x,const Y& y) { return x != y; } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to()(x,y); } + } // end namespace numext } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h b/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h index 86b60f5..ecc82b7 100644 --- a/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/third_party_includes/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -8,7 +8,7 @@ #pragma warning pop #elif defined __clang__ #pragma clang diagnostic pop - #elif defined __GNUC__ && __GNUC__>=6 + #elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic pop #endif diff --git a/third_party_includes/Eigen/src/Core/util/StaticAssert.h b/third_party_includes/Eigen/src/Core/util/StaticAssert.h index 983361a..500e477 100644 --- a/third_party_includes/Eigen/src/Core/util/StaticAssert.h +++ b/third_party_includes/Eigen/src/Core/util/StaticAssert.h @@ -24,6 +24,7 @@ * */ +#ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) @@ -44,64 +45,65 @@ struct static_assertion { enum { - YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, - YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, - YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, - THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, - OUT_OF_RANGE_ACCESS, - YOU_MADE_A_PROGRAMMING_MISTAKE, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, - EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, - YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, - 
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, - UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, - THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, - NUMERIC_TYPE_MUST_BE_REAL, - COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, - WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, - THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, - INVALID_MATRIX_PRODUCT, - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, - THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, - INVALID_MATRIX_TEMPLATE_PARAMETERS, - INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, - BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, - THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, - YOU_ALREADY_SPECIFIED_THIS_STRIDE, - INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, - THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, - THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, - YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, - YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, - THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, - YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, - THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, - THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, - THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, - YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, - YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, - THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, - IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED, - STORAGE_KIND_MUST_MATCH, - STORAGE_INDEX_MUST_MATCH, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, + THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, + OUT_OF_RANGE_ACCESS=1, + YOU_MADE_A_PROGRAMMING_MISTAKE=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, + YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, + YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, + UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, + THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, + NUMERIC_TYPE_MUST_BE_REAL=1, + COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, + 
WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, + INVALID_MATRIX_PRODUCT=1, + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, + INVALID_MATRIX_TEMPLATE_PARAMETERS=1, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, + BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, + THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, + YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, + THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, + YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, + THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, + THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, + THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, + YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, + YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, + THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, + STORAGE_LAYOUT_DOES_NOT_MATCH=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, + THIS_TYPE_IS_NOT_SUPPORTED=1, + STORAGE_KIND_MUST_MATCH=1, + STORAGE_INDEX_MUST_MATCH=1, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1 }; }; @@ -131,7 +133,7 @@ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #endif // EIGEN_NO_STATIC_ASSERT - +#endif // EIGEN_STATIC_ASSERT // static assertion failing if the type \a TYPE is not a vector type #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ diff --git a/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 36a91df..87d789b 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/third_party_includes/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -311,7 +311,6 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp // Aliases: Map v(reinterpret_cast(m_tmp.data()), size); ComplexVectorType &cv = m_tmp; - const MatrixType &mZ = m_realQZ.matrixZ(); const MatrixType &mS = m_realQZ.matrixS(); const MatrixType &mT = m_realQZ.matrixT(); @@ -351,7 +350,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } } } - m_eivec.col(i).real().noalias() = mZ.transpose() * v; + m_eivec.col(i).real().noalias() = 
m_realQZ.matrixZ().transpose() * v; m_eivec.col(i).real().normalize(); m_eivec.col(i).imag().setConstant(0); } @@ -400,7 +399,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp / (alpha*mT.coeffRef(j,j) - static_cast(beta*mS.coeffRef(j,j))); } } - m_eivec.col(i+1).noalias() = (mZ.transpose() * cv); + m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv); m_eivec.col(i+1).normalize(); m_eivec.col(i) = m_eivec.col(i+1).conjugate(); } diff --git a/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index 4fec8af..e4e4260 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/third_party_includes/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -66,7 +66,6 @@ template inline typename MatrixBase::EigenvaluesReturnType MatrixBase::eigenvalues() const { - typedef typename internal::traits::Scalar Scalar; return internal::eigenvalues_selector::IsComplex>::run(derived()); } @@ -88,7 +87,6 @@ template inline typename SelfAdjointView::EigenvaluesReturnType SelfAdjointView::eigenvalues() const { - typedef typename SelfAdjointView::PlainObject PlainObject; PlainObject thisAsMatrix(*this); return SelfAdjointEigenSolver(thisAsMatrix, false).eigenvalues(); } diff --git a/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h b/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h index f5c8604..17ea903 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h +++ b/third_party_includes/Eigen/src/Eigenvalues/RealSchur.h @@ -303,7 +303,7 @@ RealSchur& RealSchur::computeFromHessenberg(const HessMa Scalar exshift(0); // sum of exceptional shifts Scalar norm = computeNormOfT(); - if(norm!=0) + if(norm!=Scalar(0)) { while (iu >= 0) { @@ -327,7 +327,7 @@ RealSchur& RealSchur::computeFromHessenberg(const HessMa else // No convergence yet { // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG ) - Vector3s firstHouseholderVector(0,0,0), shiftInfo; + Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo; computeShift(iu, iter, exshift, shiftInfo); iter = iter + 1; totalIter = totalIter + 1; diff --git a/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h b/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h index 3891cf8..b0c947d 100644 --- a/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +++ b/third_party_includes/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h @@ -37,7 +37,7 @@ namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \ +#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \ template<> template inline \ SelfAdjointEigenSolver >& \ SelfAdjointEigenSolver >::compute(const EigenBase& matrix, int options) \ @@ -47,7 +47,7 @@ SelfAdjointEigenSolver >::compute(c && (options&EigVecMask)!=EigVecMask \ && "invalid option parameter"); \ bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ - lapack_int n = internal::convert_index(matrix.cols()), lda, matrix_order, info; \ + lapack_int n = internal::convert_index(matrix.cols()), lda, info; \ m_eivalues.resize(n,1); \ m_subdiag.resize(n-1); \ m_eivec = matrix; \ @@ -63,27 +63,24 @@ SelfAdjointEigenSolver >::compute(c } \ 
\ lda = internal::convert_index(m_eivec.outerStride()); \ - matrix_order=LAPACKE_COLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 'V' : 'N'; \ \ - info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ + info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ m_info = (info==0) ? Success : NoConvergence; \ m_isInitialized = true; \ m_eigenvectorsOk = computeEigenvectors; \ return *this; \ } +#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \ + EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \ + EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor ) -EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR) - -EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev) +EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev) +EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev) +EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev) } // end namespace Eigen diff --git a/third_party_includes/Eigen/src/Geometry/AngleAxis.h b/third_party_includes/Eigen/src/Geometry/AngleAxis.h index 0af3c1b..83ee1be 100644 --- a/third_party_includes/Eigen/src/Geometry/AngleAxis.h +++ b/third_party_includes/Eigen/src/Geometry/AngleAxis.h @@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis& AngleAxis::operator=(const Quaterni if (n != Scalar(0)) { m_angle = Scalar(2)*atan2(n, abs(q.w())); - if(q.w() < 0) + if(q.w() < Scalar(0)) n = -n; m_axis = q.vec() / n; } diff --git a/third_party_includes/Eigen/src/Geometry/Quaternion.h b/third_party_includes/Eigen/src/Geometry/Quaternion.h index 3e5a9ba..c3fd8c3 100644 --- a/third_party_includes/Eigen/src/Geometry/Quaternion.h +++ b/third_party_includes/Eigen/src/Geometry/Quaternion.h @@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef typename internal::traits::Coefficients Coefficients; + typedef typename Coefficients::CoeffReturnType CoeffReturnType; + typedef typename internal::conditional::Flags&LvalueBit), + Scalar&, CoeffReturnType>::type NonConstCoeffReturnType; + + enum { Flags = Eigen::internal::traits::Flags }; @@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase /** \returns the \c x coefficient */ - EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); } + EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); } /** \returns the \c y coefficient */ - EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); } + EIGEN_DEVICE_FUNC inline 
CoeffReturnType y() const { return this->derived().coeffs().coeff(1); } /** \returns the \c z coefficient */ - EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); } + EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); } /** \returns the \c w coefficient */ - EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); } - - /** \returns a reference to the \c x coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } + EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); } + + /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); } + /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); } + /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); } + /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ EIGEN_DEVICE_FUNC inline const VectorBlock vec() const { return coeffs().template head<3>(); } diff --git a/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index facdaf8..f66c846 100644 --- a/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/third_party_includes/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -168,7 +168,7 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { for(Index j=0; jRealScalar(0)) m_invdiag(j) = RealScalar(1)/sum; else diff --git a/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 395daa8..f7ce471 100644 --- a/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/third_party_includes/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -50,7 +50,8 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, tol_error = 0; return; } - RealScalar threshold = tol*tol*rhsNorm2; + const RealScalar considerAsZero = (std::numeric_limits::min)(); + RealScalar threshold = numext::maxi(tol*tol*rhsNorm2,considerAsZero); RealScalar residualNorm2 = residual.squaredNorm(); if (residualNorm2 < threshold) { @@ -58,7 +59,7 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, tol_error = sqrt(residualNorm2 / rhsNorm2); return; } - + VectorType p(n); p = precond.solve(residual); // initial search direction diff --git 
a/third_party_includes/Eigen/src/Jacobi/Jacobi.h b/third_party_includes/Eigen/src/Jacobi/Jacobi.h index c30326e..1998c63 100644 --- a/third_party_includes/Eigen/src/Jacobi/Jacobi.h +++ b/third_party_includes/Eigen/src/Jacobi/Jacobi.h @@ -65,11 +65,11 @@ template class JacobiRotation bool makeJacobi(const MatrixBase&, Index p, Index q); bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z); - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0); protected: - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::true_type); - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::false_type); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type); Scalar m_c, m_s; }; @@ -84,7 +84,6 @@ bool JacobiRotation::makeJacobi(const RealScalar& x, const Scalar& y, co { using std::sqrt; using std::abs; - typedef typename NumTraits::Real RealScalar; RealScalar deno = RealScalar(2)*abs(y); if(deno < (std::numeric_limits::min)()) { @@ -133,7 +132,7 @@ inline bool JacobiRotation::makeJacobi(const MatrixBase& m, Ind * \f$ V = \left ( \begin{array}{c} p \\ q \end{array} \right )\f$ yields: * \f$ G^* V = \left ( \begin{array}{c} r \\ 0 \end{array} \right )\f$. * - * The value of \a z is returned if \a z is not null (the default is null). + * The value of \a r is returned if \a r is not null (the default is null). * Also note that G is built such that the cosine is always real. * * Example: \include Jacobi_makeGivens.cpp @@ -146,9 +145,9 @@ inline bool JacobiRotation::makeJacobi(const MatrixBase& m, Ind * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ template -void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar* z) +void JacobiRotation::makeGivens(const Scalar& p, const Scalar& q, Scalar* r) { - makeGivens(p, q, z, typename internal::conditional::IsComplex, internal::true_type, internal::false_type>::type()); + makeGivens(p, q, r, typename internal::conditional::IsComplex, internal::true_type, internal::false_type>::type()); } @@ -298,61 +297,119 @@ inline void MatrixBase::applyOnTheRight(Index p, Index q, const JacobiR } namespace internal { -template -void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) + +template +struct apply_rotation_in_the_plane_selector { - typedef typename VectorX::Scalar Scalar; - enum { - PacketSize = packet_traits::size, - OtherPacketSize = packet_traits::size - }; - typedef typename packet_traits::type Packet; - typedef typename packet_traits::type OtherPacket; - eigen_assert(xpr_x.size() == xpr_y.size()); - Index size = xpr_x.size(); - Index incrx = xpr_x.derived().innerStride(); - Index incry = xpr_y.derived().innerStride(); + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + for(Index i=0; i +struct apply_rotation_in_the_plane_selector +{ + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + enum { + PacketSize = packet_traits::size, + OtherPacketSize = packet_traits::size + }; + typedef typename packet_traits::type Packet; + typedef typename packet_traits::type OtherPacket; + + /*** dynamic-size vectorized paths ***/ + if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1)) + { 
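// Scalar reference for the vectorized kernel that follows (editorial sketch;
// the demo_* names are illustrative). Element by element, applying the Givens
// rotation with cosine c and sine s in the plane of vectors x and y computes:
#include <complex>
typedef std::complex<float> cf;
void demo_apply_rotation(cf* x, cf* y, long n, float c, cf s) {
  for (long i = 0; i < n; ++i) {
    cf xi = x[i], yi = y[i];
    x[i] = c * xi + std::conj(s) * yi; // padd(pm.pmul(pc,xi), pcj.pmul(ps,yi))
    y[i] = c * yi - s * xi;            // psub(pcj.pmul(pc,yi), pm.pmul(ps,xi)), conj(c)==c here
  }
}
// The packet code below is this loop restated with aligned pload/pstore on y,
// unaligned ploadu/pstoreu on x, and two-packet peeling once y is aligned.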
+ // both vectors are sequentially stored in memory => vectorization + enum { Peeling = 2 }; - /*** dynamic-size vectorized paths ***/ + Index alignedStart = internal::first_default_aligned(y, size); + Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - if(VectorX::SizeAtCompileTime == Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (PacketSize == OtherPacketSize) && - ((incrx==1 && incry==1) || PacketSize == 1)) - { - // both vectors are sequentially stored in memory => vectorization - enum { Peeling = 2 }; + const OtherPacket pc = pset1(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; - Index alignedStart = internal::first_default_aligned(y, size); - Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; + for(Index i=0; i(c); - const OtherPacket ps = pset1(s); - conj_helper::IsComplex,false> pcj; - conj_helper pm; + Scalar* EIGEN_RESTRICT px = x + alignedStart; + Scalar* EIGEN_RESTRICT py = y + alignedStart; - for(Index i=0; i(px); + Packet yi = pload(py); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } + } + else + { + Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); + for(Index i=alignedStart; i(px); + Packet xi1 = ploadu(px+PacketSize); + Packet yi = pload (py); + Packet yi1 = pload (py+PacketSize); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); + px += Peeling*PacketSize; + py += Peeling*PacketSize; + } + if(alignedEnd!=peelingEnd) + { + Packet xi = ploadu(x+peelingEnd); + Packet yi = pload (y+peelingEnd); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + } + } - Scalar* EIGEN_RESTRICT px = x + alignedStart; - Scalar* EIGEN_RESTRICT py = y + alignedStart; + for(Index i=alignedEnd; i0) // FIXME should be compared to the required alignment { - for(Index i=alignedStart; i(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; + Scalar* EIGEN_RESTRICT px = x; + Scalar* EIGEN_RESTRICT py = y; + for(Index i=0; i(px); Packet yi = pload(py); @@ -362,76 +419,40 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x py += PacketSize; } } - else - { - Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); - for(Index i=alignedStart; i(px); - Packet xi1 = ploadu(px+PacketSize); - Packet yi = pload (py); - Packet yi1 = pload (py+PacketSize); - pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); - px += Peeling*PacketSize; - py += Peeling*PacketSize; - } - if(alignedEnd!=peelingEnd) - { - Packet xi = ploadu(x+peelingEnd); - Packet yi = pload (y+peelingEnd); - pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - } - } - for(Index i=alignedEnd; i::run(x,incrx,y,incry,size,c,s); } } +}; - /*** fixed-size vectorized path ***/ - else if(VectorX::SizeAtCompileTime != Dynamic && - (VectorX::Flags & VectorY::Flags & 
PacketAccessBit) && - (PacketSize == OtherPacketSize) && - (EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment)>0)) // FIXME should be compared to the required alignment - { - const OtherPacket pc = pset1(c); - const OtherPacket ps = pset1(s); - conj_helper::IsComplex,false> pcj; - conj_helper pm; - Scalar* EIGEN_RESTRICT px = x; - Scalar* EIGEN_RESTRICT py = y; - for(Index i=0; i(px); - Packet yi = pload(py); - pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - px += PacketSize; - py += PacketSize; - } - } +template +void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) +{ + typedef typename VectorX::Scalar Scalar; + const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit) + && (int(packet_traits::size) == int(packet_traits::size)); - /*** non-vectorized path ***/ - else - { - for(Index i=0; i::Alignment, evaluator::Alignment), + Vectorizable>::run(x,incrx,y,incry,size,c,s); } } // end namespace internal diff --git a/third_party_includes/Eigen/src/LU/InverseImpl.h b/third_party_includes/Eigen/src/LU/InverseImpl.h index 018f99b..f49f233 100644 --- a/third_party_includes/Eigen/src/LU/InverseImpl.h +++ b/third_party_includes/Eigen/src/LU/InverseImpl.h @@ -404,7 +404,7 @@ inline void MatrixBase::computeInverseWithCheck( const RealScalar& absDeterminantThreshold ) const { - RealScalar determinant; + Scalar determinant; // i'd love to put some static assertions there, but SFINAE means that they have no effect... eigen_assert(rows() == cols()); computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); diff --git a/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h b/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h index d2ebfd7..160d8a5 100644 --- a/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/third_party_includes/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -64,28 +64,28 @@ namespace internal typedef typename _MatrixType::StorageIndex StorageIndex; }; - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int 
nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast(vals), perm, invp, reinterpret_cast(x), nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} diff --git a/third_party_includes/Eigen/src/SVD/BDCSVD.h b/third_party_includes/Eigen/src/SVD/BDCSVD.h index d7a4271..1134d66 100644 --- a/third_party_includes/Eigen/src/SVD/BDCSVD.h +++ b/third_party_includes/Eigen/src/SVD/BDCSVD.h @@ -11,7 +11,7 @@ // Copyright (C) 2013 Jean Ceccato // Copyright (C) 2013 Pierre Zoppitelli // Copyright (C) 2013 Jitse Niesen -// Copyright (C) 2014-2016 Gael Guennebaud +// Copyright (C) 2014-2017 Gael Guennebaud // // Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -696,7 +696,9 @@ typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar for(Index i=0; i::computeSingVals(const ArrayRef& col0, const ArrayRef& d { using std::abs; using std::swap; + using std::sqrt; Index n = col0.size(); Index actual_n = n; + // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above + // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value. while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) @@ -732,7 +737,9 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d right = (diag(actual_n-1) + col0.matrix().norm()); else { - // Skip deflated singular values + // Skip deflated singular values, + // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside. + // This should be equivalent to using perm[] Index l = k+1; while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar leftShifted, rightShifted; if (shift == left) { - leftShifted = (std::numeric_limits::min)(); + // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)), + // the factor 2 is to be more conservative + leftShifted = numext::maxi( (std::numeric_limits::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits::max)()) ); + + // check that we did it right: + eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) ); // I don't understand why the case k==0 would be special there: - // if (k == 0) rightShifted = right - left; else - rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe + // if (k == 0) rightShifted = right - left; else + rightShifted = (k==actual_n-1) ? 
right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe } else { - leftShifted = -(right - left) * RealScalar(0.6); - rightShifted = -(std::numeric_limits::min)(); + leftShifted = -(right - left) * RealScalar(0.51); + if(k+1( (std::numeric_limits::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits::max)()) ); + else + rightShifted = -(std::numeric_limits::min)(); } RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); @@ -980,7 +995,7 @@ void BDCSVD::deflation43(Index firstCol, Index shift, Index i, Index Index start = firstCol + shift; RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); - RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); + RealScalar r = numext::hypot(c,s); if (r == Literal(0)) { m_computed(start+i, start+i) = Literal(0); diff --git a/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h index 5027215..ff0516f 100644 --- a/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h +++ b/third_party_includes/Eigen/src/SVD/JacobiSVD_LAPACKE.h @@ -61,9 +61,10 @@ JacobiSVD, ColPiv u = (LAPACKE_TYPE*)m_matrixU.data(); \ } else { ldu=1; u=&dummy; }\ MatrixType localV; \ - ldvt = (m_computeFullV) ? internal::convert_index(m_cols) : (m_computeThinV) ? internal::convert_index(m_diagSize) : 1; \ + lapack_int vt_rows = (m_computeFullV) ? internal::convert_index(m_cols) : (m_computeThinV) ? internal::convert_index(m_diagSize) : 1; \ if (computeV()) { \ - localV.resize(ldvt, m_cols); \ + localV.resize(vt_rows, m_cols); \ + ldvt = internal::convert_index(localV.outerStride()); \ vt = (LAPACKE_TYPE*)localV.data(); \ } else { ldvt=1; vt=&dummy; }\ Matrix superb; superb.resize(m_diagSize, 1); \ diff --git a/third_party_includes/Eigen/src/SVD/SVDBase.h b/third_party_includes/Eigen/src/SVD/SVDBase.h index cc90a3b..3d1ef37 100644 --- a/third_party_includes/Eigen/src/SVD/SVDBase.h +++ b/third_party_includes/Eigen/src/SVD/SVDBase.h @@ -180,8 +180,10 @@ class SVDBase RealScalar threshold() const { eigen_assert(m_isInitialized || m_usePrescribedThreshold); + // this temporary is needed to workaround a MSVC issue + Index diagSize = (std::max)(1,m_diagSize); return m_usePrescribedThreshold ? 
m_prescribedThreshold - : (std::max)(1,m_diagSize)*NumTraits::epsilon(); + : diagSize*NumTraits::epsilon(); } /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */ diff --git a/third_party_includes/Eigen/src/SparseCore/AmbiVector.h b/third_party_includes/Eigen/src/SparseCore/AmbiVector.h index 8a5cc91..e0295f2 100644 --- a/third_party_includes/Eigen/src/SparseCore/AmbiVector.h +++ b/third_party_includes/Eigen/src/SparseCore/AmbiVector.h @@ -94,7 +94,7 @@ class AmbiVector Index allocSize = m_allocatedElements * sizeof(ListEl); allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); Scalar* newBuffer = new Scalar[allocSize]; - memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); + std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); delete[] m_buffer; m_buffer = newBuffer; } diff --git a/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 492eb0a..9db119b 100644 --- a/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/third_party_includes/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -17,7 +17,9 @@ namespace internal { template static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) { - typedef typename remove_all::type::Scalar Scalar; + typedef typename remove_all::type::Scalar LhsScalar; + typedef typename remove_all::type::Scalar RhsScalar; + typedef typename remove_all::type::Scalar ResScalar; // make sure to call innerSize/outerSize since we fake the storage order. Index rows = lhs.innerSize(); @@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r eigen_assert(lhs.outerSize() == rhs.innerSize()); ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); + ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0); ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); std::memset(mask,0,sizeof(bool)*rows); @@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r Index nnz = 0; for (typename evaluator::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { - Scalar y = rhsIt.value(); + RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); - Scalar x = lhsIt.value(); + LhsScalar x = lhsIt.value(); if(!mask[i]) { mask[i] = true; @@ -166,11 +168,12 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; - RowMajorMatrix rhsRow = rhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); - res = resRow; + typedef SparseMatrix RowMajorRhs; + typedef SparseMatrix RowMajorRes; + RowMajorRhs rhsRow = rhs; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); + res = resRow; } }; @@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; - RowMajorMatrix lhsRow = lhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); + typedef SparseMatrix RowMajorLhs; + typedef SparseMatrix RowMajorRes; + RowMajorLhs lhsRow = lhs; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + 
internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); res = resRow; } }; @@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector ColMajorMatrix; - ColMajorMatrix lhsCol = lhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); + typedef SparseMatrix ColMajorLhs; + typedef SparseMatrix ColMajorRes; + ColMajorLhs lhsCol = lhs; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); res = resCol; } }; @@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector ColMajorMatrix; - ColMajorMatrix rhsCol = rhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); + typedef SparseMatrix ColMajorRhs; + typedef SparseMatrix ColMajorRes; + ColMajorRhs rhsCol = rhs; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); res = resCol; } }; @@ -263,7 +269,8 @@ namespace internal { template static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef typename remove_all::type::Scalar Scalar; + typedef typename remove_all::type::Scalar LhsScalar; + typedef typename remove_all::type::Scalar RhsScalar; Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); @@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, { for (typename evaluator::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { - Scalar y = rhsIt.value(); + RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); - Scalar x = lhsIt.value(); + LhsScalar x = lhsIt.value(); res.coeffRef(i,j) += x * y; } } @@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector ColMajorMatrix; - ColMajorMatrix lhsCol(lhs); - internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); + typedef SparseMatrix ColMajorLhs; + ColMajorLhs lhsCol(lhs); + internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); } }; @@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector ColMajorMatrix; - ColMajorMatrix rhsCol(rhs); - internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); + typedef SparseMatrix ColMajorRhs; + ColMajorRhs rhsCol(rhs); + internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); } }; diff --git a/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h b/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h index 323c232..0a2490b 100644 --- a/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h +++ b/third_party_includes/Eigen/src/SparseCore/SparseMatrix.h @@ -893,7 +893,7 @@ class SparseMatrix Index p = m_outerIndex[outer] + m_innerNonZeros[outer]++; m_data.index(p) = convert_index(inner); - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } private: @@ -1274,7 +1274,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& m_innerNonZeros[outer]++; m_data.index(p) = inner; - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } template @@ -1381,7 +1381,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar& } m_data.index(p) = inner; - return (m_data.value(p) = 0); + return (m_data.value(p) = Scalar(0)); } namespace internal { diff --git 
a/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h b/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h index 5ab64f1..65611b3 100644 --- a/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/third_party_includes/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -311,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons while (i && i.index()::type::Scalar Scalar; + typedef typename remove_all::type::Scalar RhsScalar; + typedef typename remove_all::type::Scalar ResScalar; typedef typename remove_all::type::StorageIndex StorageIndex; // make sure to call innerSize/outerSize since we fake the storage order. @@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r eigen_assert(lhs.outerSize() == rhs.innerSize()); // allocate a temporary buffer - AmbiVector tempVector(rows); + AmbiVector tempVector(rows); // mimics a resizeByInnerOuter: if(ResultType::IsRowMajor) @@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r { // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index()) tempVector.restart(); - Scalar x = rhsIt.value(); + RhsScalar x = rhsIt.value(); for (typename evaluator::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt) { tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x; } } res.startVec(j); - for (typename AmbiVector::Iterator it(tempVector,tolerance); it; ++it) + for (typename AmbiVector::Iterator it(tempVector,tolerance); it; ++it) res.insertBackByOuterInner(j,it.index()) = it.value(); } res.finalize(); @@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector; template struct sparse_sparse_product_with_pruning_selector { - typedef typename traits::type>::Scalar Scalar; typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) @@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector ColMajorMatrixLhs; - typedef SparseMatrix ColMajorMatrixRhs; + typedef SparseMatrix ColMajorMatrixLhs; + typedef SparseMatrix ColMajorMatrixRhs; ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixRhs colRhs(rhs); internal::sparse_sparse_product_with_pruning_impl(colLhs, colRhs, res, tolerance); @@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector RowMajorMatrixLhs; + typedef SparseMatrix RowMajorMatrixLhs; RowMajorMatrixLhs rowLhs(lhs); sparse_sparse_product_with_pruning_selector(rowLhs,rhs,res,tolerance); } @@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector RowMajorMatrixRhs; + typedef SparseMatrix RowMajorMatrixRhs; RowMajorMatrixRhs rowRhs(rhs); sparse_sparse_product_with_pruning_selector(lhs,rowRhs,res,tolerance); } @@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector ColMajorMatrixRhs; + typedef SparseMatrix ColMajorMatrixRhs; ColMajorMatrixRhs colRhs(rhs); internal::sparse_sparse_product_with_pruning_impl(lhs, colRhs, res, tolerance); } @@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector ColMajorMatrixLhs; + typedef SparseMatrix ColMajorMatrixLhs; ColMajorMatrixLhs colLhs(lhs); internal::sparse_sparse_product_with_pruning_impl(colLhs, rhs, res, tolerance); } diff --git a/third_party_includes/Eigen/src/SparseLU/SparseLU.h b/third_party_includes/Eigen/src/SparseLU/SparseLU.h index f883ab3..7104831 100644 --- a/third_party_includes/Eigen/src/SparseLU/SparseLU.h +++ 
b/third_party_includes/Eigen/src/SparseLU/SparseLU.h @@ -499,8 +499,6 @@ void SparseLU::factorize(const MatrixType& matrix) eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); - typedef typename IndexVector::Scalar StorageIndex; - m_isInitialized = true; diff --git a/third_party_includes/Eigen/src/SparseQR/SparseQR.h b/third_party_includes/Eigen/src/SparseQR/SparseQR.h index 2d4498b..7409fca 100644 --- a/third_party_includes/Eigen/src/SparseQR/SparseQR.h +++ b/third_party_includes/Eigen/src/SparseQR/SparseQR.h @@ -52,7 +52,7 @@ namespace internal { * rank-revealing permutations. Use colsPermutation() to get it. * * Q is the orthogonal matrix represented as products of Householder reflectors. - * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. + * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint. * You can then apply it to a vector. * * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient. @@ -65,6 +65,7 @@ namespace internal { * \implsparsesolverconcept * * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). + * \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix. * */ template @@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase > Index rank = this->rank(); - // Compute Q^T * b; + // Compute Q^* * b; typename Dest::PlainObject y, b; - y = this->matrixQ().transpose() * B; + y = this->matrixQ().adjoint() * B; b = y; // Solve with the triangular matrix R @@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue @@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase(m_qr,other.derived(),false); } + // To use for operations with the adjoint of Q SparseQRMatrixQTransposeReturnType adjoint() const { return SparseQRMatrixQTransposeReturnType(m_qr); } inline Index rows() const { return m_qr.rows(); } - inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); } - // To use for operations with the transpose of Q + inline Index cols() const { return m_qr.rows(); } + // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment SparseQRMatrixQTransposeReturnType transpose() const { return SparseQRMatrixQTransposeReturnType(m_qr); @@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase struct SparseQRMatrixQTransposeReturnType { @@ -712,7 +718,7 @@ struct Assignment, internal: typedef typename DstXprType::StorageIndex StorageIndex; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { - typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); + typename DstXprType::PlainObject idMat(src.rows(), src.cols()); idMat.setIdentity(); // Sort the sparse householder reflectors if needed const_cast(&src.m_qr)->_sort_matrix_Q(); diff --git a/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h b/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h index 50a69f3..7261c7d 100644 --- a/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/third_party_includes/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat) template MappedSparseMatrix map_superlu(SluMatrix& sluMat) { - eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR - || (Flags&ColMajor)==ColMajor && sluMat.Stype == 
SLU_NC); + eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR) + || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC)); Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow; diff --git a/third_party_includes/nanoflann.hpp b/third_party_includes/nanoflann.hpp index 45c185b..0e22899 100644 --- a/third_party_includes/nanoflann.hpp +++ b/third_party_includes/nanoflann.hpp @@ -31,1960 +31,2009 @@ *************************************************************************/ /** \mainpage nanoflann C++ API documentation - * nanoflann is a C++ header-only library for building KD-Trees, mostly - * optimized for 2D or 3D point clouds. - * - * nanoflann does not require compiling or installing, just an - * #include in your code. - * - * See: - * - C++ API organized by modules - * - Online README - * - Doxygen documentation - */ - -#ifndef NANOFLANN_HPP_ -#define NANOFLANN_HPP_ + * nanoflann is a C++ header-only library for building KD-Trees, mostly + * optimized for 2D or 3D point clouds. + * + * nanoflann does not require compiling or installing, just an + * #include in your code. + * + * See: + * - C++ API organized by modules + * - Online README + * - Doxygen + * documentation + */ + +#ifndef NANOFLANN_HPP_ +#define NANOFLANN_HPP_ -#include -#include #include -#include -#include // for fwrite() -#define _USE_MATH_DEFINES // Required by MSVC to define M_PI,etc. in +#include +#include #include // for abs() +#include // for fwrite() #include // for abs() -#include +#include +#include // std::reference_wrapper +#include +#include + +/** Library version: 0xMmP (M=Major,m=minor,P=patch) */ +#define NANOFLANN_VERSION 0x130 // Avoid conflicting declaration of min/max macros in windows headers -#if !defined(NOMINMAX) && (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) -# define NOMINMAX -# ifdef max -# undef max -# undef min -# endif +#if !defined(NOMINMAX) && \ + (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64)) +#define NOMINMAX +#ifdef max +#undef max +#undef min +#endif #endif -namespace nanoflann -{ +namespace nanoflann { /** @addtogroup nanoflann_grp nanoflann C++ library for ANN - * @{ */ - - /** Library version: 0xMmP (M=Major,m=minor,P=patch) */ - #define NANOFLANN_VERSION 0x123 - - /** @addtogroup result_sets_grp Result set classes - * @{ */ - template - class KNNResultSet - { - IndexType * indices; - DistanceType* dists; - CountType capacity; - CountType count; - - public: - inline KNNResultSet(CountType capacity_) : indices(0), dists(0), capacity(capacity_), count(0) - { - } - - inline void init(IndexType* indices_, DistanceType* dists_) - { - indices = indices_; - dists = dists_; - count = 0; - if (capacity) - dists[capacity-1] = (std::numeric_limits::max)(); - } - - inline CountType size() const - { - return count; - } - - inline bool full() const - { - return count == capacity; - } - - - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are sufficient - */ - inline bool addPoint(DistanceType dist, IndexType index) - { - CountType i; - for (i = count; i > 0; --i) { -#ifdef NANOFLANN_FIRST_MATCH // If defined and two points have the same distance, the one with the lowest-index will be returned first. 
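A note on the BDCSVD hunk earlier in this patch: deflation43() now computes r = numext::hypot(c, s) instead of sqrt(numext::abs2(c) + numext::abs2(s)). The two are mathematically equal, but squaring first overflows (or underflows) once c or s approaches the limits of the floating-point range, while a hypot-style implementation rescales internally. A minimal standalone sketch using std::hypot, the standard-library analogue of numext::hypot (illustrative only, not Eigen code):

#include <cmath>
#include <cstdio>

int main() {
  const double c = 1e200, s = 1e200;
  const double naive = std::sqrt(c * c + s * s); // c*c overflows: result is inf
  const double safe  = std::hypot(c, s);         // ~1.414e200, the correct norm
  std::printf("naive = %g, hypot = %g\n", naive, safe);
  return 0;
}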
- if ( (dists[i-1] > dist) || ((dist == dists[i-1]) && (indices[i-1] > index)) ) { + * @{ */ + +/** the PI constant (required to avoid MSVC missing symbols) */ +template T pi_const() { + return static_cast(3.14159265358979323846); +} + +/** + * Traits if object is resizable and assignable (typically has a resize | assign + * method) + */ +template struct has_resize : std::false_type {}; + +template +struct has_resize().resize(1), 0)> + : std::true_type {}; + +template struct has_assign : std::false_type {}; + +template +struct has_assign().assign(1, 0), 0)> + : std::true_type {}; + +/** + * Free function to resize a resizable object + */ +template +inline typename std::enable_if::value, void>::type +resize(Container &c, const size_t nElements) { + c.resize(nElements); +} + +/** + * Free function that has no effects on non resizable containers (e.g. + * std::array) It raises an exception if the expected size does not match + */ +template +inline typename std::enable_if::value, void>::type +resize(Container &c, const size_t nElements) { + if (nElements != c.size()) + throw std::logic_error("Try to change the size of a std::array."); +} + +/** + * Free function to assign to a container + */ +template +inline typename std::enable_if::value, void>::type +assign(Container &c, const size_t nElements, const T &value) { + c.assign(nElements, value); +} + +/** + * Free function to assign to a std::array + */ +template +inline typename std::enable_if::value, void>::type +assign(Container &c, const size_t nElements, const T &value) { + for (size_t i = 0; i < nElements; i++) + c[i] = value; +} + +/** @addtogroup result_sets_grp Result set classes + * @{ */ +template +class KNNResultSet { +public: + typedef _DistanceType DistanceType; + typedef _IndexType IndexType; + typedef _CountType CountType; + +private: + IndexType *indices; + DistanceType *dists; + CountType capacity; + CountType count; + +public: + inline KNNResultSet(CountType capacity_) + : indices(0), dists(0), capacity(capacity_), count(0) {} + + inline void init(IndexType *indices_, DistanceType *dists_) { + indices = indices_; + dists = dists_; + count = 0; + if (capacity) + dists[capacity - 1] = (std::numeric_limits::max)(); + } + + inline CountType size() const { return count; } + + inline bool full() const { return count == capacity; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + inline bool addPoint(DistanceType dist, IndexType index) { + CountType i; + for (i = count; i > 0; --i) { +#ifdef NANOFLANN_FIRST_MATCH // If defined and two points have the same + // distance, the one with the lowest-index will be + // returned first. 
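The addPoint() logic reformatted above (and continuing just below) is a bounded insertion sort: each candidate shifts worse entries one slot to the right until the new (dist, index) pair finds its place, the worst entry falls off the end once the set is full, and dists[capacity-1] is what worstDist() reports. A small usage sketch, independent of any kd-tree and assuming the default size_t index type; the candidate values are made up for illustration:

#include "nanoflann.hpp"
#include <cstdio>

int main() {
  const size_t k = 3;
  size_t indices[3];
  double dists[3];
  nanoflann::KNNResultSet<double> rs(k);
  rs.init(indices, dists);
  // Feed (distance, index) pairs in arbitrary order; only the k nearest
  // survive, kept sorted by distance.
  rs.addPoint(4.0, 40);
  rs.addPoint(1.0, 10);
  rs.addPoint(9.0, 90);
  rs.addPoint(2.0, 20); // evicts (9.0, 90)
  for (size_t i = 0; i < rs.size(); ++i)
    std::printf("rank %zu: index=%zu dist=%g\n", i, indices[i], dists[i]);
  return 0;
}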
+ if ((dists[i - 1] > dist) || + ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else - if (dists[i-1] > dist) { + if (dists[i - 1] > dist) { #endif - if (i < capacity) { - dists[i] = dists[i-1]; - indices[i] = indices[i-1]; - } - } - else break; - } - if (i < capacity) { - dists[i] = dist; - indices[i] = index; - } - if (count < capacity) count++; - - // tell caller that the search shall continue - return true; - } - - inline DistanceType worstDist() const - { - return dists[capacity-1]; - } - }; - - /** operator "<" for std::sort() */ - struct IndexDist_Sorter - { - /** PairType will be typically: std::pair */ - template - inline bool operator()(const PairType &p1, const PairType &p2) const { - return p1.second < p2.second; - } - }; - - /** - * A result-set class used when performing a radius based search. - */ - template - class RadiusResultSet - { - public: - const DistanceType radius; - - std::vector > &m_indices_dists; - - inline RadiusResultSet(DistanceType radius_, std::vector > &indices_dists) : radius(radius_), m_indices_dists(indices_dists) - { - init(); - } - - inline void init() { clear(); } - inline void clear() { m_indices_dists.clear(); } - - inline size_t size() const { return m_indices_dists.size(); } - - inline bool full() const { return true; } - - /** - * Called during search to add an element matching the criteria. - * @return true if the search should be continued, false if the results are sufficient - */ - inline bool addPoint(DistanceType dist, IndexType index) - { - if (dist < radius) - m_indices_dists.push_back(std::make_pair(index, dist)); - return true; - } - - inline DistanceType worstDist() const { return radius; } - - /** - * Find the worst result (furtherest neighbor) without copying or sorting - * Pre-conditions: size() > 0 - */ - std::pair worst_item() const - { - if (m_indices_dists.empty()) throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on an empty list of results."); - typedef typename std::vector >::const_iterator DistIt; - DistIt it = std::max_element(m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); - return *it; - } - }; - - - /** @} */ - - - /** @addtogroup loadsave_grp Load/save auxiliary functions - * @{ */ - template - void save_value(FILE* stream, const T& value, size_t count = 1) - { - fwrite(&value, sizeof(value), count, stream); - } - - template - void save_value(FILE* stream, const std::vector& value) - { - size_t size = value.size(); - fwrite(&size, sizeof(size_t), 1, stream); - fwrite(&value[0], sizeof(T), size, stream); - } - - template - void load_value(FILE* stream, T& value, size_t count = 1) - { - size_t read_cnt = fread(&value, sizeof(value), count, stream); - if (read_cnt != count) { - throw std::runtime_error("Cannot read from file"); - } - } - - - template - void load_value(FILE* stream, std::vector& value) - { - size_t size; - size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); - if (read_cnt != 1) { - throw std::runtime_error("Cannot read from file"); - } - value.resize(size); - read_cnt = fread(&value[0], sizeof(T), size, stream); - if (read_cnt != size) { - throw std::runtime_error("Cannot read from file"); - } - } - /** @} */ - - - /** @addtogroup metric_grp Metric (distance) classes - * @{ */ - - struct Metric - { - }; - - /** Manhattan distance functor (generic version, optimized for high-dimensionality data sets). - * Corresponding distance traits: nanoflann::metric_L1 - * \tparam T Type of the elements (e.g. 
double, float, uint8_t) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. float, double, int64_t) - */ - template - struct L1_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - L1_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const - { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; - - /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) { - const DistanceType diff0 = std::abs(a[0] - data_source.kdtree_get_pt(b_idx,d++)); - const DistanceType diff1 = std::abs(a[1] - data_source.kdtree_get_pt(b_idx,d++)); - const DistanceType diff2 = std::abs(a[2] - data_source.kdtree_get_pt(b_idx,d++)); - const DistanceType diff3 = std::abs(a[3] - data_source.kdtree_get_pt(b_idx,d++)); - result += diff0 + diff1 + diff2 + diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { - return result; - } - } - /* Process last 0-3 components. Not needed for standard vector lengths. */ - while (a < last) { - result += std::abs( *a++ - data_source.kdtree_get_pt(b_idx, d++) ); - } - return result; - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - return std::abs(a-b); - } - }; - - /** Squared Euclidean distance functor (generic version, optimized for high-dimensionality data sets). - * Corresponding distance traits: nanoflann::metric_L2 - * \tparam T Type of the elements (e.g. double, float, uint8_t) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. float, double, int64_t) - */ - template - struct L2_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - L2_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const - { - DistanceType result = DistanceType(); - const T* last = a + size; - const T* lastgroup = last - 3; - size_t d = 0; - - /* Process 4 items with each loop for efficiency. */ - while (a < lastgroup) { - const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx,d++); - const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx,d++); - const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx,d++); - const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx,d++); - result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; - a += 4; - if ((worst_dist > 0) && (result > worst_dist)) { - return result; - } - } - /* Process last 0-3 components. Not needed for standard vector lengths. */ - while (a < last) { - const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); - result += diff0 * diff0; - } - return result; - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - return (a - b) * (a - b); - } - }; - - /** Squared Euclidean (L2) distance functor (suitable for low-dimensionality datasets, like 2D or 3D point clouds) - * Corresponding distance traits: nanoflann::metric_L2_Simple - * \tparam T Type of the elements (e.g. double, float, uint8_t) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
float, double, int64_t) - */ - template - struct L2_Simple_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - L2_Simple_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size) const { - DistanceType result = DistanceType(); - for (size_t i = 0; i < size; ++i) { - const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); - result += diff * diff; - } - return result; - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - return (a - b) * (a - b); - } - }; - - /** SO2 distance functor - * Corresponding distance traits: nanoflann::metric_SO2 - * \tparam T Type of the elements (e.g. double, float) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. float, double) - * orientation is constrained to be in [-pi, pi] - */ - template - struct SO2_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - const DataSource &data_source; - - SO2_Adaptor(const DataSource &_data_source) : data_source(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size) const { - return accum_dist(a[size-1], data_source.kdtree_get_pt(b_idx, size - 1) , size - 1); - } - - template - inline DistanceType accum_dist(const U a, const V b, int ) const - { - DistanceType result = DistanceType(); - result = b - a; - if (result > M_PI) - result -= 2. * M_PI; - else if (result < -M_PI) - result += 2. * M_PI; - return result; - } - }; - - /** SO3 distance functor (Uses L2_Simple) - * Corresponding distance traits: nanoflann::metric_SO3 - * \tparam T Type of the elements (e.g. double, float) - * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
float, double) - */ - template - struct SO3_Adaptor - { - typedef T ElementType; - typedef _DistanceType DistanceType; - - L2_Simple_Adaptor distance_L2_Simple; - - SO3_Adaptor(const DataSource &_data_source) : distance_L2_Simple(_data_source) { } - - inline DistanceType evalMetric(const T* a, const size_t b_idx, size_t size) const { - return distance_L2_Simple.evalMetric(a, b_idx, size); - } - - template - inline DistanceType accum_dist(const U a, const V b, int idx) const - { - return distance_L2_Simple.accum_dist(a, b, idx); - } - }; - - /** Metaprogramming helper traits class for the L1 (Manhattan) metric */ - struct metric_L1 : public Metric - { - template - struct traits { - typedef L1_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the L2 (Euclidean) metric */ - struct metric_L2 : public Metric - { - template - struct traits { - typedef L2_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the L2_simple (Euclidean) metric */ - struct metric_L2_Simple : public Metric - { - template - struct traits { - typedef L2_Simple_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ - struct metric_SO2 : public Metric - { - template - struct traits { - typedef SO2_Adaptor distance_t; - }; - }; - /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ - struct metric_SO3 : public Metric - { - template - struct traits { - typedef SO3_Adaptor distance_t; - }; - }; - - /** @} */ - - /** @addtogroup param_grp Parameter structs - * @{ */ - - /** Parameters (see README.md) */ - struct KDTreeSingleIndexAdaptorParams - { - KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10) : - leaf_max_size(_leaf_max_size) - {} - - size_t leaf_max_size; - }; - - /** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ - struct SearchParams - { - /** Note: The first argument (checks_IGNORED_) is ignored, but kept for compatibility with the FLANN interface */ - SearchParams(int checks_IGNORED_ = 32, float eps_ = 0, bool sorted_ = true ) : - checks(checks_IGNORED_), eps(eps_), sorted(sorted_) {} - - int checks; //!< Ignored parameter (Kept for compatibility with the FLANN interface). - float eps; //!< search for eps-approximate neighbours (default: 0) - bool sorted; //!< only for radius search, require neighbours sorted by distance (default: true) - }; - /** @} */ - - - /** @addtogroup memalloc_grp Memory allocation - * @{ */ - - /** - * Allocates (using C's malloc) a generic type T. - * - * Params: - * count = number of instances to allocate. - * Returns: pointer (of type T*) to memory buffer - */ - template - inline T* allocate(size_t count = 1) - { - T* mem = static_cast( ::malloc(sizeof(T)*count)); - return mem; - } - - - /** - * Pooled storage allocator - * - * The following routines allow for the efficient allocation of storage in - * small chunks from a specified pool. Rather than allowing each structure - * to be freed individually, an entire pool of storage is freed at once. - * This method has two advantages over just using malloc() and free(). First, - * it is far more efficient for allocating small objects, as there is - * no overhead for remembering all the information needed to free each - * object or consolidating fragmented memory. Second, the decision about - * how long to keep an object is made at the time of allocation, and there - * is no need to track down all the objects to free them. 
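The SO2_Adaptor removed just above (its clang-formatted replacement later in this hunk swaps the MSVC-problematic M_PI for pi_const<T>()) reduces the angular difference b - a into [-pi, pi], so a distance never takes the long way around the circle. A standalone sketch of that wrap rule; the function name is illustrative:

#include <cstdio>

// Signed angular difference b - a, wrapped into [-pi, pi].
// Assumes the inputs already lie in [-pi, pi], as SO2_Adaptor requires.
static double so2_diff(double a, double b) {
  const double PI = 3.14159265358979323846;
  double r = b - a;
  if (r > PI)
    r -= 2 * PI;
  else if (r < -PI)
    r += 2 * PI;
  return r;
}

int main() {
  // Crossing the +/-pi seam: from 3.0 rad to -3.0 rad the short way is
  // +0.283 rad (through pi), not -6.0 rad.
  std::printf("%f\n", so2_diff(3.0, -3.0));
  return 0;
}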
- * - */ - - const size_t WORDSIZE = 16; - const size_t BLOCKSIZE = 8192; - - class PooledAllocator - { - /* We maintain memory alignment to word boundaries by requiring that all - allocations be in multiples of the machine wordsize. */ - /* Size of machine word in bytes. Must be power of 2. */ - /* Minimum number of bytes requested at a time from the system. Must be multiple of WORDSIZE. */ - - - size_t remaining; /* Number of bytes left in current block of storage. */ - void* base; /* Pointer to base of current block of storage. */ - void* loc; /* Current location in block to next allocate memory. */ - - void internal_init() - { - remaining = 0; - base = NULL; - usedMemory = 0; - wastedMemory = 0; - } - - public: - size_t usedMemory; - size_t wastedMemory; - - /** - Default constructor. Initializes a new pool. - */ - PooledAllocator() { - internal_init(); - } - - /** - * Destructor. Frees all the memory allocated in this pool. - */ - ~PooledAllocator() { - free_all(); - } - - /** Frees all allocated memory chunks */ - void free_all() - { - while (base != NULL) { - void *prev = *(static_cast( base)); /* Get pointer to prev block. */ - ::free(base); - base = prev; - } - internal_init(); - } - - /** - * Returns a pointer to a piece of new memory of the given size in bytes - * allocated from the pool. - */ - void* malloc(const size_t req_size) - { - /* Round size up to a multiple of wordsize. The following expression - only works for WORDSIZE that is a power of 2, by masking last bits of - incremented size to zero. - */ - const size_t size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); - - /* Check whether a new block must be allocated. Note that the first word - of a block is reserved for a pointer to the previous block. - */ - if (size > remaining) { - - wastedMemory += remaining; - - /* Allocate new storage. */ - const size_t blocksize = (size + sizeof(void*) + (WORDSIZE - 1) > BLOCKSIZE) ? - size + sizeof(void*) + (WORDSIZE - 1) : BLOCKSIZE; - - // use the standard C malloc to allocate memory - void* m = ::malloc(blocksize); - if (!m) { - fprintf(stderr, "Failed to allocate memory.\n"); - return NULL; - } - - /* Fill first word of new block with pointer to previous block. */ - static_cast(m)[0] = base; - base = m; - - size_t shift = 0; - //int size_t = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & (WORDSIZE-1))) & (WORDSIZE-1); - - remaining = blocksize - sizeof(void*) - shift; - loc = (static_cast(m) + sizeof(void*) + shift); - } - void* rloc = loc; - loc = static_cast(loc) + size; - remaining -= size; - - usedMemory += size; - - return rloc; - } - - /** - * Allocates (using this pool) a generic type T. - * - * Params: - * count = number of instances to allocate. - * Returns: pointer (of type T*) to memory buffer - */ - template - T* allocate(const size_t count = 1) - { - T* mem = static_cast(this->malloc(sizeof(T)*count)); - return mem; - } - - }; - /** @} */ - - /** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff - * @{ */ - - // ---------------- CArray ------------------------- - /** A STL container (as wrapper) for arrays of constant size defined at compile time (class imported from the MRPT project) - * This code is an adapted version from Boost, modifed for its integration - * within MRPT (JLBC, Dec/2009) (Renamed array -> CArray to avoid possible potential conflicts). - * See - * http://www.josuttis.com/cppcode - * for details and the latest version. - * See - * http://www.boost.org/libs/array for Documentation. - * for documentation. 
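The CArray wrapper being removed here dates from C++98; the patch supersedes it with std::array plus the has_resize/has_assign traits and the free resize()/assign() helpers introduced near the top of the new header, which grow real containers and merely size-check fixed-size ones. A short sketch of that dispatch, assuming the nanoflann 1.3.0 API shown earlier in this diff:

#include "nanoflann.hpp"
#include <array>
#include <vector>

int main() {
  std::vector<double> v;
  nanoflann::resize(v, 8);      // std::vector has resize(): forwarded
  std::array<double, 8> a;
  nanoflann::resize(a, 8);      // fixed-size container: accepted, nothing to do
  // nanoflann::resize(a, 16);  // would throw std::logic_error
  nanoflann::assign(a, 8, 0.0); // fills all 8 slots element by element
  return 0;
}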
- * - * (C) Copyright Nicolai M. Josuttis 2001. - * Permission to copy, use, modify, sell and distribute this software - * is granted provided this copyright notice appears in all copies. - * This software is provided "as is" without express or implied - * warranty, and with no claim as to its suitability for any purpose. - * - * 29 Jan 2004 - minor fixes (Nico Josuttis) - * 04 Dec 2003 - update to synch with library TR1 (Alisdair Meredith) - * 23 Aug 2002 - fix for Non-MSVC compilers combined with MSVC libraries. - * 05 Aug 2001 - minor update (Nico Josuttis) - * 20 Jan 2001 - STLport fix (Beman Dawes) - * 29 Sep 2000 - Initial Revision (Nico Josuttis) - * - * Jan 30, 2004 - */ - template - class CArray { - public: - T elems[N]; // fixed-size array of elements of type T - - public: - // type definitions - typedef T value_type; - typedef T* iterator; - typedef const T* const_iterator; - typedef T& reference; - typedef const T& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - // iterator support - inline iterator begin() { return elems; } - inline const_iterator begin() const { return elems; } - inline iterator end() { return elems+N; } - inline const_iterator end() const { return elems+N; } - - // reverse iterator support -#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !defined(BOOST_MSVC_STD_ITERATOR) && !defined(BOOST_NO_STD_ITERATOR_TRAITS) - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#elif defined(_MSC_VER) && (_MSC_VER == 1300) && defined(BOOST_DINKUMWARE_STDLIB) && (BOOST_DINKUMWARE_STDLIB == 310) - // workaround for broken reverse_iterator in VC7 - typedef std::reverse_iterator > reverse_iterator; - typedef std::reverse_iterator > const_reverse_iterator; -#else - // workaround for broken reverse_iterator implementations - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; -#endif - - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - // operator[] - inline reference operator[](size_type i) { return elems[i]; } - inline const_reference operator[](size_type i) const { return elems[i]; } - // at() with range check - reference at(size_type i) { rangecheck(i); return elems[i]; } - const_reference at(size_type i) const { rangecheck(i); return elems[i]; } - // front() and back() - reference front() { return elems[0]; } - const_reference front() const { return elems[0]; } - reference back() { return elems[N-1]; } - const_reference back() const { return elems[N-1]; } - // size is constant - static inline size_type size() { return N; } - static bool empty() { return false; } - static size_type max_size() { return N; } - enum { static_size = N }; - /** This method has no effects in this class, but raises an exception if the expected size does not match */ - inline void resize(const size_t nElements) { if (nElements!=N) throw std::logic_error("Try to change the size of a CArray."); } - // swap (note: linear complexity in N, constant for given instantiation) - void swap (CArray& y) { std::swap_ranges(begin(),end(),y.begin()); } - // direct access to data (read-only) - const T* data() const { return elems; } - // use array as C array (direct read/write access to data) - T* data() { return 
elems; } - // assignment with type conversion - template CArray& operator= (const CArray& rhs) { - std::copy(rhs.begin(),rhs.end(), begin()); - return *this; + if (i < capacity) { + dists[i] = dists[i - 1]; + indices[i] = indices[i - 1]; + } + } else + break; + } + if (i < capacity) { + dists[i] = dist; + indices[i] = index; + } + if (count < capacity) + count++; + + // tell caller that the search shall continue + return true; + } + + inline DistanceType worstDist() const { return dists[capacity - 1]; } +}; + +/** operator "<" for std::sort() */ +struct IndexDist_Sorter { + /** PairType will be typically: std::pair */ + template + inline bool operator()(const PairType &p1, const PairType &p2) const { + return p1.second < p2.second; + } +}; + +/** + * A result-set class used when performing a radius based search. + */ +template +class RadiusResultSet { +public: + typedef _DistanceType DistanceType; + typedef _IndexType IndexType; + +public: + const DistanceType radius; + + std::vector> &m_indices_dists; + + inline RadiusResultSet( + DistanceType radius_, + std::vector> &indices_dists) + : radius(radius_), m_indices_dists(indices_dists) { + init(); + } + + inline void init() { clear(); } + inline void clear() { m_indices_dists.clear(); } + + inline size_t size() const { return m_indices_dists.size(); } + + inline bool full() const { return true; } + + /** + * Called during search to add an element matching the criteria. + * @return true if the search should be continued, false if the results are + * sufficient + */ + inline bool addPoint(DistanceType dist, IndexType index) { + if (dist < radius) + m_indices_dists.push_back(std::make_pair(index, dist)); + return true; + } + + inline DistanceType worstDist() const { return radius; } + + /** + * Find the worst result (furtherest neighbor) without copying or sorting + * Pre-conditions: size() > 0 + */ + std::pair worst_item() const { + if (m_indices_dists.empty()) + throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on " + "an empty list of results."); + typedef + typename std::vector>::const_iterator + DistIt; + DistIt it = std::max_element(m_indices_dists.begin(), m_indices_dists.end(), + IndexDist_Sorter()); + return *it; + } +}; + +/** @} */ + +/** @addtogroup loadsave_grp Load/save auxiliary functions + * @{ */ +template +void save_value(FILE *stream, const T &value, size_t count = 1) { + fwrite(&value, sizeof(value), count, stream); +} + +template +void save_value(FILE *stream, const std::vector &value) { + size_t size = value.size(); + fwrite(&size, sizeof(size_t), 1, stream); + fwrite(&value[0], sizeof(T), size, stream); +} + +template +void load_value(FILE *stream, T &value, size_t count = 1) { + size_t read_cnt = fread(&value, sizeof(value), count, stream); + if (read_cnt != count) { + throw std::runtime_error("Cannot read from file"); + } +} + +template void load_value(FILE *stream, std::vector &value) { + size_t size; + size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); + if (read_cnt != 1) { + throw std::runtime_error("Cannot read from file"); + } + value.resize(size); + read_cnt = fread(&value[0], sizeof(T), size, stream); + if (read_cnt != size) { + throw std::runtime_error("Cannot read from file"); + } +} +/** @} */ + +/** @addtogroup metric_grp Metric (distance) classes + * @{ */ + +struct Metric {}; + +/** Manhattan distance functor (generic version, optimized for + * high-dimensionality data sets). Corresponding distance traits: + * nanoflann::metric_L1 \tparam T Type of the elements (e.g. 
double, float, + * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) + * (e.g. float, double, int64_t) + */ +template +struct L1_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + L1_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, + DistanceType worst_dist = -1) const { + DistanceType result = DistanceType(); + const T *last = a + size; + const T *lastgroup = last - 3; + size_t d = 0; + + /* Process 4 items with each loop for efficiency. */ + while (a < lastgroup) { + const DistanceType diff0 = + std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff1 = + std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff2 = + std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); + const DistanceType diff3 = + std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); + result += diff0 + diff1 + diff2 + diff3; + a += 4; + if ((worst_dist > 0) && (result > worst_dist)) { + return result; + } + } + /* Process last 0-3 components. Not needed for standard vector lengths. */ + while (a < last) { + result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); + } + return result; + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + return std::abs(a - b); + } +}; + +/** Squared Euclidean distance functor (generic version, optimized for + * high-dimensionality data sets). Corresponding distance traits: + * nanoflann::metric_L2 \tparam T Type of the elements (e.g. double, float, + * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) + * (e.g. float, double, int64_t) + */ +template +struct L2_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + L2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, + DistanceType worst_dist = -1) const { + DistanceType result = DistanceType(); + const T *last = a + size; + const T *lastgroup = last - 3; + size_t d = 0; + + /* Process 4 items with each loop for efficiency. */ + while (a < lastgroup) { + const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx, d++); + const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx, d++); + result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; + a += 4; + if ((worst_dist > 0) && (result > worst_dist)) { + return result; + } + } + /* Process last 0-3 components. Not needed for standard vector lengths. */ + while (a < last) { + const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); + result += diff0 * diff0; + } + return result; + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + return (a - b) * (a - b); + } +}; + +/** Squared Euclidean (L2) distance functor (suitable for low-dimensionality + * datasets, like 2D or 3D point clouds) Corresponding distance traits: + * nanoflann::metric_L2_Simple \tparam T Type of the elements (e.g. double, + * float, uint8_t) \tparam _DistanceType Type of distance variables (must be + * signed) (e.g. 
float, double, int64_t) + */ +template +struct L2_Simple_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + L2_Simple_Adaptor(const DataSource &_data_source) + : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, + size_t size) const { + DistanceType result = DistanceType(); + for (size_t i = 0; i < size; ++i) { + const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); + result += diff * diff; + } + return result; + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + return (a - b) * (a - b); + } +}; + +/** SO2 distance functor + * Corresponding distance traits: nanoflann::metric_SO2 + * \tparam T Type of the elements (e.g. double, float) + * \tparam _DistanceType Type of distance variables (must be signed) (e.g. + * float, double) orientation is constrained to be in [-pi, pi] + */ +template +struct SO2_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + const DataSource &data_source; + + SO2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, + size_t size) const { + return accum_dist(a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), + size - 1); + } + + /** Note: this assumes that input angles are already in the range [-pi,pi] */ + template + inline DistanceType accum_dist(const U a, const V b, const size_t) const { + DistanceType result = DistanceType(), PI = pi_const(); + result = b - a; + if (result > PI) + result -= 2 * PI; + else if (result < -PI) + result += 2 * PI; + return result; + } +}; + +/** SO3 distance functor (Uses L2_Simple) + * Corresponding distance traits: nanoflann::metric_SO3 + * \tparam T Type of the elements (e.g. double, float) + * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
+ * float, double) + */ +template +struct SO3_Adaptor { + typedef T ElementType; + typedef _DistanceType DistanceType; + + L2_Simple_Adaptor distance_L2_Simple; + + SO3_Adaptor(const DataSource &_data_source) + : distance_L2_Simple(_data_source) {} + + inline DistanceType evalMetric(const T *a, const size_t b_idx, + size_t size) const { + return distance_L2_Simple.evalMetric(a, b_idx, size); + } + + template + inline DistanceType accum_dist(const U a, const V b, const size_t idx) const { + return distance_L2_Simple.accum_dist(a, b, idx); + } +}; + +/** Metaprogramming helper traits class for the L1 (Manhattan) metric */ +struct metric_L1 : public Metric { + template struct traits { + typedef L1_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the L2 (Euclidean) metric */ +struct metric_L2 : public Metric { + template struct traits { + typedef L2_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the L2_simple (Euclidean) metric */ +struct metric_L2_Simple : public Metric { + template struct traits { + typedef L2_Simple_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ +struct metric_SO2 : public Metric { + template struct traits { + typedef SO2_Adaptor distance_t; + }; +}; +/** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ +struct metric_SO3 : public Metric { + template struct traits { + typedef SO3_Adaptor distance_t; + }; +}; + +/** @} */ + +/** @addtogroup param_grp Parameter structs + * @{ */ + +/** Parameters (see README.md) */ +struct KDTreeSingleIndexAdaptorParams { + KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10) + : leaf_max_size(_leaf_max_size) {} + + size_t leaf_max_size; +}; + +/** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ +struct SearchParams { + /** Note: The first argument (checks_IGNORED_) is ignored, but kept for + * compatibility with the FLANN interface */ + SearchParams(int checks_IGNORED_ = 32, float eps_ = 0, bool sorted_ = true) + : checks(checks_IGNORED_), eps(eps_), sorted(sorted_) {} + + int checks; //!< Ignored parameter (Kept for compatibility with the FLANN + //!< interface). + float eps; //!< search for eps-approximate neighbours (default: 0) + bool sorted; //!< only for radius search, require neighbours sorted by + //!< distance (default: true) +}; +/** @} */ + +/** @addtogroup memalloc_grp Memory allocation + * @{ */ + +/** + * Allocates (using C's malloc) a generic type T. + * + * Params: + * count = number of instances to allocate. + * Returns: pointer (of type T*) to memory buffer + */ +template inline T *allocate(size_t count = 1) { + T *mem = static_cast(::malloc(sizeof(T) * count)); + return mem; +} + +/** + * Pooled storage allocator + * + * The following routines allow for the efficient allocation of storage in + * small chunks from a specified pool. Rather than allowing each structure + * to be freed individually, an entire pool of storage is freed at once. + * This method has two advantages over just using malloc() and free(). First, + * it is far more efficient for allocating small objects, as there is + * no overhead for remembering all the information needed to free each + * object or consolidating fragmented memory. Second, the decision about + * how long to keep an object is made at the time of allocation, and there + * is no need to track down all the objects to free them. 
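For reference, how the two parameter structs above are consumed in practice, using the same Eigen matrix adaptor this project relies on: leaf_max_size trades build time against query time, SearchParams::checks is ignored (kept for FLANN compatibility), and sorted only matters for radius searches. A sketch assuming the adaptor's IndexType is MatrixX3d::Index, as in the nanoflann version bundled here, and that the adaptor builds the index in its constructor (an explicit rebuild via tree.index->buildIndex() is also possible); sizes and the query point are illustrative:

#include "nanoflann.hpp"
#include <Eigen/Dense>
#include <vector>

int main() {
  Eigen::MatrixX3d pts = Eigen::MatrixX3d::Random(1000, 3); // one point per row
  typedef nanoflann::KDTreeEigenMatrixAdaptor<Eigen::MatrixX3d> kd_tree;
  kd_tree tree(3, pts, 10 /* leaf_max_size */);

  const double query[3] = {0.0, 0.0, 0.0};
  std::vector<Eigen::MatrixX3d::Index> idx(5);
  std::vector<double> dist2(5); // squared L2 distances
  tree.index->knnSearch(&query[0], 5, &idx[0], &dist2[0]);
  return 0;
}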
+ * + */ + +const size_t WORDSIZE = 16; +const size_t BLOCKSIZE = 8192; + +class PooledAllocator { + /* We maintain memory alignment to word boundaries by requiring that all + allocations be in multiples of the machine wordsize. */ + /* Size of machine word in bytes. Must be power of 2. */ + /* Minimum number of bytes requested at a time from the system. Must be + * multiple of WORDSIZE. */ + + size_t remaining; /* Number of bytes left in current block of storage. */ + void *base; /* Pointer to base of current block of storage. */ + void *loc; /* Current location in block to next allocate memory. */ + + void internal_init() { + remaining = 0; + base = NULL; + usedMemory = 0; + wastedMemory = 0; + } + +public: + size_t usedMemory; + size_t wastedMemory; + + /** + Default constructor. Initializes a new pool. + */ + PooledAllocator() { internal_init(); } + + /** + * Destructor. Frees all the memory allocated in this pool. + */ + ~PooledAllocator() { free_all(); } + + /** Frees all allocated memory chunks */ + void free_all() { + while (base != NULL) { + void *prev = + *(static_cast(base)); /* Get pointer to prev block. */ + ::free(base); + base = prev; + } + internal_init(); + } + + /** + * Returns a pointer to a piece of new memory of the given size in bytes + * allocated from the pool. + */ + void *malloc(const size_t req_size) { + /* Round size up to a multiple of wordsize. The following expression + only works for WORDSIZE that is a power of 2, by masking last bits of + incremented size to zero. + */ + const size_t size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); + + /* Check whether a new block must be allocated. Note that the first word + of a block is reserved for a pointer to the previous block. + */ + if (size > remaining) { + + wastedMemory += remaining; + + /* Allocate new storage. */ + const size_t blocksize = + (size + sizeof(void *) + (WORDSIZE - 1) > BLOCKSIZE) + ? size + sizeof(void *) + (WORDSIZE - 1) + : BLOCKSIZE; + + // use the standard C malloc to allocate memory + void *m = ::malloc(blocksize); + if (!m) { + fprintf(stderr, "Failed to allocate memory.\n"); + return NULL; + } + + /* Fill first word of new block with pointer to previous block. */ + static_cast(m)[0] = base; + base = m; + + size_t shift = 0; + // int size_t = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & + // (WORDSIZE-1))) & (WORDSIZE-1); + + remaining = blocksize - sizeof(void *) - shift; + loc = (static_cast(m) + sizeof(void *) + shift); + } + void *rloc = loc; + loc = static_cast(loc) + size; + remaining -= size; + + usedMemory += size; + + return rloc; + } + + /** + * Allocates (using this pool) a generic type T. + * + * Params: + * count = number of instances to allocate. + * Returns: pointer (of type T*) to memory buffer + */ + template T *allocate(const size_t count = 1) { + T *mem = static_cast(this->malloc(sizeof(T) * count)); + return mem; + } +}; +/** @} */ + +/** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff + * @{ */ + +/** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors + * when DIM=-1. Fixed size version for a generic DIM: + */ +template struct array_or_vector_selector { + typedef std::array container_t; +}; +/** Dynamic size version */ +template struct array_or_vector_selector<-1, T> { + typedef std::vector container_t; +}; + +/** @} */ + +/** kd-tree base-class + * + * Contains the member functions common to the classes KDTreeSingleIndexAdaptor + * and KDTreeSingleIndexDynamicAdaptor_. 
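One detail worth spelling out in PooledAllocator::malloc() above: the expression (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1) rounds the request up to the next multiple of WORDSIZE, and it is correct only because WORDSIZE is a power of two, so the mask clears exactly the low bits. A worked check:

#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t WORDSIZE = 16; // must be a power of two
  const std::size_t requests[] = {1, 15, 16, 17, 40};
  for (std::size_t req : requests) {
    std::size_t rounded = (req + (WORDSIZE - 1)) & ~(WORDSIZE - 1);
    std::printf("%zu -> %zu\n", req, rounded); // 1->16, 15->16, 16->16, 17->32, 40->48
  }
  return 0;
}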
+ * + * \tparam Derived The name of the class which inherits this class. + * \tparam DatasetAdaptor The user-provided adaptor (see comments above). + * \tparam Distance The distance metric to use, these are all classes derived + * from nanoflann::Metric \tparam DIM Dimensionality of data points (e.g. 3 for + * 3D points) \tparam IndexType Will be typically size_t or int + */ + +template +class KDTreeBaseClass { + +public: + /** Frees the previously-built index. Automatically called within + * buildIndex(). */ + void freeIndex(Derived &obj) { + obj.pool.free_all(); + obj.root_node = NULL; + obj.m_size_at_index_build = 0; + } + + typedef typename Distance::ElementType ElementType; + typedef typename Distance::DistanceType DistanceType; + + /*--------------------- Internal Data Structures --------------------------*/ + struct Node { + /** Union used because a node can be either a LEAF node or a non-leaf node, + * so both data fields are never used simultaneously */ + union { + struct leaf { + IndexType left, right; //!< Indices of points in leaf node + } lr; + struct nonleaf { + int divfeat; //!< Dimension used for subdivision. + DistanceType divlow, divhigh; //!< The values used for subdivision. + } sub; + } node_type; + Node *child1, *child2; //!< Child nodes (both=NULL mean its a leaf node) + }; + + typedef Node *NodePtr; + + struct Interval { + ElementType low, high; + }; + + /** + * Array of indices to vectors in the dataset. + */ + std::vector vind; + + NodePtr root_node; + + size_t m_leaf_max_size; + + size_t m_size; //!< Number of current points in the dataset + size_t m_size_at_index_build; //!< Number of points in the dataset when the + //!< index was built + int dim; //!< Dimensionality of each data point + + /** Define "BoundingBox" as a fixed-size or variable-size container depending + * on "DIM" */ + typedef + typename array_or_vector_selector::container_t BoundingBox; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + typedef typename array_or_vector_selector::container_t + distance_vector_t; + + /** The KD-tree used to find neighbours */ + + BoundingBox root_bbox; + + /** + * Pooled memory allocator. + * + * Using a pooled memory allocator is more efficient + * than allocating memory directly when there is a large + * number small of memory allocations. + */ + PooledAllocator pool; + + /** Returns number of points in dataset */ + size_t size(const Derived &obj) const { return obj.m_size; } + + /** Returns the length of each point in the dataset */ + size_t veclen(const Derived &obj) { + return static_cast(DIM > 0 ? 
DIM : obj.dim); + } + + /// Helper accessor to the dataset points: + inline ElementType dataset_get(const Derived &obj, size_t idx, + int component) const { + return obj.dataset.kdtree_get_pt(idx, component); + } + + /** + * Computes the inde memory usage + * Returns: memory used by the index + */ + size_t usedMemory(Derived &obj) { + return obj.pool.usedMemory + obj.pool.wastedMemory + + obj.dataset.kdtree_get_point_count() * + sizeof(IndexType); // pool memory and vind array memory + } + + void computeMinMax(const Derived &obj, IndexType *ind, IndexType count, + int element, ElementType &min_elem, + ElementType &max_elem) { + min_elem = dataset_get(obj, ind[0], element); + max_elem = dataset_get(obj, ind[0], element); + for (IndexType i = 1; i < count; ++i) { + ElementType val = dataset_get(obj, ind[i], element); + if (val < min_elem) + min_elem = val; + if (val > max_elem) + max_elem = val; + } + } + + /** + * Create a tree node that subdivides the list of vecs from vind[first] + * to vind[last]. The routine is called recursively on each sublist. + * + * @param left index of the first vector + * @param right index of the last vector + */ + NodePtr divideTree(Derived &obj, const IndexType left, const IndexType right, + BoundingBox &bbox) { + NodePtr node = obj.pool.template allocate(); // allocate memory + + /* If too few exemplars remain, then make this a leaf node. */ + if ((right - left) <= static_cast(obj.m_leaf_max_size)) { + node->child1 = node->child2 = NULL; /* Mark as leaf node. */ + node->node_type.lr.left = left; + node->node_type.lr.right = right; + + // compute bounding-box of leaf points + for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { + bbox[i].low = dataset_get(obj, obj.vind[left], i); + bbox[i].high = dataset_get(obj, obj.vind[left], i); + } + for (IndexType k = left + 1; k < right; ++k) { + for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { + if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) + bbox[i].low = dataset_get(obj, obj.vind[k], i); + if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) + bbox[i].high = dataset_get(obj, obj.vind[k], i); + } + } + } else { + IndexType idx; + int cutfeat; + DistanceType cutval; + middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, + bbox); + + node->node_type.sub.divfeat = cutfeat; + + BoundingBox left_bbox(bbox); + left_bbox[cutfeat].high = cutval; + node->child1 = divideTree(obj, left, left + idx, left_bbox); + + BoundingBox right_bbox(bbox); + right_bbox[cutfeat].low = cutval; + node->child2 = divideTree(obj, left + idx, right, right_bbox); + + node->node_type.sub.divlow = left_bbox[cutfeat].high; + node->node_type.sub.divhigh = right_bbox[cutfeat].low; + + for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { + bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); + bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); + } + } + + return node; + } + + void middleSplit_(Derived &obj, IndexType *ind, IndexType count, + IndexType &index, int &cutfeat, DistanceType &cutval, + const BoundingBox &bbox) { + const DistanceType EPS = static_cast(0.00001); + ElementType max_span = bbox[0].high - bbox[0].low; + for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) { + ElementType span = bbox[i].high - bbox[i].low; + if (span > max_span) { + max_span = span; + } + } + ElementType max_spread = -1; + cutfeat = 0; + for (int i = 0; i < (DIM > 0 ? 
+      ElementType span = bbox[i].high - bbox[i].low;
+      if (span > (1 - EPS) * max_span) {
+        ElementType min_elem, max_elem;
+        computeMinMax(obj, ind, count, i, min_elem, max_elem);
+        ElementType spread = max_elem - min_elem;
+        if (spread > max_spread) {
+          cutfeat = i;
+          max_spread = spread;
+        }
+      }
+    }
+    // split in the middle
+    DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2;
+    ElementType min_elem, max_elem;
+    computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem);
+
+    if (split_val < min_elem)
+      cutval = min_elem;
+    else if (split_val > max_elem)
+      cutval = max_elem;
+    else
+      cutval = split_val;
+
+    IndexType lim1, lim2;
+    planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2);
+
+    if (lim1 > count / 2)
+      index = lim1;
+    else if (lim2 < count / 2)
+      index = lim2;
+    else
+      index = count / 2;
+  }
+
+  /**
+   * Subdivide the list of points by a plane perpendicular to the axis
+   * corresponding to the 'cutfeat' dimension at 'cutval' position.
+   *
+   * On return:
+   * dataset[ind[0..lim1-1]][cutfeat] < cutval
+   * dataset[ind[lim1..lim2-1]][cutfeat] == cutval
+   * dataset[ind[lim2..count]][cutfeat] > cutval
+   */
+  void planeSplit(Derived &obj, IndexType *ind, const IndexType count,
+                  int cutfeat, DistanceType &cutval, IndexType &lim1,
+                  IndexType &lim2) {
+    /* Move vector indices for left subtree to front of list. */
+    IndexType left = 0;
+    IndexType right = count - 1;
+    for (;;) {
+      while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval)
+        ++left;
+      while (right && left <= right &&
+             dataset_get(obj, ind[right], cutfeat) >= cutval)
+        --right;
+      if (left > right || !right)
+        break; // "!right" was added to support unsigned Index types
+      std::swap(ind[left], ind[right]);
+      ++left;
+      --right;
+    }
+    /* If either list is empty, it means that all remaining features
+     * are identical. Split in the middle to maintain a balanced tree.
+     */
+    lim1 = left;
+    right = count - 1;
+    for (;;) {
+      while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval)
+        ++left;
+      while (right && left <= right &&
+             dataset_get(obj, ind[right], cutfeat) > cutval)
+        --right;
+      if (left > right || !right)
+        break; // "!right" was added to support unsigned Index types
+      std::swap(ind[left], ind[right]);
+      ++left;
+      --right;
+    }
+    lim2 = left;
+  }
+
+  DistanceType computeInitialDistances(const Derived &obj,
+                                       const ElementType *vec,
+                                       distance_vector_t &dists) const {
+    assert(vec);
+    DistanceType distsq = DistanceType();
+
+    for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) {
+      if (vec[i] < obj.root_bbox[i].low) {
+        dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i);
+        distsq += dists[i];
+      }
+      if (vec[i] > obj.root_bbox[i].high) {
+        dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i);
+        distsq += dists[i];
+      }
+    }
+    return distsq;
+  }
+
+  void save_tree(Derived &obj, FILE *stream, NodePtr tree) {
+    save_value(stream, *tree);
+    if (tree->child1 != NULL) {
+      save_tree(obj, stream, tree->child1);
+    }
+    if (tree->child2 != NULL) {
+      save_tree(obj, stream, tree->child2);
+    }
+  }
+
+  void load_tree(Derived &obj, FILE *stream, NodePtr &tree) {
+    tree = obj.pool.template allocate<Node>();
+    load_value(stream, *tree);
+    if (tree->child1 != NULL) {
+      load_tree(obj, stream, tree->child1);
+    }
+    if (tree->child2 != NULL) {
+      load_tree(obj, stream, tree->child2);
+    }
+  }
+
+  /** Stores the index in a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when
+   * loading the index object it must be constructed associated to the same
+   * source of data points used while building it.
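+   * A minimal save/load round-trip sketch (the file name and the `index`
+   * variable are illustrative only, not part of the API):
+   * \code
+   * FILE *f = fopen("index.bin", "wb");
+   * index.saveIndex(f);  // public wrapper defined in the derived classes
+   * fclose(f);
+   * // ... later, with a fresh index constructed over the SAME points:
+   * f = fopen("index.bin", "rb");
+   * index.loadIndex(f);
+   * fclose(f);
+   * \endcode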
+   * See the example:
+   * examples/saveload_example.cpp \sa loadIndex */
+  void saveIndex_(Derived &obj, FILE *stream) {
+    save_value(stream, obj.m_size);
+    save_value(stream, obj.dim);
+    save_value(stream, obj.root_bbox);
+    save_value(stream, obj.m_leaf_max_size);
+    save_value(stream, obj.vind);
+    save_tree(obj, stream, obj.root_node);
+  }
+
+  /** Loads a previous index from a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the
+   * index object must be constructed associated to the same source of data
+   * points used while building the index. See the example:
+   * examples/saveload_example.cpp \sa loadIndex */
+  void loadIndex_(Derived &obj, FILE *stream) {
+    load_value(stream, obj.m_size);
+    load_value(stream, obj.dim);
+    load_value(stream, obj.root_bbox);
+    load_value(stream, obj.m_leaf_max_size);
+    load_value(stream, obj.vind);
+    load_tree(obj, stream, obj.root_node);
+  }
+};
+
+/** @addtogroup kdtrees_grp KD-tree classes and adaptors
+ * @{ */
+
+/** kd-tree static index
+ *
+ * Contains the k-d trees and other information for indexing a set of points
+ * for nearest-neighbor matching.
+ *
+ * The class "DatasetAdaptor" must provide the following interface (can be
+ * non-virtual, inlined methods):
+ *
+ * \code
+ * // Must return the number of data points
+ * inline size_t kdtree_get_point_count() const { ... }
+ *
+ *
+ * // Must return the dim'th component of the idx'th point in the class:
+ * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
+ *
+ * // Optional bounding-box computation: return false to default to a standard
+ * bbox computation loop.
+ * // Return true if the BBOX was already computed by the class and returned
+ * in "bb" so it can be avoided to redo it again.
+ * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
+ * for point clouds) template <class BBOX> bool kdtree_get_bbox(BBOX &bb) const
+ * {
+ *   bb[0].low = ...; bb[0].high = ...; // 0th dimension limits
+ *   bb[1].low = ...; bb[1].high = ...; // 1st dimension limits
+ *   ...
+ *   return true;
+ * }
+ *
+ * \endcode
+ *
+ * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM
+ * Dimensionality of data points (e.g.
3 for 3D points) \tparam IndexType Will + * be typically size_t or int + */ +template +class KDTreeSingleIndexAdaptor + : public KDTreeBaseClass< + KDTreeSingleIndexAdaptor, + Distance, DatasetAdaptor, DIM, IndexType> { +public: + /** Deleted copy constructor*/ + KDTreeSingleIndexAdaptor( + const KDTreeSingleIndexAdaptor + &) = delete; + + /** + * The dataset used by this index + */ + const DatasetAdaptor &dataset; //!< The source of our data + + const KDTreeSingleIndexAdaptorParams index_params; + + Distance distance; + + typedef typename nanoflann::KDTreeBaseClass< + nanoflann::KDTreeSingleIndexAdaptor, + Distance, DatasetAdaptor, DIM, IndexType> + BaseClassRef; + + typedef typename BaseClassRef::ElementType ElementType; + typedef typename BaseClassRef::DistanceType DistanceType; + + typedef typename BaseClassRef::Node Node; + typedef Node *NodePtr; + + typedef typename BaseClassRef::Interval Interval; + /** Define "BoundingBox" as a fixed-size or variable-size container depending + * on "DIM" */ + typedef typename BaseClassRef::BoundingBox BoundingBox; + + /** Define "distance_vector_t" as a fixed-size or variable-size container + * depending on "DIM" */ + typedef typename BaseClassRef::distance_vector_t distance_vector_t; + + /** + * KDTree constructor + * + * Refer to docs in README.md or online in + * https://github.com/jlblancoc/nanoflann + * + * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 + * for 3D points) is determined by means of: + * - The \a DIM template parameter if >0 (highest priority) + * - Otherwise, the \a dimensionality parameter of this constructor. + * + * @param inputData Dataset with the input features + * @param params Basically, the maximum leaf node size + */ + KDTreeSingleIndexAdaptor(const int dimensionality, + const DatasetAdaptor &inputData, + const KDTreeSingleIndexAdaptorParams ¶ms = + KDTreeSingleIndexAdaptorParams()) + : dataset(inputData), index_params(params), distance(inputData) { + BaseClassRef::root_node = NULL; + BaseClassRef::m_size = dataset.kdtree_get_point_count(); + BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; + BaseClassRef::dim = dimensionality; + if (DIM > 0) + BaseClassRef::dim = DIM; + BaseClassRef::m_leaf_max_size = params.leaf_max_size; + + // Create a permutable array of indices to the input vectors. + init_vind(); + } + + /** + * Builds the index + */ + void buildIndex() { + BaseClassRef::m_size = dataset.kdtree_get_point_count(); + BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; + init_vind(); + this->freeIndex(*this); + BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; + if (BaseClassRef::m_size == 0) + return; + computeBoundingBox(BaseClassRef::root_bbox); + BaseClassRef::root_node = + this->divideTree(*this, 0, BaseClassRef::m_size, + BaseClassRef::root_bbox); // construct the tree + } + + /** \name Query methods + * @{ */ + + /** + * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored + * inside the result object. + * + * Params: + * result = the result object in which the indices of the + * nearest-neighbors are stored vec = the vector for which to search the + * nearest neighbors + * + * \tparam RESULTSET Should be any ResultSet + * \return True if the requested neighbors could be found. 
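+   * A minimal usage sketch, mirroring what knnSearch() does internally
+   * (assumes a built index over float data and a pointer `query` to at least
+   * `dim` valid floats; the names are illustrative):
+   * \code
+   * size_t ret_index[10];
+   * float out_dist_sqr[10];
+   * nanoflann::KNNResultSet<float> resultSet(10);
+   * resultSet.init(ret_index, out_dist_sqr);
+   * index.findNeighbors(resultSet, query, nanoflann::SearchParams());
+   * \endcode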
+ * \sa knnSearch, radiusSearch + */ + template + bool findNeighbors(RESULTSET &result, const ElementType *vec, + const SearchParams &searchParams) const { + assert(vec); + if (this->size(*this) == 0) + return false; + if (!BaseClassRef::root_node) + throw std::runtime_error( + "[nanoflann] findNeighbors() called before building the index."); + float epsError = 1 + searchParams.eps; + + distance_vector_t + dists; // fixed or variable-sized container (depending on DIM) + auto zero = static_cast(0); + assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), + zero); // Fill it with zeros. + DistanceType distsq = this->computeInitialDistances(*this, vec, dists); + searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, + epsError); // "count_leaf" parameter removed since was neither + // used nor returned to the user. + return result.full(); + } + + /** + * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. + * Their indices are stored inside the result object. \sa radiusSearch, + * findNeighbors \note nChecks_IGNORED is ignored but kept for compatibility + * with the original FLANN interface. \return Number `N` of valid points in + * the result set. Only the first `N` entries in `out_indices` and + * `out_distances_sq` will be valid. Return may be less than `num_closest` + * only if the number of elements in the tree is less than `num_closest`. + */ + size_t knnSearch(const ElementType *query_point, const size_t num_closest, + IndexType *out_indices, DistanceType *out_distances_sq, + const int /* nChecks_IGNORED */ = 10) const { + nanoflann::KNNResultSet resultSet(num_closest); + resultSet.init(out_indices, out_distances_sq); + this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); + return resultSet.size(); + } + + /** + * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. + * The output is given as a vector of pairs, of which the first element is a + * point index and the second the corresponding distance. Previous contents of + * \a IndicesDists are cleared. + * + * If searchParams.sorted==true, the output list is sorted by ascending + * distances. + * + * For a better performance, it is advisable to do a .reserve() on the vector + * if you have any wild guess about the number of expected matches. + * + * \sa knnSearch, findNeighbors, radiusSearchCustomCallback + * \return The number of points within the given radius (i.e. indices.size() + * or dists.size() ) + */ + size_t + radiusSearch(const ElementType *query_point, const DistanceType &radius, + std::vector> &IndicesDists, + const SearchParams &searchParams) const { + RadiusResultSet resultSet(radius, IndicesDists); + const size_t nFound = + radiusSearchCustomCallback(query_point, resultSet, searchParams); + if (searchParams.sorted) + std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); + return nFound; + } + + /** + * Just like radiusSearch() but with a custom callback class for each point + * found in the radius of the query. See the source of RadiusResultSet<> as a + * start point for your own classes. \sa radiusSearch + */ + template + size_t radiusSearchCustomCallback( + const ElementType *query_point, SEARCH_CALLBACK &resultSet, + const SearchParams &searchParams = SearchParams()) const { + this->findNeighbors(resultSet, query_point, searchParams); + return resultSet.size(); + } + + /** @} */ + +public: + /** Make sure the auxiliary list \a vind has the same size than the current + * dataset, and re-generate if size has changed. 
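+   * The list is (re)initialized to the identity permutation 0..m_size-1;
+   * divideTree() then reorders it in place while the tree is built.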
*/ + void init_vind() { + // Create a permutable array of indices to the input vectors. + BaseClassRef::m_size = dataset.kdtree_get_point_count(); + if (BaseClassRef::vind.size() != BaseClassRef::m_size) + BaseClassRef::vind.resize(BaseClassRef::m_size); + for (size_t i = 0; i < BaseClassRef::m_size; i++) + BaseClassRef::vind[i] = i; + } + + void computeBoundingBox(BoundingBox &bbox) { + resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim)); + if (dataset.kdtree_get_bbox(bbox)) { + // Done! It was implemented in derived class + } else { + const size_t N = dataset.kdtree_get_point_count(); + if (!N) + throw std::runtime_error("[nanoflann] computeBoundingBox() called but " + "no data points found."); + for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { + bbox[i].low = bbox[i].high = this->dataset_get(*this, 0, i); + } + for (size_t k = 1; k < N; ++k) { + for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { + if (this->dataset_get(*this, k, i) < bbox[i].low) + bbox[i].low = this->dataset_get(*this, k, i); + if (this->dataset_get(*this, k, i) > bbox[i].high) + bbox[i].high = this->dataset_get(*this, k, i); } - // assign one value to all elements - inline void assign (const T& value) { for (size_t i=0;i= size()) { throw std::out_of_range("CArray<>: index out of range"); } } - }; // end of CArray - - /** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors when DIM=-1. - * Fixed size version for a generic DIM: - */ - template - struct array_or_vector_selector - { - typedef CArray container_t; - }; - /** Dynamic size version */ - template - struct array_or_vector_selector<-1, T> { - typedef std::vector container_t; - }; - - /** @} */ - - /** kd-tree base-class - * - * Contains the member functions common to the classes KDTreeSingleIndexAdaptor and KDTreeSingleIndexDynamicAdaptor_. - * - * \tparam Derived The name of the class which inherits this class. - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use, these are all classes derived from nanoflann::Metric - * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - - template - class KDTreeBaseClass - { - - public: - /** Frees the previously-built index. Automatically called within buildIndex(). */ - void freeIndex(Derived &obj) - { - obj.pool.free_all(); - obj.root_node = NULL; - obj.m_size_at_index_build = 0; - } - - typedef typename Distance::ElementType ElementType; - typedef typename Distance::DistanceType DistanceType; - - /*--------------------- Internal Data Structures --------------------------*/ - struct Node - { - /** Union used because a node can be either a LEAF node or a non-leaf node, so both data fields are never used simultaneously */ - union { - struct leaf - { - IndexType left, right; //!< Indices of points in leaf node - } lr; - struct nonleaf - { - int divfeat; //!< Dimension used for subdivision. - DistanceType divlow, divhigh; //!< The values used for subdivision. - } sub; - } node_type; - Node *child1, *child2; //!< Child nodes (both=NULL mean its a leaf node) - }; - - typedef Node* NodePtr; - - struct Interval - { - ElementType low, high; - }; - - /** - * Array of indices to vectors in the dataset. 
- */ - std::vector vind; - - NodePtr root_node; - - size_t m_leaf_max_size; - - size_t m_size; //!< Number of current points in the dataset - size_t m_size_at_index_build; //!< Number of points in the dataset when the index was built - int dim; //!< Dimensionality of each data point - - /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename array_or_vector_selector::container_t BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename array_or_vector_selector::container_t distance_vector_t; - - /** The KD-tree used to find neighbours */ - - BoundingBox root_bbox; - - /** - * Pooled memory allocator. - * - * Using a pooled memory allocator is more efficient - * than allocating memory directly when there is a large - * number small of memory allocations. - */ - PooledAllocator pool; - - /** Returns number of points in dataset */ - size_t size(const Derived &obj) const { return obj.m_size; } - - /** Returns the length of each point in the dataset */ - size_t veclen(const Derived &obj) { - return static_cast(DIM>0 ? DIM : obj.dim); - } - - /// Helper accessor to the dataset points: - inline ElementType dataset_get(const Derived &obj, size_t idx, int component) const{ - return obj.dataset.kdtree_get_pt(idx, component); - } - - /** - * Computes the inde memory usage - * Returns: memory used by the index - */ - size_t usedMemory(Derived &obj) - { - return obj.pool.usedMemory + obj.pool.wastedMemory + obj.dataset.kdtree_get_point_count() * sizeof(IndexType); // pool memory and vind array memory - } - - void computeMinMax(const Derived &obj, IndexType* ind, IndexType count, int element, ElementType& min_elem, ElementType& max_elem) - { - min_elem = dataset_get(obj, ind[0],element); - max_elem = dataset_get(obj, ind[0],element); - for (IndexType i = 1; i < count; ++i) { - ElementType val = dataset_get(obj, ind[i], element); - if (val < min_elem) min_elem = val; - if (val > max_elem) max_elem = val; - } - } - - /** - * Create a tree node that subdivides the list of vecs from vind[first] - * to vind[last]. The routine is called recursively on each sublist. - * - * @param left index of the first vector - * @param right index of the last vector - */ - NodePtr divideTree(Derived &obj, const IndexType left, const IndexType right, BoundingBox& bbox) - { - NodePtr node = obj.pool.template allocate(); // allocate memory - - /* If too few exemplars remain, then make this a leaf node. */ - if ( (right - left) <= static_cast(obj.m_leaf_max_size) ) { - node->child1 = node->child2 = NULL; /* Mark as leaf node. */ - node->node_type.lr.left = left; - node->node_type.lr.right = right; - - // compute bounding-box of leaf points - for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { - bbox[i].low = dataset_get(obj, obj.vind[left], i); - bbox[i].high = dataset_get(obj, obj.vind[left], i); - } - for (IndexType k = left + 1; k < right; ++k) { - for (int i = 0; i < (DIM > 0 ? 
DIM : obj.dim); ++i) { - if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) bbox[i].low = dataset_get(obj, obj.vind[k], i); - if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) bbox[i].high = dataset_get(obj, obj.vind[k], i); - } - } - } - else { - IndexType idx; - int cutfeat; - DistanceType cutval; - middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, bbox); - - node->node_type.sub.divfeat = cutfeat; - - BoundingBox left_bbox(bbox); - left_bbox[cutfeat].high = cutval; - node->child1 = divideTree(obj, left, left + idx, left_bbox); - - BoundingBox right_bbox(bbox); - right_bbox[cutfeat].low = cutval; - node->child2 = divideTree(obj, left + idx, right, right_bbox); - - node->node_type.sub.divlow = left_bbox[cutfeat].high; - node->node_type.sub.divhigh = right_bbox[cutfeat].low; - - for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { - bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); - bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); - } - } - - return node; - } - - void middleSplit_(Derived &obj, IndexType* ind, IndexType count, IndexType& index, int& cutfeat, DistanceType& cutval, const BoundingBox& bbox) - { - const DistanceType EPS = static_cast(0.00001); - ElementType max_span = bbox[0].high-bbox[0].low; - for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) { - ElementType span = bbox[i].high - bbox[i].low; - if (span > max_span) { - max_span = span; - } - } - ElementType max_spread = -1; - cutfeat = 0; - for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { - ElementType span = bbox[i].high-bbox[i].low; - if (span > (1 - EPS) * max_span) { - ElementType min_elem, max_elem; - computeMinMax(obj, ind, count, i, min_elem, max_elem); - ElementType spread = max_elem - min_elem;; - if (spread > max_spread) { - cutfeat = i; - max_spread = spread; - } - } - } - // split in the middle - DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; - ElementType min_elem, max_elem; - computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem); - - if (split_val < min_elem) cutval = min_elem; - else if (split_val > max_elem) cutval = max_elem; - else cutval = split_val; - - IndexType lim1, lim2; - planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); - - if (lim1 > count / 2) index = lim1; - else if (lim2 < count / 2) index = lim2; - else index = count/2; - } - - /** - * Subdivide the list of points by a plane perpendicular on axe corresponding - * to the 'cutfeat' dimension at 'cutval' position. - * - * On return: - * dataset[ind[0..lim1-1]][cutfeat]cutval - */ - void planeSplit(Derived &obj, IndexType* ind, const IndexType count, int cutfeat, DistanceType &cutval, IndexType& lim1, IndexType& lim2) - { - /* Move vector indices for left subtree to front of list. */ - IndexType left = 0; - IndexType right = count-1; - for (;; ) { - while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval) ++left; - while (right && left <= right && dataset_get(obj, ind[right], cutfeat) >= cutval) --right; - if (left > right || !right) break; // "!right" was added to support unsigned Index types - std::swap(ind[left], ind[right]); - ++left; - --right; - } - /* If either list is empty, it means that all remaining features - * are identical. Split in the middle to maintain a balanced tree. 
- */ - lim1 = left; - right = count-1; - for (;; ) { - while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval) ++left; - while (right && left <= right && dataset_get(obj, ind[right], cutfeat) > cutval) --right; - if (left > right || !right) break; // "!right" was added to support unsigned Index types - std::swap(ind[left], ind[right]); - ++left; - --right; - } - lim2 = left; - } - - DistanceType computeInitialDistances(const Derived &obj, const ElementType* vec, distance_vector_t& dists) const - { - assert(vec); - DistanceType distsq = DistanceType(); - - for (int i = 0; i < (DIM>0 ? DIM : obj.dim); ++i) { - if (vec[i] < obj.root_bbox[i].low) { - dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i); - distsq += dists[i]; - } - if (vec[i] > obj.root_bbox[i].high) { - dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i); - distsq += dists[i]; - } - } - return distsq; - } - - void save_tree(Derived &obj, FILE* stream, NodePtr tree) - { - save_value(stream, *tree); - if (tree->child1 != NULL) { - save_tree(obj, stream, tree->child1); - } - if (tree->child2 != NULL) { - save_tree(obj, stream, tree->child2); - } - } - - - void load_tree(Derived &obj, FILE* stream, NodePtr& tree) - { - tree = obj.pool.template allocate(); - load_value(stream, *tree); - if (tree->child1 != NULL) { - load_tree(obj, stream, tree->child1); - } - if (tree->child2 != NULL) { - load_tree(obj, stream, tree->child2); - } - } - - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when loading the index object it must be constructed associated to the same source of data points used while building it. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void saveIndex_(Derived &obj, FILE* stream) - { - save_value(stream, obj.m_size); - save_value(stream, obj.dim); - save_value(stream, obj.root_bbox); - save_value(stream, obj.m_leaf_max_size); - save_value(stream, obj.vind); - save_tree(obj, stream, obj.root_node); - } - - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the index object must be constructed associated to the same source of data points used while building the index. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void loadIndex_(Derived &obj, FILE* stream) - { - load_value(stream, obj.m_size); - load_value(stream, obj.dim); - load_value(stream, obj.root_bbox); - load_value(stream, obj.m_leaf_max_size); - load_value(stream, obj.vind); - load_tree(obj, stream, obj.root_node); - } - - }; - - - /** @addtogroup kdtrees_grp KD-tree classes and adaptors - * @{ */ - - /** kd-tree static index - * - * Contains the k-d trees and other information for indexing a set of points - * for nearest-neighbor matching. - * - * The class "DatasetAdaptor" must provide the following interface (can be non-virtual, inlined methods): - * - * \code - * // Must return the number of data poins - * inline size_t kdtree_get_point_count() const { ... } - * - * - * // Must return the dim'th component of the idx'th point in the class: - * inline T kdtree_get_pt(const size_t idx, int dim) const { ... } - * - * // Optional bounding-box computation: return false to default to a standard bbox computation loop. - * // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again. - * // Look at bb.size() to find out the expected dimensionality (e.g. 
2 or 3 for point clouds) - * template - * bool kdtree_get_bbox(BBOX &bb) const - * { - * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits - * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits - * ... - * return true; - * } - * - * \endcode - * - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - template - class KDTreeSingleIndexAdaptor : public KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> - { - public: - /** Deleted copy constructor*/ - KDTreeSingleIndexAdaptor(const KDTreeSingleIndexAdaptor&) = delete; - - /** - * The dataset used by this index - */ - const DatasetAdaptor &dataset; //!< The source of our data - - const KDTreeSingleIndexAdaptorParams index_params; - - Distance distance; - - typedef typename nanoflann::KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; - - typedef typename BaseClassRef::ElementType ElementType; - typedef typename BaseClassRef::DistanceType DistanceType; - - typedef typename BaseClassRef::Node Node; - typedef Node* NodePtr; - - typedef typename BaseClassRef::Interval Interval; - /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::BoundingBox BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::distance_vector_t distance_vector_t; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 for 3D points) - * is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexAdaptor(const int dimensionality, const DatasetAdaptor& inputData, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams() ) : - dataset(inputData), index_params(params), distance(inputData) - { - BaseClassRef::root_node = NULL; - BaseClassRef::m_size = dataset.kdtree_get_point_count(); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - BaseClassRef::dim = dimensionality; - if (DIM>0) BaseClassRef::dim = DIM; - BaseClassRef::m_leaf_max_size = params.leaf_max_size; - - // Create a permutable array of indices to the input vectors. - init_vind(); - } - - /** - * Builds the index - */ - void buildIndex() - { - BaseClassRef::m_size = dataset.kdtree_get_point_count(); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - init_vind(); - this->freeIndex(*this); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - if(BaseClassRef::m_size == 0) return; - computeBoundingBox(BaseClassRef::root_bbox); - BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox ); // construct the tree - } - - /** \name Query methods - * @{ */ - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored inside - * the result object. 
- * - * Params: - * result = the result object in which the indices of the nearest-neighbors are stored - * vec = the vector for which to search the nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - */ - template - bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const - { - assert(vec); - if (this->size(*this) == 0) - return false; - if (!BaseClassRef::root_node) - throw std::runtime_error("[nanoflann] findNeighbors() called before building the index."); - float epsError = 1 + searchParams.eps; - - distance_vector_t dists; // fixed or variable-sized container (depending on DIM) - dists.assign((DIM > 0 ? DIM : BaseClassRef::dim), 0); // Fill it with zeros. - DistanceType distsq = this->computeInitialDistances(*this, vec, dists); - searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither used nor returned to the user. - return result.full(); - } - - /** - * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. Their indices are stored inside - * the result object. - * \sa radiusSearch, findNeighbors - * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface. - * \return Number `N` of valid points in the result set. Only the first `N` entries in `out_indices` and `out_distances_sq` will be valid. - * Return may be less than `num_closest` only if the number of elements in the tree is less than `num_closest`. - */ - size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances_sq); - this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); - return resultSet.size(); - } - - /** - * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. - * The output is given as a vector of pairs, of which the first element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * If searchParams.sorted==true, the output list is sorted by ascending distances. - * - * For a better performance, it is advisable to do a .reserve() on the vector if you have any wild guess about the number of expected matches. - * - * \sa knnSearch, findNeighbors, radiusSearchCustomCallback - * \return The number of points within the given radius (i.e. indices.size() or dists.size() ) - */ - size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector >& IndicesDists, const SearchParams& searchParams) const - { - RadiusResultSet resultSet(radius, IndicesDists); - const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter() ); - return nFound; - } - - /** - * Just like radiusSearch() but with a custom callback class for each point found in the radius of the query. - * See the source of RadiusResultSet<> as a start point for your own classes. 
- * \sa radiusSearch - */ - template - size_t radiusSearchCustomCallback(const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams& searchParams = SearchParams() ) const - { - this->findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** @} */ - - public: - /** Make sure the auxiliary list \a vind has the same size than the current dataset, and re-generate if size has changed. */ - void init_vind() - { - // Create a permutable array of indices to the input vectors. - BaseClassRef::m_size = dataset.kdtree_get_point_count(); - if (BaseClassRef::vind.size() != BaseClassRef::m_size) BaseClassRef::vind.resize(BaseClassRef::m_size); - for (size_t i = 0; i < BaseClassRef::m_size; i++) BaseClassRef::vind[i] = i; - } - - void computeBoundingBox(BoundingBox& bbox) - { - bbox.resize((DIM > 0 ? DIM : BaseClassRef::dim)); - if (dataset.kdtree_get_bbox(bbox)) - { - // Done! It was implemented in derived class - } - else - { - const size_t N = dataset.kdtree_get_point_count(); - if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but no data points found."); - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - bbox[i].low = - bbox[i].high = this->dataset_get(*this, 0, i); - } - for (size_t k = 1; k < N; ++k) { - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - if (this->dataset_get(*this, k, i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, k, i); - if (this->dataset_get(*this, k, i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, k, i); - } - } - } - } - - /** - * Performs an exact search in the tree starting from a node. - * \tparam RESULTSET Should be any ResultSet - * \return true if the search should be continued, false if the results are sufficient - */ - template - bool searchLevel(RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, - distance_vector_t& dists, const float epsError) const - { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == NULL) && (node->child2 == NULL)) { - //count_leaf += (node->lr.right-node->lr.left); // Removed since was neither used nor returned to the user. - DistanceType worst_dist = result_set.worstDist(); - for (IndexType i = node->node_type.lr.left; inode_type.lr.right; ++i) { - const IndexType index = BaseClassRef::vind[i];// reorder... : i; - DistanceType dist = distance.evalMetric(vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); - if (dist < worst_dist) { - if(!result_set.addPoint(dist, BaseClassRef::vind[i])) { - // the resultset doesn't want to receive any more points, we're done searching! - return false; - } - } - } - return true; - } - - /* Which child branch should be taken first? */ - int idx = node->node_type.sub.divfeat; - ElementType val = vec[idx]; - DistanceType diff1 = val - node->node_type.sub.divlow; - DistanceType diff2 = val - node->node_type.sub.divhigh; - - NodePtr bestChild; - NodePtr otherChild; - DistanceType cut_dist; - if ((diff1 + diff2) < 0) { - bestChild = node->child1; - otherChild = node->child2; - cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); - } - else { - bestChild = node->child2; - otherChild = node->child1; - cut_dist = distance.accum_dist( val, node->node_type.sub.divlow, idx); - } - - /* Call recursively to search next level down. */ - if(!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) { - // the resultset doesn't want to receive any more points, we're done searching! 
- return false; - } - - DistanceType dst = dists[idx]; - mindistsq = mindistsq + cut_dist - dst; - dists[idx] = cut_dist; - if (mindistsq*epsError <= result_set.worstDist()) { - if(!searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError)) { - // the resultset doesn't want to receive any more points, we're done searching! - return false; - } - } - dists[idx] = dst; - return true; - } - - public: - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when loading the index object it must be constructed associated to the same source of data points used while building it. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void saveIndex(FILE* stream) - { - this->saveIndex_(*this, stream); - } - - /** Loads a previous index from a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the index object must be constructed associated to the same source of data points used while building the index. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void loadIndex(FILE* stream) - { - this->loadIndex_(*this, stream); - } - - }; // class KDTree - - - /** kd-tree dynamic index - * - * Contains the k-d trees and other information for indexing a set of points - * for nearest-neighbor matching. - * - * The class "DatasetAdaptor" must provide the following interface (can be non-virtual, inlined methods): - * - * \code - * // Must return the number of data poins - * inline size_t kdtree_get_point_count() const { ... } - * - * // Must return the dim'th component of the idx'th point in the class: - * inline T kdtree_get_pt(const size_t idx, int dim) const { ... } - * - * // Optional bounding-box computation: return false to default to a standard bbox computation loop. - * // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again. - * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 for point clouds) - * template - * bool kdtree_get_bbox(BBOX &bb) const - * { - * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits - * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits - * ... - * return true; - * } - * - * \endcode - * - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - * \tparam DIM Dimensionality of data points (e.g. 
3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - template - class KDTreeSingleIndexDynamicAdaptor_ : public KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> - { - public: - - /** - * The dataset used by this index - */ - const DatasetAdaptor &dataset; //!< The source of our data - - KDTreeSingleIndexAdaptorParams index_params; - - std::vector &treeIndex; - - Distance distance; - - typedef typename nanoflann::KDTreeBaseClass, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; - - typedef typename BaseClassRef::ElementType ElementType; - typedef typename BaseClassRef::DistanceType DistanceType; - - typedef typename BaseClassRef::Node Node; - typedef Node* NodePtr; - - typedef typename BaseClassRef::Interval Interval; - /** Define "BoundingBox" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::BoundingBox BoundingBox; - - /** Define "distance_vector_t" as a fixed-size or variable-size container depending on "DIM" */ - typedef typename BaseClassRef::distance_vector_t distance_vector_t; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 for 3D points) - * is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. - * - * @param inputData Dataset with the input features - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexDynamicAdaptor_(const int dimensionality, const DatasetAdaptor& inputData, std::vector& treeIndex_, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams()) : - dataset(inputData), index_params(params), treeIndex(treeIndex_), distance(inputData) - { - BaseClassRef::root_node = NULL; - BaseClassRef::m_size = 0; - BaseClassRef::m_size_at_index_build = 0; - BaseClassRef::dim = dimensionality; - if (DIM>0) BaseClassRef::dim = DIM; - BaseClassRef::m_leaf_max_size = params.leaf_max_size; - } - - - /** Assignment operator definiton */ - KDTreeSingleIndexDynamicAdaptor_ operator=( const KDTreeSingleIndexDynamicAdaptor_& rhs ) { - KDTreeSingleIndexDynamicAdaptor_ tmp( rhs ); - std::swap( BaseClassRef::vind, tmp.BaseClassRef::vind ); - std::swap( BaseClassRef::m_leaf_max_size, tmp.BaseClassRef::m_leaf_max_size ); - std::swap( index_params, tmp.index_params ); - std::swap( treeIndex, tmp.treeIndex ); - std::swap( BaseClassRef::m_size, tmp.BaseClassRef::m_size ); - std::swap( BaseClassRef::m_size_at_index_build, tmp.BaseClassRef::m_size_at_index_build ); - std::swap( BaseClassRef::root_node, tmp.BaseClassRef::root_node ); - std::swap( BaseClassRef::root_bbox, tmp.BaseClassRef::root_bbox ); - std::swap( BaseClassRef::pool, tmp.BaseClassRef::pool ); - return *this; - } - - /** - * Builds the index - */ - void buildIndex() - { - BaseClassRef::m_size = BaseClassRef::vind.size(); - this->freeIndex(*this); - BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; - if(BaseClassRef::m_size == 0) return; - computeBoundingBox(BaseClassRef::root_bbox); - BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox ); // construct the tree - } - - /** \name Query methods - * @{ */ - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored inside - * the result object. 
- * - * Params: - * result = the result object in which the indices of the nearest-neighbors are stored - * vec = the vector for which to search the nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - */ - template - bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const - { - assert(vec); - if (this->size(*this) == 0) - return false; - if (!BaseClassRef::root_node) - return false; - float epsError = 1 + searchParams.eps; - - distance_vector_t dists; // fixed or variable-sized container (depending on DIM) - dists.assign((DIM > 0 ? DIM : BaseClassRef::dim) , 0); // Fill it with zeros. - DistanceType distsq = this->computeInitialDistances(*this, vec, dists); - searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither used nor returned to the user. - return result.full(); - } - - /** - * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. Their indices are stored inside - * the result object. - * \sa radiusSearch, findNeighbors - * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface. - * \return Number `N` of valid points in the result set. Only the first `N` entries in `out_indices` and `out_distances_sq` will be valid. - * Return may be less than `num_closest` only if the number of elements in the tree is less than `num_closest`. - */ - size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances_sq); - this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); - return resultSet.size(); - } - - /** - * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. - * The output is given as a vector of pairs, of which the first element is a point index and the second the corresponding distance. - * Previous contents of \a IndicesDists are cleared. - * - * If searchParams.sorted==true, the output list is sorted by ascending distances. - * - * For a better performance, it is advisable to do a .reserve() on the vector if you have any wild guess about the number of expected matches. - * - * \sa knnSearch, findNeighbors, radiusSearchCustomCallback - * \return The number of points within the given radius (i.e. indices.size() or dists.size() ) - */ - size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector >& IndicesDists, const SearchParams& searchParams) const - { - RadiusResultSet resultSet(radius, IndicesDists); - const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); - if (searchParams.sorted) - std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter() ); - return nFound; - } - - /** - * Just like radiusSearch() but with a custom callback class for each point found in the radius of the query. - * See the source of RadiusResultSet<> as a start point for your own classes. 
- * \sa radiusSearch - */ - template - size_t radiusSearchCustomCallback(const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams& searchParams = SearchParams() ) const - { - this->findNeighbors(resultSet, query_point, searchParams); - return resultSet.size(); - } - - /** @} */ - - public: - - - void computeBoundingBox(BoundingBox& bbox) - { - bbox.resize((DIM > 0 ? DIM : BaseClassRef::dim)); - if (dataset.kdtree_get_bbox(bbox)) - { - // Done! It was implemented in derived class - } - else - { - const size_t N = BaseClassRef::m_size; - if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but no data points found."); - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - bbox[i].low = - bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[0], i); - } - for (size_t k = 1; k < N; ++k) { - for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { - if (this->dataset_get(*this, BaseClassRef::vind[k], i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, BaseClassRef::vind[k], i); - if (this->dataset_get(*this, BaseClassRef::vind[k], i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[k], i); - } - } - } - } - - /** - * Performs an exact search in the tree starting from a node. - * \tparam RESULTSET Should be any ResultSet - */ - template - void searchLevel(RESULTSET& result_set, const ElementType* vec, const NodePtr node, DistanceType mindistsq, - distance_vector_t& dists, const float epsError) const - { - /* If this is a leaf node, then do check and return. */ - if ((node->child1 == NULL) && (node->child2 == NULL)) { - //count_leaf += (node->lr.right-node->lr.left); // Removed since was neither used nor returned to the user. - DistanceType worst_dist = result_set.worstDist(); - for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { - const IndexType index = BaseClassRef::vind[i];// reorder... : i; - if(treeIndex[index] == -1) - continue; - DistanceType dist = distance.evalMetric(vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); - if (distnode_type.sub.divfeat; - ElementType val = vec[idx]; - DistanceType diff1 = val - node->node_type.sub.divlow; - DistanceType diff2 = val - node->node_type.sub.divhigh; - - NodePtr bestChild; - NodePtr otherChild; - DistanceType cut_dist; - if ((diff1 + diff2) < 0) { - bestChild = node->child1; - otherChild = node->child2; - cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); - } - else { - bestChild = node->child2; - otherChild = node->child1; - cut_dist = distance.accum_dist( val, node->node_type.sub.divlow, idx); - } - - /* Call recursively to search next level down. */ - searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError); - - DistanceType dst = dists[idx]; - mindistsq = mindistsq + cut_dist - dst; - dists[idx] = cut_dist; - if (mindistsq*epsError <= result_set.worstDist()) { - searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError); - } - dists[idx] = dst; - } - - public: - /** Stores the index in a binary file. - * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when loading the index object it must be constructed associated to the same source of data points used while building it. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void saveIndex(FILE* stream) - { - this->saveIndex_(*this, stream); - } - - /** Loads a previous index from a binary file. 
- * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the index object must be constructed associated to the same source of data points used while building the index. - * See the example: examples/saveload_example.cpp - * \sa loadIndex */ - void loadIndex(FILE* stream) - { - this->loadIndex_(*this, stream); - } - - }; - - - /** kd-tree dynaimic index - * - * class to create multiple static index and merge their results to behave as single dynamic index as proposed in Logarithmic Approach. - * - * Example of usage: - * examples/dynamic_pointcloud_example.cpp - * - * \tparam DatasetAdaptor The user-provided adaptor (see comments above). - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points) - * \tparam IndexType Will be typically size_t or int - */ - template - class KDTreeSingleIndexDynamicAdaptor - { - public: - typedef typename Distance::ElementType ElementType; - typedef typename Distance::DistanceType DistanceType; - protected: - - size_t m_leaf_max_size; - size_t treeCount; - size_t pointCount; - - /** - * The dataset used by this index - */ - const DatasetAdaptor &dataset; //!< The source of our data - - std::vector treeIndex; //!< treeIndex[idx] is the index of tree in which point at idx is stored. treeIndex[idx]=-1 means that point has been removed. - - KDTreeSingleIndexAdaptorParams index_params; - - int dim; //!< Dimensionality of each data point - - typedef KDTreeSingleIndexDynamicAdaptor_ index_container_t; - std::vector index; - - public: - /** Get a const ref to the internal list of indices; the number of indices is adapted dynamically as - * the dataset grows in size. */ - const std::vector & getAllIndices() const { - return index; - } - - private: - /** finds position of least significant unset bit */ - int First0Bit(IndexType num) - { - int pos = 0; - while(num&1) - { - num = num>>1; - pos++; - } - return pos; - } - - /** Creates multiple empty trees to handle dynamic support */ - void init() - { - typedef KDTreeSingleIndexDynamicAdaptor_ my_kd_tree_t; - std::vector index_(treeCount, my_kd_tree_t(dim /*dim*/, dataset, treeIndex, index_params)); - index=index_; - } - - public: - - Distance distance; - - /** - * KDTree constructor - * - * Refer to docs in README.md or online in https://github.com/jlblancoc/nanoflann - * - * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 for 3D points) - * is determined by means of: - * - The \a DIM template parameter if >0 (highest priority) - * - Otherwise, the \a dimensionality parameter of this constructor. 
- * - * @param inputData Dataset with the input features - * @param params Basically, the maximum leaf node size - */ - KDTreeSingleIndexDynamicAdaptor(const int dimensionality, const DatasetAdaptor& inputData, const KDTreeSingleIndexAdaptorParams& params = KDTreeSingleIndexAdaptorParams() , const size_t maximumPointCount = 1000000000U) : - dataset(inputData), index_params(params), distance(inputData) - { - treeCount = std::log2(maximumPointCount); - pointCount = 0U; - dim = dimensionality; - treeIndex.clear(); - if (DIM > 0) dim = DIM; - m_leaf_max_size = params.leaf_max_size; - init(); - int num_initial_points = dataset.kdtree_get_point_count(); - if (num_initial_points > 0) { - addPoints(0, num_initial_points - 1); - } - } - - /** Deleted copy constructor*/ - KDTreeSingleIndexDynamicAdaptor(const KDTreeSingleIndexDynamicAdaptor&) = delete; - - - /** Add points to the set, Inserts all points from [start, end] */ - void addPoints(IndexType start, IndexType end) - { - int count = end - start + 1; - treeIndex.resize(treeIndex.size() + count); - for(IndexType idx = start; idx <= end; idx++) { - int pos = First0Bit(pointCount); - index[pos].vind.clear(); - treeIndex[pointCount]=pos; - for(int i = 0; i < pos; i++) { - for(int j = 0; j < static_cast(index[i].vind.size()); j++) { - index[pos].vind.push_back(index[i].vind[j]); - treeIndex[index[i].vind[j]] = pos; - } - index[i].vind.clear(); - index[i].freeIndex(index[i]); - } - index[pos].vind.push_back(idx); - index[pos].buildIndex(); - pointCount++; - } - } - - /** Remove a point from the set (Lazy Deletion) */ - void removePoint(size_t idx) - { - if(idx >= pointCount) - return; - treeIndex[idx] = -1; - } - - /** - * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored inside - * the result object. - * - * Params: - * result = the result object in which the indices of the nearest-neighbors are stored - * vec = the vector for which to search the nearest neighbors - * - * \tparam RESULTSET Should be any ResultSet - * \return True if the requested neighbors could be found. - * \sa knnSearch, radiusSearch - */ - template - bool findNeighbors(RESULTSET& result, const ElementType* vec, const SearchParams& searchParams) const - { - for(size_t i = 0; i < treeCount; i++) - { - index[i].findNeighbors(result, &vec[0], searchParams); - } - return result.full(); - } - - }; - - /** An L2-metric KD-tree adaptor for working with data directly stored in an Eigen Matrix, without duplicating the data storage. - * Each row in the matrix represents a point in the state space. - * - * Example of usage: - * \code - * Eigen::Matrix mat; - * // Fill out "mat"... - * - * typedef KDTreeEigenMatrixAdaptor< Eigen::Matrix > my_kd_tree_t; - * const int max_leaf = 10; - * my_kd_tree_t mat_index(mat, max_leaf ); - * mat_index.index->buildIndex(); - * mat_index.index->... - * \endcode - * - * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality for the points in the data set, allowing more compiler optimizations. - * \tparam Distance The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. - */ - template - struct KDTreeEigenMatrixAdaptor - { - typedef KDTreeEigenMatrixAdaptor self_t; - typedef typename MatrixType::Scalar num_t; - typedef typename MatrixType::Index IndexType; - typedef typename Distance::template traits::distance_t metric_t; - typedef KDTreeSingleIndexAdaptor< metric_t,self_t, MatrixType::ColsAtCompileTime,IndexType> index_t; - - index_t* index; //! 
The kd-tree index for the user to call its methods as usual with any other FLANN index. - - /// Constructor: takes a const ref to the matrix object with the data points - KDTreeEigenMatrixAdaptor(const MatrixType &mat, const int leaf_max_size = 10) : m_data_matrix(mat) - { - const IndexType dims = mat.cols(); - index = new index_t( dims, *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size ) ); - index->buildIndex(); - } - public: - /** Deleted copy constructor */ - KDTreeEigenMatrixAdaptor(const self_t&) = delete; - - ~KDTreeEigenMatrixAdaptor() { - delete index; - } - - const MatrixType &m_data_matrix; - - /** Query for the \a num_closest closest points to a given point (entered as query_point[0:dim-1]). - * Note that this is a short-cut method for index->findNeighbors(). - * The user can also call index->... methods as desired. - * \note nChecks_IGNORED is ignored but kept for compatibility with the original FLANN interface. - */ - inline void query(const num_t *query_point, const size_t num_closest, IndexType *out_indices, num_t *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const - { - nanoflann::KNNResultSet resultSet(num_closest); - resultSet.init(out_indices, out_distances_sq); - index->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); - } - - /** @name Interface expected by KDTreeSingleIndexAdaptor - * @{ */ - - const self_t & derived() const { - return *this; - } - self_t & derived() { - return *this; - } - - // Must return the number of data points - inline size_t kdtree_get_point_count() const { - return m_data_matrix.rows(); - } - - // Returns the dim'th component of the idx'th point in the class: - inline num_t kdtree_get_pt(const IndexType idx, int dim) const { - return m_data_matrix.coeff(idx, IndexType(dim)); - } - - // Optional bounding-box computation: return false to default to a standard bbox computation loop. - // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again. - // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 for point clouds) - template - bool kdtree_get_bbox(BBOX& /*bb*/) const { - return false; - } - - /** @} */ - - }; // end of KDTreeEigenMatrixAdaptor - /** @} */ + } + } + } + + /** + * Performs an exact search in the tree starting from a node. + * \tparam RESULTSET Should be any ResultSet + * \return true if the search should be continued, false if the results are + * sufficient + */ + template + bool searchLevel(RESULTSET &result_set, const ElementType *vec, + const NodePtr node, DistanceType mindistsq, + distance_vector_t &dists, const float epsError) const { + /* If this is a leaf node, then do check and return. */ + if ((node->child1 == NULL) && (node->child2 == NULL)) { + // count_leaf += (node->lr.right-node->lr.left); // Removed since was + // neither used nor returned to the user. + DistanceType worst_dist = result_set.worstDist(); + for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; + ++i) { + const IndexType index = BaseClassRef::vind[i]; // reorder... : i; + DistanceType dist = distance.evalMetric( + vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); + if (dist < worst_dist) { + if (!result_set.addPoint(dist, BaseClassRef::vind[i])) { + // the resultset doesn't want to receive any more points, we're done + // searching! + return false; + } + } + } + return true; + } + + /* Which child branch should be taken first? 
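(diff1 + diff2 < 0 means the query value lies below the midpoint of [divlow, divhigh], so child1 is visited first; cut_dist, computed just below, is the single-axis distance to the other half-space and feeds the backtracking bound on mindistsq.)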
+    int idx = node->node_type.sub.divfeat;
+    ElementType val = vec[idx];
+    DistanceType diff1 = val - node->node_type.sub.divlow;
+    DistanceType diff2 = val - node->node_type.sub.divhigh;
+
+    NodePtr bestChild;
+    NodePtr otherChild;
+    DistanceType cut_dist;
+    if ((diff1 + diff2) < 0) {
+      bestChild = node->child1;
+      otherChild = node->child2;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx);
+    } else {
+      bestChild = node->child2;
+      otherChild = node->child1;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx);
+    }
+
+    /* Call recursively to search next level down. */
+    if (!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) {
+      // the resultset doesn't want to receive any more points, we're done
+      // searching!
+      return false;
+    }
+
+    DistanceType dst = dists[idx];
+    mindistsq = mindistsq + cut_dist - dst;
+    dists[idx] = cut_dist;
+    if (mindistsq * epsError <= result_set.worstDist()) {
+      if (!searchLevel(result_set, vec, otherChild, mindistsq, dists,
+                       epsError)) {
+        // the resultset doesn't want to receive any more points, we're done
+        // searching!
+        return false;
+      }
+    }
+    dists[idx] = dst;
+    return true;
+  }
+
+public:
+  /** Stores the index in a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when
+   * loading the index object it must be constructed associated to the same
+   * source of data points used while building it. See the example:
+   * examples/saveload_example.cpp
+   * \sa loadIndex */
+  void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); }
+
+  /** Loads a previous index from a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the
+   * index object must be constructed associated to the same source of data
+   * points used while building the index. See the example:
+   * examples/saveload_example.cpp
+   * \sa saveIndex */
+  void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); }
+
+}; // class KDTree
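+
+/* Editor's illustration (not part of the upstream header): a minimal
+ * save/load round trip for the index class closed above. "PointCloud" is a
+ * hypothetical adaptor exposing kdtree_get_point_count(), kdtree_get_pt()
+ * and kdtree_get_bbox(); only the tree topology is written to disk, so the
+ * second index must be constructed over the same data set before loading.
+ * \code
+ * typedef KDTreeSingleIndexAdaptor<
+ *     L2_Simple_Adaptor<double, PointCloud>, PointCloud, 3> my_kd_tree_t;
+ *
+ * PointCloud cloud;            // ... fill with 3-D points ...
+ * my_kd_tree_t index(3, cloud, KDTreeSingleIndexAdaptorParams(10));
+ * index.buildIndex();
+ *
+ * FILE *f = fopen("index.bin", "wb");
+ * index.saveIndex(f);          // tree only, no point coordinates
+ * fclose(f);
+ *
+ * my_kd_tree_t index2(3, cloud, KDTreeSingleIndexAdaptorParams(10));
+ * FILE *g = fopen("index.bin", "rb");
+ * index2.loadIndex(g);         // restores the tree over the same cloud
+ * fclose(g);
+ * \endcode
+ */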
+
+/** kd-tree dynamic index
+ *
+ * Contains the k-d trees and other information for indexing a set of points
+ * for nearest-neighbor matching.
+ *
+ * The class "DatasetAdaptor" must provide the following interface (can be
+ * non-virtual, inlined methods):
+ *
+ * \code
+ * // Must return the number of data points
+ * inline size_t kdtree_get_point_count() const { ... }
+ *
+ * // Must return the dim'th component of the idx'th point in the class:
+ * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
+ *
+ * // Optional bounding-box computation: return false to default to a standard
+ * // bbox computation loop.
+ * // Return true if the BBOX was already computed by the class and returned
+ * // in "bb" so it can be avoided to redo it again.
+ * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
+ * // for point clouds)
+ * template <class BBOX>
+ * bool kdtree_get_bbox(BBOX &bb) const {
+ *   bb[0].low = ...; bb[0].high = ...; // 0th dimension limits
+ *   bb[1].low = ...; bb[1].high = ...; // 1st dimension limits
+ *   ...
+ *   return true;
+ * }
+ * \endcode
+ *
+ * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ *         nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
+ * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
+ * \tparam IndexType Will be typically size_t or int
+ */
+template <typename Distance, class DatasetAdaptor, int DIM = -1,
+          typename IndexType = size_t>
+class KDTreeSingleIndexDynamicAdaptor_
+    : public KDTreeBaseClass<KDTreeSingleIndexDynamicAdaptor_<
+                                 Distance, DatasetAdaptor, DIM, IndexType>,
+                             Distance, DatasetAdaptor, DIM, IndexType> {
+public:
+  /**
+   * The dataset used by this index
+   */
+  const DatasetAdaptor &dataset; //!< The source of our data
+
+  KDTreeSingleIndexAdaptorParams index_params;
+
+  std::vector<int> &treeIndex;
+
+  Distance distance;
+
+  typedef typename nanoflann::KDTreeBaseClass<
+      nanoflann::KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor,
+                                                  DIM, IndexType>,
+      Distance, DatasetAdaptor, DIM, IndexType>
+      BaseClassRef;
+
+  typedef typename BaseClassRef::ElementType ElementType;
+  typedef typename BaseClassRef::DistanceType DistanceType;
+
+  typedef typename BaseClassRef::Node Node;
+  typedef Node *NodePtr;
+
+  typedef typename BaseClassRef::Interval Interval;
+  /** Define "BoundingBox" as a fixed-size or variable-size container depending
+   * on "DIM" */
+  typedef typename BaseClassRef::BoundingBox BoundingBox;
+
+  /** Define "distance_vector_t" as a fixed-size or variable-size container
+   * depending on "DIM" */
+  typedef typename BaseClassRef::distance_vector_t distance_vector_t;
+
+  /**
+   * KDTree constructor
+   *
+   * Refer to docs in README.md or online in
+   * https://github.com/jlblancoc/nanoflann
+   *
+   * The KD-Tree point dimension (the length of each point in the dataset,
+   * e.g. 3 for 3D points) is determined by means of:
+   *  - The \a DIM template parameter if >0 (highest priority)
+   *  - Otherwise, the \a dimensionality parameter of this constructor.
+   *
+   * @param inputData Dataset with the input features
+   * @param params Basically, the maximum leaf node size
+   */
+  KDTreeSingleIndexDynamicAdaptor_(
+      const int dimensionality, const DatasetAdaptor &inputData,
+      std::vector<int> &treeIndex_,
+      const KDTreeSingleIndexAdaptorParams &params =
+          KDTreeSingleIndexAdaptorParams())
+      : dataset(inputData), index_params(params), treeIndex(treeIndex_),
+        distance(inputData) {
+    BaseClassRef::root_node = NULL;
+    BaseClassRef::m_size = 0;
+    BaseClassRef::m_size_at_index_build = 0;
+    BaseClassRef::dim = dimensionality;
+    if (DIM > 0)
+      BaseClassRef::dim = DIM;
+    BaseClassRef::m_leaf_max_size = params.leaf_max_size;
+  }
+
+  /** Assignment operator definition */
+  KDTreeSingleIndexDynamicAdaptor_
+  operator=(const KDTreeSingleIndexDynamicAdaptor_ &rhs) {
+    KDTreeSingleIndexDynamicAdaptor_ tmp(rhs);
+    std::swap(BaseClassRef::vind, tmp.BaseClassRef::vind);
+    std::swap(BaseClassRef::m_leaf_max_size, tmp.BaseClassRef::m_leaf_max_size);
+    std::swap(index_params, tmp.index_params);
+    std::swap(treeIndex, tmp.treeIndex);
+    std::swap(BaseClassRef::m_size, tmp.BaseClassRef::m_size);
+    std::swap(BaseClassRef::m_size_at_index_build,
+              tmp.BaseClassRef::m_size_at_index_build);
+    std::swap(BaseClassRef::root_node, tmp.BaseClassRef::root_node);
+    std::swap(BaseClassRef::root_bbox, tmp.BaseClassRef::root_bbox);
+    std::swap(BaseClassRef::pool, tmp.BaseClassRef::pool);
+    return *this;
+  }
+
+  /**
+   * Builds the index
+   */
+  void buildIndex() {
+    BaseClassRef::m_size = BaseClassRef::vind.size();
+    this->freeIndex(*this);
+    BaseClassRef::m_size_at_index_build = BaseClassRef::m_size;
+    if (BaseClassRef::m_size == 0)
+      return;
+    computeBoundingBox(BaseClassRef::root_bbox);
+    BaseClassRef::root_node =
+        this->divideTree(*this, 0, BaseClassRef::m_size,
+                         BaseClassRef::root_bbox); // construct the tree
+  }
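+
+/* Editor's note: the assignment operator defined above uses the copy-and-swap
+ * idiom: the right-hand side is copied into a temporary and every member is
+ * swapped into *this, so the previous state is released when the temporary
+ * goes out of scope. A minimal stand-alone sketch of the same pattern
+ * (hypothetical "Holder" type, for illustration only):
+ * \code
+ * struct Holder {
+ *   std::vector<int> data;
+ *   Holder &operator=(const Holder &rhs) {
+ *     Holder tmp(rhs);            // the copy may throw; *this is untouched
+ *     std::swap(data, tmp.data);  // swapping never throws
+ *     return *this;               // tmp's destructor frees the old data
+ *   }
+ * };
+ * \endcode
+ */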
+
+  /** \name Query methods
+   * @{ */
+
+  /**
+   * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
+   * inside the result object.
+   *
+   * Params:
+   *   result = the result object in which the indices of the
+   *            nearest-neighbors are stored
+   *   vec    = the vector for which to search the nearest neighbors
+   *
+   * \tparam RESULTSET Should be any ResultSet<DistanceType>
+   * \return True if the requested neighbors could be found.
+   * \sa knnSearch, radiusSearch
+   */
+  template <typename RESULTSET>
+  bool findNeighbors(RESULTSET &result, const ElementType *vec,
+                     const SearchParams &searchParams) const {
+    assert(vec);
+    if (this->size(*this) == 0)
+      return false;
+    if (!BaseClassRef::root_node)
+      return false;
+    float epsError = 1 + searchParams.eps;
+
+    // fixed or variable-sized container (depending on DIM)
+    distance_vector_t dists;
+    // Fill it with zeros.
+    assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim),
+           static_cast<DistanceType>(0));
+    DistanceType distsq = this->computeInitialDistances(*this, vec, dists);
+    searchLevel(result, vec, BaseClassRef::root_node, distsq, dists,
+                epsError); // "count_leaf" parameter removed since was neither
+                           // used nor returned to the user.
+    return result.full();
+  }
+
+  /**
+   * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1].
+   * Their indices are stored inside the result object.
+   * \sa radiusSearch, findNeighbors
+   * \note nChecks_IGNORED is ignored but kept for compatibility with the
+   * original FLANN interface.
+   * \return Number `N` of valid points in the result set. Only the first `N`
+   * entries in `out_indices` and `out_distances_sq` will be valid. Return may
+   * be less than `num_closest` only if the number of elements in the tree is
+   * less than `num_closest`.
+   */
+  size_t knnSearch(const ElementType *query_point, const size_t num_closest,
+                   IndexType *out_indices, DistanceType *out_distances_sq,
+                   const int /* nChecks_IGNORED */ = 10) const {
+    nanoflann::KNNResultSet<DistanceType, IndexType> resultSet(num_closest);
+    resultSet.init(out_indices, out_distances_sq);
+    this->findNeighbors(resultSet, query_point, nanoflann::SearchParams());
+    return resultSet.size();
+  }
+
+  /**
+   * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius.
+   * The output is given as a vector of pairs, of which the first element is a
+   * point index and the second the corresponding distance. Previous contents
+   * of \a IndicesDists are cleared.
+   *
+   * If searchParams.sorted==true, the output list is sorted by ascending
+   * distances.
+   *
+   * For a better performance, it is advisable to do a .reserve() on the vector
+   * if you have any wild guess about the number of expected matches.
+   *
+   * \sa knnSearch, findNeighbors, radiusSearchCustomCallback
+   * \return The number of points within the given radius (i.e. indices.size()
+   * or dists.size() )
+   */
+  size_t
+  radiusSearch(const ElementType *query_point, const DistanceType &radius,
+               std::vector<std::pair<IndexType, DistanceType>> &IndicesDists,
+               const SearchParams &searchParams) const {
+    RadiusResultSet<DistanceType, IndexType> resultSet(radius, IndicesDists);
+    const size_t nFound =
+        radiusSearchCustomCallback(query_point, resultSet, searchParams);
+    if (searchParams.sorted)
+      std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter());
+    return nFound;
+  }
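+
+/* Editor's illustration (not upstream): calling the two query helpers above
+ * on an already-built index over 3-D double points ("index" is hypothetical).
+ * Note that for the L2 metrics the radius is a *squared* distance.
+ * \code
+ * const double query_pt[3] = {0.5, 0.5, 0.5};
+ *
+ * // k-nearest: only the first N returned entries are valid, and N may be
+ * // smaller than num_closest if the tree holds fewer points.
+ * const size_t num_closest = 5;
+ * std::vector<size_t> ret_indices(num_closest);
+ * std::vector<double> out_dists_sqr(num_closest);
+ * const size_t N = index.knnSearch(&query_pt[0], num_closest,
+ *                                  &ret_indices[0], &out_dists_sqr[0]);
+ *
+ * // radius search, sorted by ascending distance by default:
+ * const double search_radius = 0.1 * 0.1; // squared for metric_L2
+ * std::vector<std::pair<size_t, double> > matches;
+ * const size_t n = index.radiusSearch(&query_pt[0], search_radius, matches,
+ *                                     SearchParams());
+ * \endcode
+ */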
+
+  /**
+   * Just like radiusSearch() but with a custom callback class for each point
+   * found in the radius of the query. See the source of RadiusResultSet<> as
+   * a start point for your own classes.
+   * \sa radiusSearch
+   */
+  template <class SEARCH_CALLBACK>
+  size_t radiusSearchCustomCallback(
+      const ElementType *query_point, SEARCH_CALLBACK &resultSet,
+      const SearchParams &searchParams = SearchParams()) const {
+    this->findNeighbors(resultSet, query_point, searchParams);
+    return resultSet.size();
+  }
+
+  /** @} */
+
+public:
+  void computeBoundingBox(BoundingBox &bbox) {
+    resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim));
+
+    if (dataset.kdtree_get_bbox(bbox)) {
+      // Done! It was implemented in derived class
+    } else {
+      const size_t N = BaseClassRef::m_size;
+      if (!N)
+        throw std::runtime_error("[nanoflann] computeBoundingBox() called but "
+                                 "no data points found.");
+      for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) {
+        bbox[i].low = bbox[i].high =
+            this->dataset_get(*this, BaseClassRef::vind[0], i);
+      }
+      for (size_t k = 1; k < N; ++k) {
+        for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) {
+          if (this->dataset_get(*this, BaseClassRef::vind[k], i) < bbox[i].low)
+            bbox[i].low = this->dataset_get(*this, BaseClassRef::vind[k], i);
+          if (this->dataset_get(*this, BaseClassRef::vind[k], i) > bbox[i].high)
+            bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[k], i);
+        }
+      }
+    }
+  }
+
+  /**
+   * Performs an exact search in the tree starting from a node.
+   * \tparam RESULTSET Should be any ResultSet<DistanceType>
+   */
+  template <class RESULTSET>
+  void searchLevel(RESULTSET &result_set, const ElementType *vec,
+                   const NodePtr node, DistanceType mindistsq,
+                   distance_vector_t &dists, const float epsError) const {
+    /* If this is a leaf node, then do check and return. */
+    if ((node->child1 == NULL) && (node->child2 == NULL)) {
+      // count_leaf += (node->lr.right-node->lr.left); // Removed since was
+      // neither used nor returned to the user.
+      DistanceType worst_dist = result_set.worstDist();
+      for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right;
+           ++i) {
+        const IndexType index = BaseClassRef::vind[i]; // reorder... : i;
+        if (treeIndex[index] == -1)
+          continue;
+        DistanceType dist = distance.evalMetric(
+            vec, index, (DIM > 0 ? DIM : BaseClassRef::dim));
+        if (dist < worst_dist) {
+          if (!result_set.addPoint(
+                  static_cast<typename RESULTSET::DistanceType>(dist),
+                  static_cast<typename RESULTSET::IndexType>(
+                      BaseClassRef::vind[i]))) {
+            // the resultset doesn't want to receive any more points, we're
+            // done searching!
+            return; // false;
+          }
+        }
+      }
+      return;
+    }
+
+    /* Which child branch should be taken first? */
+    int idx = node->node_type.sub.divfeat;
+    ElementType val = vec[idx];
+    DistanceType diff1 = val - node->node_type.sub.divlow;
+    DistanceType diff2 = val - node->node_type.sub.divhigh;
+
+    NodePtr bestChild;
+    NodePtr otherChild;
+    DistanceType cut_dist;
+    if ((diff1 + diff2) < 0) {
+      bestChild = node->child1;
+      otherChild = node->child2;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx);
+    } else {
+      bestChild = node->child2;
+      otherChild = node->child1;
+      cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx);
+    }
+
+    /* Call recursively to search next level down. */
+    searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError);
+
+    DistanceType dst = dists[idx];
+    mindistsq = mindistsq + cut_dist - dst;
+    dists[idx] = cut_dist;
+    if (mindistsq * epsError <= result_set.worstDist()) {
+      searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError);
+    }
+    dists[idx] = dst;
+  }
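+
+/* Editor's note: the "treeIndex[index] == -1" test in the leaf loop above is
+ * where removePoint()'s lazy deletion takes effect: deleted points stay in
+ * the per-tree vind arrays but are skipped at query time, with no rebuild.
+ * Hypothetical sketch ("dyn_index" built as in the wrapper class below):
+ * \code
+ * dyn_index.removePoint(7); // point 7 is now invisible to queries
+ *
+ * const double query_pt[3] = {0.0, 0.0, 0.0};
+ * size_t out_idx;
+ * double out_dist_sqr;
+ * KNNResultSet<double> resultSet(1);
+ * resultSet.init(&out_idx, &out_dist_sqr);
+ * dyn_index.findNeighbors(resultSet, query_pt, SearchParams());
+ * // out_idx should no longer be 7, even if 7 was the true nearest point.
+ * \endcode
+ */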
+
+public:
+  /** Stores the index in a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when
+   * loading the index object it must be constructed associated to the same
+   * source of data points used while building it. See the example:
+   * examples/saveload_example.cpp
+   * \sa loadIndex */
+  void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); }
+
+  /** Loads a previous index from a binary file.
+   * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the
+   * index object must be constructed associated to the same source of data
+   * points used while building the index. See the example:
+   * examples/saveload_example.cpp
+   * \sa saveIndex */
+  void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); }
+};
+
+/** kd-tree dynamic index
+ *
+ * Class to create multiple static indices and merge their results so that
+ * they behave as a single dynamic index, as proposed in the Logarithmic
+ * Approach.
+ *
+ * Example of usage:
+ * examples/dynamic_pointcloud_example.cpp
+ *
+ * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ *         nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
+ * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
+ * \tparam IndexType Will be typically size_t or int
+ */
+template <typename Distance, class DatasetAdaptor, int DIM = -1,
+          typename IndexType = size_t>
+class KDTreeSingleIndexDynamicAdaptor {
+public:
+  typedef typename Distance::ElementType ElementType;
+  typedef typename Distance::DistanceType DistanceType;
+
+protected:
+  size_t m_leaf_max_size;
+  size_t treeCount;
+  size_t pointCount;
+
+  /**
+   * The dataset used by this index
+   */
+  const DatasetAdaptor &dataset; //!< The source of our data
+
+  std::vector<int> treeIndex; //!< treeIndex[idx] is the index of tree in
+                              //!< which point at idx is stored.
+                              //!< treeIndex[idx]=-1 means that point has
+                              //!< been removed.
+
+  KDTreeSingleIndexAdaptorParams index_params;
+
+  int dim; //!< Dimensionality of each data point
+
+  typedef KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM,
+                                           IndexType>
+      index_container_t;
+  std::vector<index_container_t> index;
+
+public:
+  /** Get a const ref to the internal list of indices; the number of indices
+   * is adapted dynamically as the dataset grows in size. */
+  const std::vector<index_container_t> &getAllIndices() const { return index; }
+
+private:
+  /** finds position of least significant unset bit */
+  int First0Bit(IndexType num) {
+    int pos = 0;
+    while (num & 1) {
+      num = num >> 1;
+      pos++;
+    }
+    return pos;
+  }
+
+  /** Creates multiple empty trees to handle dynamic support */
+  void init() {
+    typedef KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM,
+                                             IndexType>
+        my_kd_tree_t;
+    std::vector<my_kd_tree_t> index_(
+        treeCount, my_kd_tree_t(dim /*dim*/, dataset, treeIndex, index_params));
+    index = index_;
+  }
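+
+/* Editor's note: First0Bit() drives the logarithmic rebuild schedule used by
+ * addPoints() below: the position of the lowest zero bit of the running
+ * point counter selects the tree that absorbs the next insertion together
+ * with all smaller trees, like a carry propagating through a binary counter.
+ * Tree i therefore holds 2^i points when full and is rebuilt every 2^(i+1)
+ * insertions. Stand-alone illustration:
+ * \code
+ * // Insertions n = 0..7 land in trees 0,1,0,2,0,1,0,3 respectively.
+ * for (unsigned n = 0; n < 8; n++)
+ *   printf("point %u -> tree %d\n", n, First0Bit(n));
+ * \endcode
+ */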
+
+public:
+  Distance distance;
+
+  /**
+   * KDTree constructor
+   *
+   * Refer to docs in README.md or online in
+   * https://github.com/jlblancoc/nanoflann
+   *
+   * The KD-Tree point dimension (the length of each point in the dataset,
+   * e.g. 3 for 3D points) is determined by means of:
+   *  - The \a DIM template parameter if >0 (highest priority)
+   *  - Otherwise, the \a dimensionality parameter of this constructor.
+   *
+   * @param inputData Dataset with the input features
+   * @param params Basically, the maximum leaf node size
+   */
+  KDTreeSingleIndexDynamicAdaptor(const int dimensionality,
+                                  const DatasetAdaptor &inputData,
+                                  const KDTreeSingleIndexAdaptorParams &params =
+                                      KDTreeSingleIndexAdaptorParams(),
+                                  const size_t maximumPointCount = 1000000000U)
+      : dataset(inputData), index_params(params), distance(inputData) {
+    treeCount = static_cast<size_t>(std::log2(maximumPointCount));
+    pointCount = 0U;
+    dim = dimensionality;
+    treeIndex.clear();
+    if (DIM > 0)
+      dim = DIM;
+    m_leaf_max_size = params.leaf_max_size;
+    init();
+    const size_t num_initial_points = dataset.kdtree_get_point_count();
+    if (num_initial_points > 0) {
+      addPoints(0, num_initial_points - 1);
+    }
+  }
+
+  /** Deleted copy constructor */
+  KDTreeSingleIndexDynamicAdaptor(
+      const KDTreeSingleIndexDynamicAdaptor &) = delete;
+
+  /** Add points to the set, Inserts all points from [start, end] */
+  void addPoints(IndexType start, IndexType end) {
+    size_t count = end - start + 1;
+    treeIndex.resize(treeIndex.size() + count);
+    for (IndexType idx = start; idx <= end; idx++) {
+      int pos = First0Bit(pointCount);
+      index[pos].vind.clear();
+      treeIndex[pointCount] = pos;
+      for (int i = 0; i < pos; i++) {
+        for (int j = 0; j < static_cast<int>(index[i].vind.size()); j++) {
+          index[pos].vind.push_back(index[i].vind[j]);
+          if (treeIndex[index[i].vind[j]] != -1)
+            treeIndex[index[i].vind[j]] = pos;
+        }
+        index[i].vind.clear();
+        index[i].freeIndex(index[i]);
+      }
+      index[pos].vind.push_back(idx);
+      index[pos].buildIndex();
+      pointCount++;
+    }
+  }
+
+  /** Remove a point from the set (Lazy Deletion) */
+  void removePoint(size_t idx) {
+    if (idx >= pointCount)
+      return;
+    treeIndex[idx] = -1;
+  }
+
+  /**
+   * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
+   * inside the result object.
+   *
+   * Params:
+   *   result = the result object in which the indices of the
+   *            nearest-neighbors are stored
+   *   vec    = the vector for which to search the nearest neighbors
+   *
+   * \tparam RESULTSET Should be any ResultSet<DistanceType>
+   * \return True if the requested neighbors could be found.
+   * \sa knnSearch, radiusSearch
+   */
+  template <typename RESULTSET>
+  bool findNeighbors(RESULTSET &result, const ElementType *vec,
+                     const SearchParams &searchParams) const {
+    for (size_t i = 0; i < treeCount; i++) {
+      index[i].findNeighbors(result, &vec[0], searchParams);
+    }
+    return result.full();
+  }
+};
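+
+/* Editor's illustration (not upstream; mirrors the spirit of
+ * examples/dynamic_pointcloud_example.cpp referenced above): end-to-end use
+ * of the dynamic wrapper with a hypothetical "PointCloud" adaptor.
+ * \code
+ * typedef KDTreeSingleIndexDynamicAdaptor<
+ *     L2_Simple_Adaptor<double, PointCloud>, PointCloud, 3> dyn_kd_tree_t;
+ *
+ * PointCloud cloud; // first chunk of data already filled in
+ * dyn_kd_tree_t dyn_index(3, cloud, KDTreeSingleIndexAdaptorParams(10));
+ * // points present at construction time are indexed automatically
+ *
+ * // ... append new points to "cloud", then register them:
+ * // dyn_index.addPoints(firstNewIdx, lastNewIdx);
+ * // dyn_index.removePoint(42); // lazy deletion
+ *
+ * const double query_pt[3] = {0.0, 0.0, 0.0};
+ * size_t out_idx;
+ * double out_dist_sqr;
+ * KNNResultSet<double> resultSet(1);
+ * resultSet.init(&out_idx, &out_dist_sqr);
+ * dyn_index.findNeighbors(resultSet, query_pt, SearchParams());
+ * \endcode
+ */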
+
+/** An L2-metric KD-tree adaptor for working with data directly stored in an
+ * Eigen Matrix, without duplicating the data storage. Each row in the matrix
+ * represents a point in the state space.
+ *
+ * Example of usage:
+ * \code
+ * Eigen::Matrix<num_t, Dynamic, Dynamic> mat;
+ * // Fill out "mat"...
+ *
+ * typedef KDTreeEigenMatrixAdaptor<Eigen::Matrix<num_t, Dynamic, Dynamic> >
+ *     my_kd_tree_t;
+ * const int max_leaf = 10;
+ * my_kd_tree_t mat_index(mat.cols(), mat, max_leaf);
+ * mat_index.index->buildIndex();
+ * mat_index.index->...
+ * \endcode
+ *
+ * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality
+ *         for the points in the data set, allowing more compiler
+ *         optimizations.
+ * \tparam Distance The distance metric to use: nanoflann::metric_L1,
+ *         nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
+ */
+template <class MatrixType, int DIM = -1,
+          class Distance = nanoflann::metric_L2>
+struct KDTreeEigenMatrixAdaptor {
+  typedef KDTreeEigenMatrixAdaptor<MatrixType, DIM, Distance> self_t;
+  typedef typename MatrixType::Scalar num_t;
+  typedef typename MatrixType::Index IndexType;
+  typedef
+      typename Distance::template traits<num_t, self_t>::distance_t metric_t;
+  typedef KDTreeSingleIndexAdaptor<metric_t, self_t,
+                                   MatrixType::ColsAtCompileTime, IndexType>
+      index_t;
+
+  index_t *index; //! The kd-tree index for the user to call its methods as
+                  //! usual with any other FLANN index.
+
+  /// Constructor: takes a const ref to the matrix object with the data points
+  KDTreeEigenMatrixAdaptor(const size_t dimensionality,
+                           const std::reference_wrapper<MatrixType> &mat,
+                           const int leaf_max_size = 10)
+      : m_data_matrix(mat) {
+    const auto dims = mat.get().cols();
+    if (size_t(dims) != dimensionality)
+      throw std::runtime_error(
+          "Error: 'dimensionality' must match column count in data matrix");
+    if (DIM > 0 && int(dims) != DIM)
+      throw std::runtime_error(
+          "Data set dimensionality does not match the 'DIM' template "
+          "argument");
+    index =
+        new index_t(static_cast<int>(dims), *this /* adaptor */,
+                    nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
+    index->buildIndex();
+  }
+
+public:
+  /** Deleted copy constructor */
+  KDTreeEigenMatrixAdaptor(const self_t &) = delete;
+
+  ~KDTreeEigenMatrixAdaptor() { delete index; }
+
+  const std::reference_wrapper<MatrixType> m_data_matrix;
+
+  /** Query for the \a num_closest closest points to a given point (entered as
+   * query_point[0:dim-1]). Note that this is a short-cut method for
+   * index->findNeighbors(). The user can also call index->... methods as
+   * desired.
+   * \note nChecks_IGNORED is ignored but kept for compatibility with the
+   * original FLANN interface.
+   */
+  inline void query(const num_t *query_point, const size_t num_closest,
+                    IndexType *out_indices, num_t *out_distances_sq,
+                    const int /* nChecks_IGNORED */ = 10) const {
+    nanoflann::KNNResultSet<num_t, IndexType> resultSet(num_closest);
+    resultSet.init(out_indices, out_distances_sq);
+    index->findNeighbors(resultSet, query_point, nanoflann::SearchParams());
+  }
+
+  /** @name Interface expected by KDTreeSingleIndexAdaptor
+   * @{ */
+
+  const self_t &derived() const { return *this; }
+  self_t &derived() { return *this; }
+
+  // Must return the number of data points
+  inline size_t kdtree_get_point_count() const {
+    return m_data_matrix.get().rows();
+  }
+
+  // Returns the dim'th component of the idx'th point in the class:
+  inline num_t kdtree_get_pt(const IndexType idx, size_t dim) const {
+    return m_data_matrix.get().coeff(idx, IndexType(dim));
+  }
+
+  // Optional bounding-box computation: return false to default to a standard
+  // bbox computation loop.
+  // Return true if the BBOX was already computed by the class and returned in
+  // "bb" so it can be avoided to redo it again. Look at bb.size() to find out
+  // the expected dimensionality (e.g. 2 or 3 for point clouds)
+  template <class BBOX> bool kdtree_get_bbox(BBOX & /*bb*/) const {
+    return false;
+  }
+
+  /** @} */
+
+}; // end of KDTreeEigenMatrixAdaptor
+/** @} */
 
 /** @} */ // end of grouping
 
-} // end of NS
-
+} // namespace nanoflann
 
 #endif /* NANOFLANN_HPP_ */
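
For reference, a usage sketch of the Eigen adaptor as patched above (editor's illustration, not part of the diff): the matrix binds implicitly to the std::reference_wrapper constructor parameter, and the dimensionality argument must equal mat.cols().

\code
#include <Eigen/Dense>
#include <nanoflann.hpp>
#include <vector>

int main() {
  Eigen::MatrixXd mat(100, 3); // 100 points, one per row
  mat.setRandom();

  typedef nanoflann::KDTreeEigenMatrixAdaptor<Eigen::MatrixXd> my_kd_tree_t;
  my_kd_tree_t mat_index(3 /* = mat.cols() */, mat, 10 /* max leaf */);
  mat_index.index->buildIndex();

  const double query_pt[3] = {0.1, 0.2, 0.3};
  const size_t num_closest = 3;
  std::vector<my_kd_tree_t::IndexType> ret_index(num_closest);
  std::vector<double> out_dist_sqr(num_closest);
  mat_index.query(&query_pt[0], num_closest, &ret_index[0], &out_dist_sqr[0]);
  return 0;
}
\endcode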