Skip to content
2 changes: 2 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

=== Enhancements

* Compute outlier feature influence via the Gateaux derivative to improve attribution
for high-dimensional vectors. (See {ml-pull}2256[#2256].)
* Improve classification and regression model train runtimes for data sets with many
numeric features. (See {ml-pull}2380[#2380], {ml-pull}2388[#2388] and {ml-pull}2390[#2390].)
* Increase the limit on the maximum number of classes to 100 for training classification
Expand Down
350 changes: 180 additions & 170 deletions include/maths/analytics/COutliers.h

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions include/maths/common/CLinearAlgebra.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,18 @@ struct SConstant<CSymmetricMatrixNxN<T, N>> {
}
};

//! \brief Gets the identity matrix for the stack based symmetric matrix type.
//!
//! The size is fixed by the template parameter \p N, so the argument passed
//! to get is unused.
template<typename T, std::size_t N>
struct SIdentity<CSymmetricMatrixNxN<T, N>> {
    //! \return An N x N matrix constructed from zero whose diagonal
    //! elements have then been set to one.
    static CSymmetricMatrixNxN<T, N> get(std::size_t /*dimension*/) {
        CSymmetricMatrixNxN<T, N> identity(T{0});
        std::size_t diagonal = 0;
        while (diagonal < N) {
            identity(diagonal, diagonal) = T{1};
            ++diagonal;
        }
        return identity;
    }
};

// ************************ HEAP SYMMETRIC MATRIX ************************

//! \brief A heap based lightweight dense symmetric matrix class.
Expand Down Expand Up @@ -765,6 +777,18 @@ struct SConstant<CSymmetricMatrix<T>> {
}
};

//! \brief Gets the identity matrix for the heap based symmetric matrix type.
template<typename T>
struct SIdentity<CSymmetricMatrix<T>> {
    //! \param[in] dimension The number of rows (and columns) requested.
    //! \return A matrix constructed from \p dimension and zero whose first
    //! \p dimension diagonal elements have then been set to one.
    static CSymmetricMatrix<T> get(std::size_t dimension) {
        CSymmetricMatrix<T> identity(dimension, T{0});
        std::size_t diagonal = 0;
        while (diagonal < dimension) {
            identity(diagonal, diagonal) = T{1};
            ++diagonal;
        }
        return identity;
    }
};

namespace linear_algebra_detail {

//! \brief Common vector functionality for variable storage type.
Expand Down
25 changes: 21 additions & 4 deletions include/maths/common/CLinearAlgebraEigen.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ class CDenseMatrix : public Eigen::Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic
}
};

//! Free efficient efficient swap for ADLU.
//! Free efficient swap for ADLU.
template<typename SCALAR>
void swap(CDenseMatrix<SCALAR>& lhs, CDenseMatrix<SCALAR>& rhs) {
lhs.swap(rhs);
Expand All @@ -257,6 +257,14 @@ struct SConstant<CDenseMatrix<SCALAR>> {
}
};

//! \brief Gets the square identity matrix for the Eigen based dense matrix type.
template<typename SCALAR>
struct SIdentity<CDenseMatrix<SCALAR>> {
    //! \param[in] dimension Used as both the row and the column count.
    //! \return The result of Identity(dimension, dimension) as a dense matrix.
    static CDenseMatrix<SCALAR> get(std::ptrdiff_t dimension) {
        using TDenseMatrix = CDenseMatrix<SCALAR>;
        return TDenseMatrix::Identity(dimension, dimension);
    }
};

//! \brief Decorates an Eigen column vector with some useful methods.
template<typename SCALAR>
class CDenseVector : public Eigen::Matrix<SCALAR, Eigen::Dynamic, 1> {
Expand Down Expand Up @@ -355,7 +363,7 @@ class CDenseVector : public Eigen::Matrix<SCALAR, Eigen::Dynamic, 1> {
//! Out-of-class definition of the DENSE_VECTOR_TAG member of CDenseVector,
//! initialized with the string "dense_vector".
template<typename SCALAR>
const std::string CDenseVector<SCALAR>::DENSE_VECTOR_TAG{"dense_vector"};

//! Free efficient efficient swap for ADLU.
//! Free efficient swap for ADLU.
template<typename SCALAR>
void swap(CDenseVector<SCALAR>& lhs, CDenseVector<SCALAR>& rhs) {
lhs.swap(rhs);
Expand Down Expand Up @@ -445,7 +453,7 @@ class CMemoryMappedDenseMatrix
}
};

//! Free efficient efficient swap for ADLU.
//! Free efficient swap for ADLU.
template<typename SCALAR, Eigen::AlignmentType ALIGNMENT>
void swap(CMemoryMappedDenseMatrix<SCALAR, ALIGNMENT>& lhs,
CMemoryMappedDenseMatrix<SCALAR, ALIGNMENT>& rhs) {
Expand All @@ -466,6 +474,15 @@ struct SConstant<CMemoryMappedDenseMatrix<SCALAR, ALIGNMENT>> {
}
};

//! \brief Gets the identity matrix to use with a memory mapped dense matrix.
//!
//! This forwards to the CDenseMatrix specialization, so the returned object
//! has whatever type that specialization's get returns.
template<typename SCALAR, Eigen::AlignmentType ALIGNMENT>
struct SIdentity<CMemoryMappedDenseMatrix<SCALAR, ALIGNMENT>> {
    //! \param[in] dimension Passed unchanged to the CDenseMatrix specialization.
    static auto get(std::ptrdiff_t dimension)
        -> decltype(SIdentity<CDenseMatrix<SCALAR>>::get(dimension)) {
        using TDenseIdentity = SIdentity<CDenseMatrix<SCALAR>>;
        return TDenseIdentity::get(dimension);
    }
};

//! \brief Decorates an Eigen::Map of a dense vector with some useful methods
//! and changes default copy semantics to shallow.
//!
Expand Down Expand Up @@ -568,7 +585,7 @@ class CMemoryMappedDenseVector
}
};

//! Free efficient efficient swap for ADLU.
//! Free efficient swap for ADLU.
template<typename SCALAR, Eigen::AlignmentType ALIGNMENT>
void swap(CMemoryMappedDenseVector<SCALAR, ALIGNMENT>& lhs,
CMemoryMappedDenseVector<SCALAR, ALIGNMENT>& rhs) {
Expand Down
17 changes: 13 additions & 4 deletions include/maths/common/CLinearAlgebraFwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,25 @@ class MATHS_COMMON_EXPORT CLinearAlgebra {
static const char DELIMITER = ',';
};

//! \brief Get a constant initialized version of \p TYPE.
//! \brief Gets a constant initialized version of \p TYPE.
//!
//! Each of our vector and matrix types provides a specialization
//! of this class and define a static get method which takes the
//! dimension(s) and the constant value.
//! Each of our vector and matrix types provides a specialization of this
//! class and defines a static get method which takes the dimension(s) and
//! the constant value.
template<typename TYPE>
struct SConstant {
// sizeof never yields a negative value so this condition is always false,
// but because it depends on TYPE it is only evaluated if this primary
// template is instantiated, i.e. when no specialization exists for TYPE.
static_assert(sizeof(TYPE) < 0, "Missing specialisation of SConstant");
};

//! \brief Gets an identity version of \p MATRIX.
//!
//! Each matrix type provides a specialization of this class and defines
//! a static get method which takes the dimension.
template<typename MATRIX>
struct SIdentity {
// sizeof never yields a negative value so this condition is always false,
// but because it depends on MATRIX it is only evaluated if this primary
// template is instantiated, i.e. when no specialization exists for MATRIX.
static_assert(sizeof(MATRIX) < 0, "Missing specialisation of SIdentity");
};

template<typename T, std::size_t N>
class CVectorNx1;
template<typename T, std::size_t N>
Expand Down
18 changes: 18 additions & 0 deletions include/maths/common/CLinearAlgebraShims.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,24 @@ std::size_t dimension(const CAnnotatedVector<VECTOR, ANNOTATION>& x) {
return dimension(static_cast<const VECTOR&>(x));
}

//! Get the number of columns of a matrix.
//!
//! This generic overload is for matrix types which expose a columns()
//! member function.
//!
//! \param[in] m The matrix to query.
//! \return The value of m.columns().
template<typename MATRIX>
std::size_t columns(const MATRIX& m) {
    std::size_t numberColumns = m.columns();
    return numberColumns;
}

//! Get the number of columns of an Eigen dense matrix.
//!
//! \param[in] x The matrix to query.
//! \return The value of x.cols() converted to std::size_t.
template<typename SCALAR>
std::size_t columns(const CDenseMatrix<SCALAR>& x) {
    // Eigen reports sizes using its own index type so the conversion to
    // the unsigned return type is made explicit.
    return static_cast<std::size_t>(x.cols());
}

//! Get the number of columns of an Eigen memory mapped matrix.
//!
//! \param[in] x The matrix to query.
//! \return The value of x.cols() converted to std::size_t.
template<typename SCALAR, Eigen::AlignmentType ALIGNMENT>
std::size_t columns(const CMemoryMappedDenseMatrix<SCALAR, ALIGNMENT>& x) {
    // Eigen reports sizes using its own index type so the conversion to
    // the unsigned return type is made explicit.
    return static_cast<std::size_t>(x.cols());
}

//! Get the concomitant zero vector.
template<typename VECTOR>
auto zero(const VECTOR& x) -> decltype(SConstant<VECTOR>::get(dimension(x), 0)) {
Expand Down
2 changes: 1 addition & 1 deletion lib/api/unittest/CDataFrameAnalyzerOutlierTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ BOOST_AUTO_TEST_CASE(testRunOutlierDetectionPartitioned) {

BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFONumberPartitions) > 1);
// Allow a 20% margin
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) < 150000);
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) < 160000);
BOOST_TEST_REQUIRE(
core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) <
(120 * core::CProgramCounters::counter(counter_t::E_DFOEstimatedPeakMemoryUsage)) / 100);
Expand Down
Loading