From 6649c9575d720d60c20a6d10623b6a810f1000f6 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Sun, 15 May 2022 15:20:33 +0100 Subject: [PATCH 01/12] Improve non-negative time series modelling --- include/maths/common/CLinearAlgebra.h | 70 ++++-- include/maths/common/CLinearAlgebraShims.h | 16 +- .../maths/time_series/CCalendarComponent.h | 6 +- .../time_series/CDecompositionComponent.h | 13 +- .../maths/time_series/CSeasonalComponent.h | 4 +- .../time_series/CTimeSeriesDecomposition.h | 47 ++-- .../CTimeSeriesDecompositionInterface.h | 36 +-- .../CTimeSeriesDecompositionStub.h | 24 +- include/maths/time_series/CTrendComponent.h | 31 +-- .../common/unittest/CLinearAlgebraTest.cc | 187 ++++++++------ lib/maths/time_series/CCalendarComponent.cc | 8 +- .../time_series/CDecompositionComponent.cc | 31 +-- lib/maths/time_series/CSeasonalComponent.cc | 23 +- .../time_series/CTimeSeriesDecomposition.cc | 196 +++++++-------- .../CTimeSeriesDecompositionDetail.cc | 30 +-- .../CTimeSeriesDecompositionStub.cc | 25 +- lib/maths/time_series/CTimeSeriesModel.cc | 195 +++++++------- lib/maths/time_series/CTrendComponent.cc | 80 +++--- .../unittest/CCalendarComponentTest.cc | 4 +- .../unittest/CSeasonalComponentTest.cc | 119 ++++----- .../unittest/CTimeSeriesDecompositionTest.cc | 237 +++++++++--------- .../unittest/CTimeSeriesModelTest.cc | 58 ++--- .../unittest/CTrendComponentTest.cc | 10 +- lib/model/CInterimBucketCorrector.cc | 7 +- 24 files changed, 745 insertions(+), 712 deletions(-) diff --git a/include/maths/common/CLinearAlgebra.h b/include/maths/common/CLinearAlgebra.h index f4368360f1..8f0d0db194 100644 --- a/include/maths/common/CLinearAlgebra.h +++ b/include/maths/common/CLinearAlgebra.h @@ -30,6 +30,7 @@ #include #include +#include BOOST_GEOMETRY_REGISTER_STD_ARRAY_CS(cs::cartesian) @@ -168,7 +169,7 @@ struct SSymmetricMatrix { //! The Frobenius norm. double frobenius(std::size_t d) const { double result = 0.0; - for (std::size_t i = 0u, i_ = 0; i < d; ++i, ++i_) { + for (std::size_t i = 0, i_ = 0; i < d; ++i, ++i_) { for (std::size_t j = 0; j < i; ++j, ++i_) { result += 2.0 * m_LowerTriangle[i_] * m_LowerTriangle[i_]; } @@ -177,10 +178,17 @@ struct SSymmetricMatrix { return std::sqrt(result); } + //! Get the mean of the matrix elements. + double mean(std::size_t d) const { + return (2.0 * std::accumulate(m_LowerTriangle.begin(), m_LowerTriangle.end(), 0.0) - + this->trace(d)) / + static_cast(d * d); + } + //! Convert to the MATRIX representation. template inline MATRIX& toType(std::size_t d, MATRIX& result) const { - for (std::size_t i = 0u, i_ = 0; i < d; ++i) { + for (std::size_t i = 0, i_ = 0; i < d; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { result(i, j) = result(j, i) = m_LowerTriangle[i_]; } @@ -267,7 +275,7 @@ class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrix //! Construct from C-style array of arrays. explicit CSymmetricMatrixNxN(const TArray& m) { - for (std::size_t i = 0u, i_ = 0; i < N; ++i) { + for (std::size_t i = 0, i_ = 0; i < N; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m[i][j]; } @@ -276,7 +284,7 @@ class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrix //! Construct from a vector of vectors. 
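// A standalone sketch of the identity behind the mean() member added above,
// assuming only the CSymmetricMatrixNxN API shown in this file (the function
// name is illustrative): every off-diagonal element of a symmetric matrix
// appears twice, so the element mean is (2 * sum(lower triangle) - trace) / d^2.
inline double symmetricMatrixMeanSketch() {
    double m[][2]{{1.0, 2.0}, {2.0, 3.0}};
    ml::maths::common::CSymmetricMatrixNxN<double, 2> matrix(m);
    // Lower triangle storage is {1.0, 2.0, 3.0}: (2 * 6.0 - 4.0) / 4.0 == 2.0,
    // which matches (1.0 + 2.0 + 2.0 + 3.0) / 4.0 over all four elements.
    return matrix.mean();
}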
explicit CSymmetricMatrixNxN(const TVecVec& m) { - for (std::size_t i = 0u, i_ = 0; i < N; ++i) { + for (std::size_t i = 0, i_ = 0; i < N; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m[i][j]; } @@ -286,7 +294,7 @@ class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrix //! Construct from a small vector of small vectors. template explicit CSymmetricMatrixNxN(const core::CSmallVectorBase>& m) { - for (std::size_t i = 0u, i_ = 0; i < N; ++i) { + for (std::size_t i = 0, i_ = 0; i < N; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m[i][j]; } @@ -319,7 +327,7 @@ class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrix //! Assignment if the underlying type is implicitly convertible. template - const CSymmetricMatrixNxN& operator=(const CSymmetricMatrixNxN& other) { + CSymmetricMatrixNxN& operator=(const CSymmetricMatrixNxN& other) { this->assign(other.base()); return *this; } @@ -428,6 +436,9 @@ class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrix //! Get the Frobenius norm. double frobenius() const { return this->TBase::frobenius(N); } + //! Get the mean of the matrix elements. + double mean() const { return this->TBase::mean(N); } + //! Convert to a vector of vectors. template inline VECTOR_OF_VECTORS toVectors() const { @@ -454,7 +465,7 @@ class CSymmetricMatrixNxN : private boost::equality_comparable< CSymmetricMatrix } //! Get a checksum for the matrix. - uint64_t checksum() const { return this->TBase::checksum(); } + std::uint64_t checksum() const { return this->TBase::checksum(); } }; //! \brief Gets a constant symmetric matrix with specified dimension. @@ -518,7 +529,7 @@ class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix public: //! Set to multiple of ones matrix. - explicit CSymmetricMatrix(std::size_t d = 0u, T v = T(0)) : m_D(d) { + explicit CSymmetricMatrix(std::size_t d = 0, T v = T(0)) : m_D(d) { if (d > 0) { TBase::m_LowerTriangle.resize(d * (d + 1) / 2, v); } @@ -527,7 +538,7 @@ class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix //! Construct from C-style array of arrays. explicit CSymmetricMatrix(const TArray& m) : m_D(m.size()) { TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - for (std::size_t i = 0u, i_ = 0; i < m_D; ++i) { + for (std::size_t i = 0, i_ = 0; i < m_D; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m[i][j]; } @@ -539,7 +550,7 @@ class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix explicit CSymmetricMatrix(const core::CSmallVectorBase>& m) : m_D(m.size()) { TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); - for (std::size_t i = 0u, i_ = 0; i < m_D; ++i) { + for (std::size_t i = 0, i_ = 0; i < m_D; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = m[i][j]; } @@ -581,7 +592,7 @@ class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix //! \note Because this is template it is *not* an copy assignment //! operator so this class has implicit move semantics. template - const CSymmetricMatrix& operator=(const CSymmetricMatrix& other) { + CSymmetricMatrix& operator=(const CSymmetricMatrix& other) { m_D = other.m_D; TBase::m_LowerTriangle.resize(m_D * (m_D + 1) / 2); this->assign(other.base()); @@ -702,6 +713,9 @@ class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix //! The Frobenius norm. 
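// A standalone sketch of the norm computed below, assuming the dynamic
// CSymmetricMatrix API shown in this file (the function name is illustrative):
// the packed lower-triangle storage lets frobenius() double the squared
// off-diagonal terms instead of visiting all d * d elements.
inline double frobeniusSketch() {
    ml::maths::common::CSymmetricMatrix<double> matrix(2, 1.0); // 2 x 2 matrix of ones
    // sqrt(2 * 1^2 + 2 * 1^2) == 2.0: two off-diagonal ones counted twice
    // plus the two diagonal ones.
    return matrix.frobenius();
}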
double frobenius() const { return this->TBase::frobenius(m_D); } + //! Get the mean of the matrix elements. + double mean() const { return this->TBase::mean(m_D); } + //! Convert to a vector of vectors. template inline VECTOR_OF_VECTORS toVectors() const { @@ -728,9 +742,9 @@ class CSymmetricMatrix : private boost::equality_comparable< CSymmetricMatrix } //! Get a checksum for the matrix. - uint64_t checksum() const { + std::uint64_t checksum() const { return core::CHashing::hashCombine(this->TBase::checksum(), - static_cast(m_D)); + static_cast(m_D)); } private: @@ -867,6 +881,12 @@ struct SVector { return result; } + //! Get the mean of the vector components. + double mean() const { + return std::accumulate(m_X.begin(), m_X.end(), 0.0) / + static_cast(m_X.size()); + } + //! Convert to the VECTOR representation. template inline VECTOR& toType(VECTOR& result) const { @@ -1000,7 +1020,7 @@ class CVectorNx1 : private boost::equality_comparable< CVectorNx1, //! Assignment if the underlying type is implicitly convertible. template - const CVectorNx1& operator=(const CVectorNx1& other) { + CVectorNx1& operator=(const CVectorNx1& other) { this->assign(other.base()); return *this; } @@ -1118,6 +1138,9 @@ class CVectorNx1 : private boost::equality_comparable< CVectorNx1, //! Euclidean norm. double euclidean() const { return std::sqrt(this->inner(*this)); } + //! Get the mean of the vector components. + double mean() const { return this->TBase::mean(); } + //! Convert to a vector on a different underlying type. template inline CVectorNx1 to() const { @@ -1143,7 +1166,7 @@ class CVectorNx1 : private boost::equality_comparable< CVectorNx1, } //! Get a checksum of this vector's components. - uint64_t checksum() const { return this->TBase::checksum(); } + std::uint64_t checksum() const { return this->TBase::checksum(); } //! Get the smallest possible vector. static const CVectorNx1& smallest() { @@ -1164,14 +1187,14 @@ CSymmetricMatrixNxN::CSymmetricMatrixNxN(ESymmetricMatrixType type, const CVectorNx1& x) { switch (type) { case E_OuterProduct: - for (std::size_t i = 0u, i_ = 0; i < N; ++i) { + for (std::size_t i = 0, i_ = 0; i < N; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = x(i) * x(j); } } break; case E_Diagonal: - for (std::size_t i = 0u, i_ = 0; i < N; ++i) { + for (std::size_t i = 0, i_ = 0; i < N; ++i) { for (std::size_t j = 0; j <= i; ++j, ++i_) { TBase::m_LowerTriangle[i_] = i == j ? x(i) : T(0); } @@ -1233,7 +1256,7 @@ class CVector : private boost::equality_comparable< CVector, public: //! Set to multiple of ones vector. - explicit CVector(std::size_t d = 0u, T v = T(0)) { + explicit CVector(std::size_t d = 0, T v = T(0)) { if (d > 0) { TBase::m_X.resize(d, v); } @@ -1284,7 +1307,7 @@ class CVector : private boost::equality_comparable< CVector, //! \note Because this is template it is *not* an copy assignment //! operator so this class has implicit move semantics. template - const CVector& operator=(const CVector& other) { + CVector& operator=(const CVector& other) { TBase::m_X.resize(other.dimension()); this->TBase::assign(other.base()); return *this; @@ -1425,6 +1448,9 @@ class CVector : private boost::equality_comparable< CVector, //! Euclidean norm. double euclidean() const { return std::sqrt(this->inner(*this)); } + //! Get the mean of the vector components. + double mean() const { return this->TBase::mean(); } + //! Convert to a vector on a different underlying type. 
template inline CVector to() const { @@ -1447,7 +1473,7 @@ class CVector : private boost::equality_comparable< CVector, } //! Get a checksum of this vector's components. - uint64_t checksum() const { return this->TBase::checksum(); } + std::uint64_t checksum() const { return this->TBase::checksum(); } //! Get the smallest possible vector. static const CVector& smallest(std::size_t d) { @@ -1468,14 +1494,14 @@ CSymmetricMatrix::CSymmetricMatrix(ESymmetricMatrixType type, const CVector::Type L1(const CAnnotatedVector return L1(static_cast(x)); } +//! Get the Manhattan norm of one of our internal vector classes. +template +double mean(const TENSOR& x) { + return x.mean(); +} + +//! Get the mean of the elements of an annotated vector. +template +double mean(const CAnnotatedVector& x) { + return mean(static_cast(x)); +} + //! Get the Frobenius norm of one of our internal matrices. template typename SCoordinate::Type frobenius(const MATRIX& m) { return m.frobenius(); } -//! Get the Euclidean norm of an Eigen dense vector. +//! Get the Frobenius norm of an Eigen dense matrix. template SCALAR frobenius(const CDenseMatrix& x) { return x.norm(); } -//! Get the Euclidean norm of an Eigen memory mapped matrix. +//! Get the Frobenius norm of an Eigen memory mapped matrix. template SCALAR frobenius(const CMemoryMappedDenseMatrix& x) { return x.norm(); diff --git a/include/maths/time_series/CCalendarComponent.h b/include/maths/time_series/CCalendarComponent.h index 0db82e5586..9179a5ab12 100644 --- a/include/maths/time_series/CCalendarComponent.h +++ b/include/maths/time_series/CCalendarComponent.h @@ -127,7 +127,7 @@ class MATHS_TIME_SERIES_EXPORT CCalendarComponent : private CDecompositionCompon //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the variance //! as a percentage. - TDoubleDoublePr value(core_t::TTime time, double confidence) const; + TVector2x1 value(core_t::TTime time, double confidence) const; //! Get the mean value of the component. double meanValue() const; @@ -137,13 +137,13 @@ class MATHS_TIME_SERIES_EXPORT CCalendarComponent : private CDecompositionCompon //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the //! variance as a percentage. - TDoubleDoublePr variance(core_t::TTime time, double confidence) const; + TVector2x1 variance(core_t::TTime time, double confidence) const; //! Get the mean variance of the component residuals. double meanVariance() const; //! Get a checksum for this object. - uint64_t checksum(uint64_t seed = 0) const; + std::uint64_t checksum(std::uint64_t seed = 0) const; //! Debug the memory used by this component. void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; diff --git a/include/maths/time_series/CDecompositionComponent.h b/include/maths/time_series/CDecompositionComponent.h index cc7f7497d3..5b667b5b71 100644 --- a/include/maths/time_series/CDecompositionComponent.h +++ b/include/maths/time_series/CDecompositionComponent.h @@ -15,13 +15,12 @@ #include #include +#include #include #include #include -#include - #include #include #include @@ -38,7 +37,7 @@ namespace time_series { //! \brief Common functionality used by our decomposition component classes. 
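// A caller-side sketch of the 2x1 confidence intervals this patch switches to
// in place of maths_t::TDoubleDoublePr (the function name is illustrative):
// component 0 carries the lower bound, component 1 the upper bound, and mean()
// yields the midpoint callers previously computed with
// common::CBasicStatistics::mean on the pair.
inline double intervalMidpoint(const ml::maths::common::CVectorNx1<double, 2>& interval) {
    // interval(0) is the lower confidence bound and interval(1) the upper.
    return interval.mean(); // == (interval(0) + interval(1)) / 2.0
}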
class MATHS_TIME_SERIES_EXPORT CDecompositionComponent { public: - using TDoubleDoublePr = maths_t::TDoubleDoublePr; + using TVector2x1 = common::CVectorNx1; using TDoubleVec = std::vector; using TFloatVec = std::vector; using TSplineCRef = common::CSpline, @@ -104,7 +103,7 @@ class MATHS_TIME_SERIES_EXPORT CDecompositionComponent { common::CSplineTypes::EBoundaryCondition boundary); //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const; + std::uint64_t checksum(std::uint64_t seed) const; //! Debug the memory used by the splines. void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; @@ -165,7 +164,7 @@ class MATHS_TIME_SERIES_EXPORT CDecompositionComponent { //! \param[in] n The bucket count containing \p offset. //! \param[in] confidence The symmetric confidence interval for the variance //! as a percentage. - TDoubleDoublePr value(double offset, double n, double confidence) const; + TVector2x1 value(double offset, double n, double confidence) const; //! Get the mean value of the function. double meanValue() const; @@ -176,7 +175,7 @@ class MATHS_TIME_SERIES_EXPORT CDecompositionComponent { //! \param[in] n The bucket count containing \p offset. //! \param[in] confidence The symmetric confidence interval for the //! variance as a percentage. - TDoubleDoublePr variance(double offset, double n, double confidence) const; + TVector2x1 variance(double offset, double n, double confidence) const; //! Get the mean variance of the function residuals. double meanVariance() const; @@ -197,7 +196,7 @@ class MATHS_TIME_SERIES_EXPORT CDecompositionComponent { const CPackedSplines& splines() const; //! Get a checksum for this object. - uint64_t checksum(uint64_t seed) const; + std::uint64_t checksum(std::uint64_t seed) const; private: //! The minimum permitted size for the points sketch. diff --git a/include/maths/time_series/CSeasonalComponent.h b/include/maths/time_series/CSeasonalComponent.h index 0cd5556b7e..88c457e4a2 100644 --- a/include/maths/time_series/CSeasonalComponent.h +++ b/include/maths/time_series/CSeasonalComponent.h @@ -167,7 +167,7 @@ class MATHS_TIME_SERIES_EXPORT CSeasonalComponent : private CDecompositionCompon //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the variance //! as a percentage. - TDoubleDoublePr value(core_t::TTime time, double confidence) const; + TVector2x1 value(core_t::TTime time, double confidence) const; //! Get the mean value of the component. double meanValue() const; @@ -188,7 +188,7 @@ class MATHS_TIME_SERIES_EXPORT CSeasonalComponent : private CDecompositionCompon //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the //! variance as a percentage. - TDoubleDoublePr variance(core_t::TTime time, double confidence) const; + TVector2x1 variance(core_t::TTime time, double confidence) const; //! Get the mean variance of the component residuals. double meanVariance() const; diff --git a/include/maths/time_series/CTimeSeriesDecomposition.h b/include/maths/time_series/CTimeSeriesDecomposition.h index a55dad4f55..cd9e4e703c 100644 --- a/include/maths/time_series/CTimeSeriesDecomposition.h +++ b/include/maths/time_series/CTimeSeriesDecomposition.h @@ -131,15 +131,9 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the prediction //! the baseline as a percentage. - //! 
\param[in] components The components to include in the baseline. - //! \param[in] removedSeasonalMask A bit mask of specific seasonal components - //! to remove. This is only intended for use by CTimeSeriesTestForSeasonlity. - //! \param[in] smooth Detail do not supply. - maths_t::TDoubleDoublePr value(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - const TBoolVec& removedSeasonalMask = {}, - bool smooth = true) const override; + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. + TVector2x1 value(core_t::TTime time, double confidence, bool isNonNegative) const override; //! Get a function which returns the decomposition value as a function of time. //! @@ -159,29 +153,33 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition //! \param[in] step The time increment. //! \param[in] confidence The forecast confidence interval. //! \param[in] minimumScale The minimum permitted seasonal scale. + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. //! \param[in] writer Forecast results are passed to this callback. void forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, double minimumScale, + bool isNonNegative, const TWriteForecastResult& writer) override; //! Remove the prediction of the component models at \p time from \p value. //! //! \param[in] time The time of \p value. + //! \param[in] value The value from which to remove the prediction. //! \param[in] confidence The prediction confidence interval as a percentage. //! The closest point to \p value in the confidence interval is removed. + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. //! \param[in] maximumTimeShift The maximum amount by which we will shift //! \p time in order to minimize the difference between the prediction and //! \p value. - //! \param[in] components A bit mask of the type of components to remove. - //! \note That detrending preserves the time series mean. double detrend(core_t::TTime time, double value, double confidence, - core_t::TTime maximumTimeShift = 0, - int components = E_All) const override; + bool isNonNegative, + core_t::TTime maximumTimeShift = 0) const override; //! Get the mean variance of the baseline. double meanVariance() const override; @@ -192,10 +190,7 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition //! \param[in] variance The variance of the distribution to scale. //! \param[in] confidence The symmetric confidence interval for the variance //! scale as a percentage. - maths_t::TDoubleDoublePr varianceScaleWeight(core_t::TTime time, - double variance, - double confidence, - bool smooth = true) const override; + TVector2x1 varianceScaleWeight(core_t::TTime time, double variance, double confidence) const override; //! Get the count weight to apply at \p time. double countWeight(core_t::TTime time) const override; @@ -204,7 +199,10 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition double winsorisationDerate(core_t::TTime time) const override; //! Get the prediction residuals in a recent time window. - TFloatMeanAccumulatorVec residuals() const override; + //! + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. + TFloatMeanAccumulatorVec residuals(bool isNonNegative) const override; //! Roll time forwards by \p skipInterval. 
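// A sketch of the isNonNegative contract threaded through these interfaces
// (the function name is illustrative): when the modelled data cannot be
// negative, predicted intervals are clamped at zero component-wise, mirroring
// the expression CTimeSeriesDecomposition::value uses later in this patch.
inline ml::maths::common::CVectorNx1<double, 2>
clampIfNonNegative(const ml::maths::common::CVectorNx1<double, 2>& interval,
                   bool isNonNegative) {
    // max here is the component-wise vector/scalar maximum the implementation uses.
    return isNonNegative ? max(interval, 0.0) : interval;
}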
void skipTime(core_t::TTime skipInterval) override; @@ -241,10 +239,21 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition bool acceptRestoreTraverser(const common::SDistributionRestoreParams& params, core::CStateRestoreTraverser& traverser); + //! Get the predicted value of the time series at \p time. + TVector2x1 value(core_t::TTime time, + double confidence, + int components, + const TBoolVec& removedSeasonalMask = {}, + bool smooth = true) const; + + //! Compute the variance scale weight to apply at \p time. + TVector2x1 + varianceScaleWeight(core_t::TTime time, double variance, double confidence, bool smooth) const; + //! The correction to produce a smooth join between periodic //! repeats and partitions. template - maths_t::TDoubleDoublePr smooth(const F& f, core_t::TTime time, int components) const; + TVector2x1 smooth(const F& f, core_t::TTime time, int components) const; private: //! The time over which discontinuities between weekdays diff --git a/include/maths/time_series/CTimeSeriesDecompositionInterface.h b/include/maths/time_series/CTimeSeriesDecompositionInterface.h index 82e81f5fea..a65db1c192 100644 --- a/include/maths/time_series/CTimeSeriesDecompositionInterface.h +++ b/include/maths/time_series/CTimeSeriesDecompositionInterface.h @@ -57,6 +57,7 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionTypes { class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionInterface : public CTimeSeriesDecompositionTypes { public: + using TVector2x1 = common::CVectorNx1; using TDouble3Vec = core::CSmallVector; using TDouble3VecVec = std::vector; using TWeights = maths_t::CUnitWeights; @@ -122,14 +123,10 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionInterface //! \param[in] time The time of interest. //! \param[in] confidence The symmetric confidence interval for the prediction //! the baseline as a percentage. - //! \param[in] components The components to include in the baseline. - //! \param[in] removedSeasonalMask A bit mask of specific seasonal components - //! to remove. This is only intended for use by CTimeSeriesTestForSeasonlity. - virtual maths_t::TDoubleDoublePr value(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - const TBoolVec& removedSeasonalMask = {}, - bool smooth = true) const = 0; + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. + virtual TVector2x1 + value(core_t::TTime time, double confidence, bool isNonNegative) const = 0; //! Get the maximum interval for which the time series can be forecast. virtual core_t::TTime maximumForecastInterval() const = 0; @@ -141,29 +138,33 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionInterface //! \param[in] step The time increment. //! \param[in] confidence The forecast confidence interval. //! \param[in] minimumScale The minimum permitted seasonal scale. + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. //! \param[in] writer Forecast results are passed to this callback. virtual void forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, double minimumScale, + bool isNonNegative, const TWriteForecastResult& writer) = 0; //! Remove the prediction of the component models at \p time from \p value. //! //! \param[in] time The time of \p value. + //! \param[in] value The value from which to remove the prediction. //! \param[in] confidence The prediction confidence interval as a percentage. //! 
The closest point to \p value in the confidence interval is removed. + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. //! \param[in] maximumTimeShift The maximum amount by which we will shift //! \p time in order to minimize the difference between the prediction and //! \p value. - //! \param[in] components A bit mask of the type of components to remove. - //! \note That detrending preserves the time series mean. virtual double detrend(core_t::TTime time, double value, double confidence, - core_t::TTime maximumTimeShift = 0, - int components = E_All) const = 0; + bool isNonNegative, + core_t::TTime maximumTimeShift = 0) const = 0; //! Get the mean variance of the baseline. virtual double meanVariance() const = 0; @@ -174,10 +175,8 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionInterface //! \param[in] variance The variance of the distribution to scale. //! \param[in] confidence The symmetric confidence interval for the variance //! scale as a percentage. - virtual maths_t::TDoubleDoublePr varianceScaleWeight(core_t::TTime time, - double variance, - double confidence, - bool smooth = true) const = 0; + virtual TVector2x1 + varianceScaleWeight(core_t::TTime time, double variance, double confidence) const = 0; //! Get the count weight to apply at \p time. virtual double countWeight(core_t::TTime time) const = 0; @@ -186,7 +185,10 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionInterface virtual double winsorisationDerate(core_t::TTime time) const = 0; //! Get the prediction residuals in a recent time window. - virtual TFloatMeanAccumulatorVec residuals() const = 0; + //! + //! \param[in] isNonNegative True if the data being modelled are known to be + //! non-negative. + virtual TFloatMeanAccumulatorVec residuals(bool isNonNegative) const = 0; //! Roll time forwards by \p skipInterval. virtual void skipTime(core_t::TTime skipInterval) = 0; diff --git a/include/maths/time_series/CTimeSeriesDecompositionStub.h b/include/maths/time_series/CTimeSeriesDecompositionStub.h index a9177db620..0578b9ed37 100644 --- a/include/maths/time_series/CTimeSeriesDecompositionStub.h +++ b/include/maths/time_series/CTimeSeriesDecompositionStub.h @@ -61,12 +61,10 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionStub //! Returns 0. double meanValue(core_t::TTime time) const override; - //! Returns (0.0, 0.0). - maths_t::TDoubleDoublePr value(core_t::TTime time, - double confidence = 0.0, - int components = E_All, - const TBoolVec& removedSeasonalMask = {}, - bool smooth = true) const override; + //! Returns zero vector. + TVector2x1 value(core_t::TTime time, + double confidence = 0.0, + bool isNonNegative = false) const override; //! Returns 0. core_t::TTime maximumForecastInterval() const override; @@ -77,23 +75,21 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionStub core_t::TTime step, double confidence, double minimumScale, + bool isNonNegative, const TWriteForecastResult& writer) override; //! Returns \p value. double detrend(core_t::TTime time, double value, double confidence, - core_t::TTime maximumTimeShift = 0, - int components = E_All) const override; + bool isNonNegative, + core_t::TTime maximumTimeShift = 0) const override; //! Returns 0.0. double meanVariance() const override; - //! Returns (1.0, 1.0). - maths_t::TDoubleDoublePr varianceScaleWeight(core_t::TTime time, - double variance, - double confidence, - bool smooth = true) const override; + //! Returns ones vector. 
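// A hypothetical caller of the new varianceScaleWeight() signature (function
// and variable roles here are illustrative): the returned 2x1 vector carries
// the lower and upper variance scale bounds that the old TDoubleDoublePr
// returned as first/second.
inline double varianceScaleWidthSketch(
    const ml::maths::time_series::CTimeSeriesDecompositionInterface& decomposition,
    ml::core_t::TTime time, double variance) {
    auto scale = decomposition.varianceScaleWeight(time, variance, 70.0);
    return scale(1) - scale(0); // upper bound minus lower bound of the scale
}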
+    TVector2x1 varianceScaleWeight(core_t::TTime time, double variance, double confidence) const override;

     //! Returns 1.0.
     double countWeight(core_t::TTime time) const override;
@@ -102,7 +98,7 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionStub
     double winsorisationDerate(core_t::TTime time) const override;

     //! Returns an empty vector.
-    TFloatMeanAccumulatorVec residuals() const override;
+    TFloatMeanAccumulatorVec residuals(bool isNonNegative) const override;

     //! No-op.
     void skipTime(core_t::TTime skipInterval) override;
diff --git a/include/maths/time_series/CTrendComponent.h b/include/maths/time_series/CTrendComponent.h
index 5f8517330a..9efb672469 100644
--- a/include/maths/time_series/CTrendComponent.h
+++ b/include/maths/time_series/CTrendComponent.h
@@ -52,13 +52,13 @@ namespace time_series {
 //! is common in many real world time series.
 class MATHS_TIME_SERIES_EXPORT CTrendComponent {
 public:
-    using TDoubleDoublePr = maths_t::TDoubleDoublePr;
     using TDoubleVec = std::vector<double>;
     using TDouble3Vec = core::CSmallVector<double, 3>;
     using TSizeVec = std::vector<std::size_t>;
-    using TVector = common::CVectorNx1<double, 3>;
-    using TMatrix = common::CSymmetricMatrixNxN<double, 3>;
-    using TMatrixVec = std::vector<TMatrix>;
+    using TVector2x1 = common::CVectorNx1<double, 2>;
+    using TVector3x1 = common::CVectorNx1<double, 3>;
+    using TMatrix3x3 = common::CSymmetricMatrixNxN<double, 3>;
+    using TMatrix3x3Vec = std::vector<TMatrix3x3>;
     using TFloatMeanAccumulator = common::CBasicStatistics::SSampleMean<common::CFloatStorage>::TAccumulator;
     using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;

     using TSeasonalForecast = std::function<TDouble3Vec(core_t::TTime)>;
     using TWriteForecastResult = std::function<void(core_t::TTime, const TDouble3Vec&)>;

 public:
-    CTrendComponent(double decayRate);
+    explicit CTrendComponent(double decayRate);

     //! Efficiently swap the state of this and \p other.
     void swap(CTrendComponent& other);
@@ -145,7 +145,7 @@ class MATHS_TIME_SERIES_EXPORT CTrendComponent {
     //! \param[in] time The time of interest.
     //! \param[in] confidence The symmetric confidence interval for the variance
     //! as a percentage.
-    TDoubleDoublePr value(core_t::TTime time, double confidence) const;
+    TVector2x1 value(core_t::TTime time, double confidence) const;

     //! Get a function which returns the trend value as a function of time.
     //!
     //! \param[in] confidence The symmetric confidence interval for the
     //! variance as a percentage.
-    TDoubleDoublePr variance(double confidence) const;
+    TVector2x1 variance(double confidence) const;

     //! Get the maximum interval for which the trend model can be forecast.
     core_t::TTime maximumForecastInterval() const;
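// A sketch of driving forecast() below with the new isNonNegative flag. The
// writer callback signature is an assumption based on the TDouble3Vec results
// the implementation produces (lower bound, prediction, upper bound); the
// function and variable names are illustrative.
inline void forecastSketch(const ml::maths::time_series::CTrendComponent& trend,
                           ml::core_t::TTime startTime, ml::core_t::TTime endTime) {
    auto writer = [](ml::core_t::TTime time, const ml::core::CSmallVector<double, 3>& forecast) {
        // forecast[0], forecast[1], forecast[2]: lower bound, prediction, upper bound.
    };
    // With an hourly step, a 95% confidence interval and non-negative data,
    // given a suitable seasonal callback of type TSeasonalForecast:
    // trend.forecast(startTime, endTime, 3600, 95.0, /*isNonNegative=*/true, seasonal, writer);
}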
     //! \param[in] startTime The start time of the forecast interval.
     //! \param[in] endTime The end time of the forecast interval.
     //! \param[in] step The time step.
     //! \param[in] confidence The confidence interval to calculate.
+    //! \param[in] isNonNegative True if the data being modelled are known to be
+    //! non-negative.
     //! \param[in] seasonal Forecasts seasonal components.
     //! \param[in] writer Writes out forecast results.
     void forecast(core_t::TTime startTime,
                   core_t::TTime endTime,
                   core_t::TTime step,
                   double confidence,
+                  bool isNonNegative,
                   const TSeasonalForecast& seasonal,
                   const TWriteForecastResult& writer) const;
@@ -196,7 +199,8 @@ class MATHS_TIME_SERIES_EXPORT CTrendComponent {
     using TRegressionArray = TRegression::TArray;
     using TRegressionArrayVec = std::vector<TRegressionArray>;
     using TMeanAccumulator = common::CBasicStatistics::SSampleMean<double>::TAccumulator;
-    using TVectorMeanAccumulator = common::CBasicStatistics::SSampleMean<TVector>::TAccumulator;
+    using TVector3x1MeanAccumulator =
+        common::CBasicStatistics::SSampleMean<TVector3x1>::TAccumulator;
     using TMeanVarAccumulator = common::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;

     //! \brief A model of the trend at a specific time scale.
     struct SModel {
         std::uint64_t checksum(std::uint64_t seed) const;
         TMeanAccumulator s_Weight;
         TRegression s_Regression;
-        TVectorMeanAccumulator s_Mse;
+        TVector3x1MeanAccumulator s_Mse;
     };

     using TModelVec = std::vector<SModel>;

     class CForecastLevel : private core::CNonCopyable {
     public:
         //! The default number of roll out paths to use.
-        static const std::size_t DEFAULT_NUMBER_PATHS{100u};
+        static const std::size_t DEFAULT_NUMBER_PATHS{100};

     public:
         CForecastLevel(const common::CNaiveBayes& probability,

     //! Select the most complex model for which there is significant evidence.
     TSizeVec selectModelOrdersForForecasting() const;

-    //! Get the initial weights to use for forecast predictions.
-    TDoubleVec initialForecastModelWeights() const;
-
-    //! Get the initial weights to use for forecast prediction errors.
-    TDoubleVec initialForecastErrorWeights() const;
+    //! Get the initial model weights to use for forecasting.
+    TDoubleVec initialForecastModelWeights(std::size_t n) const;

     //! Get the mean count of samples in the prediction.
double count() const; diff --git a/lib/maths/common/unittest/CLinearAlgebraTest.cc b/lib/maths/common/unittest/CLinearAlgebraTest.cc index ea4c9436b6..f47a1338e4 100644 --- a/lib/maths/common/unittest/CLinearAlgebraTest.cc +++ b/lib/maths/common/unittest/CLinearAlgebraTest.cc @@ -73,8 +73,7 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrixNxN) { BOOST_REQUIRE_EQUAL(10.8, matrix.trace()); } { - double v[]{1.0, 2.0, 3.0, 4.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector{{1.0, 2.0, 3.0, 4.0}}; maths::common::CSymmetricMatrixNxN matrix( maths::common::E_OuterProduct, vector); LOG_DEBUG(<< "matrix = " << matrix); @@ -86,8 +85,7 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrixNxN) { BOOST_REQUIRE_EQUAL(30.0, matrix.trace()); } { - double v[]{1.0, 2.0, 3.0, 4.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector{{1.0, 2.0, 3.0, 4.0}}; maths::common::CSymmetricMatrixNxN matrix(maths::common::E_Diagonal, vector); LOG_DEBUG(<< "matrix = " << matrix); for (std::size_t i = 0; i < 4; ++i) { @@ -133,8 +131,7 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrixNxN) { { LOG_DEBUG(<< "Matrix Scalar Multiplication"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector{{1.0, 2.0, 3.0, 4.0, 5.0}}; maths::common::CSymmetricMatrixNxN m(maths::common::E_OuterProduct, vector); maths::common::CSymmetricMatrixNxN ms = m * 3.0; LOG_DEBUG(<< "3 * m = " << ms); @@ -148,8 +145,7 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrixNxN) { { LOG_DEBUG(<< "Matrix Scalar Division"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector{{1.0, 2.0, 3.0, 4.0, 5.0}}; maths::common::CSymmetricMatrixNxN m(maths::common::E_OuterProduct, vector); maths::common::CSymmetricMatrixNxN ms = m / 4.0; LOG_DEBUG(<< "m / 4.0 = " << ms); @@ -160,6 +156,23 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrixNxN) { } } } + { + LOG_DEBUG(<< "Mean"); + + double m[][5]{{1.1, 2.4, 1.4, 3.7, 4.0}, + {2.4, 3.2, 1.8, 0.7, 1.0}, + {1.4, 1.8, 0.8, 4.7, 3.1}, + {3.7, 0.7, 4.7, 4.7, 1.1}, + {4.0, 1.0, 3.1, 1.1, 1.0}}; + maths::common::CSymmetricMatrixNxN matrix(m); + + double expectedMean{ + (5.0 * maths::common::CBasicStatistics::mean({1.1, 3.2, 0.8, 4.7, 1.0}) + + 20.0 * maths::common::CBasicStatistics::mean( + {2.4, 1.4, 3.7, 4.0, 1.8, 0.7, 1.0, 4.7, 3.1, 1.1})) / + 25.0}; + BOOST_REQUIRE_CLOSE(expectedMean, matrix.mean(), 1e-6); + } } BOOST_AUTO_TEST_CASE(testVectorNx1) { @@ -191,25 +204,22 @@ BOOST_AUTO_TEST_CASE(testVectorNx1) { { LOG_DEBUG(<< "Sum"); - double v[]{1.1, 2.4, 1.4, 3.7, 4.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector{{1.1, 2.4, 1.4, 3.7, 4.0}}; maths::common::CVectorNx1 sum = vector + vector; LOG_DEBUG(<< "vector = " << sum); for (std::size_t i = 0; i < 5; ++i) { - BOOST_REQUIRE_EQUAL(2.0 * v[i], sum(i)); + BOOST_REQUIRE_EQUAL(2.0 * vector(i), sum(i)); } } { LOG_DEBUG(<< "Difference"); - double v1[]{1.1, 0.4, 1.4}; - double v2[]{2.1, 0.3, 0.4}; - maths::common::CVectorNx1 vector1(v1); - maths::common::CVectorNx1 vector2(v2); + maths::common::CVectorNx1 vector1{{1.1, 0.4, 1.4}}; + maths::common::CVectorNx1 vector2{{2.1, 0.3, 0.4}}; maths::common::CVectorNx1 difference = vector1 - vector2; LOG_DEBUG(<< "vector = " << difference); for (std::size_t i = 0; i < 3; ++i) { - BOOST_REQUIRE_EQUAL(v1[i] - v2[i], difference(i)); + BOOST_REQUIRE_EQUAL(vector1(i) - vector2(i), difference(i)); } } { @@ -241,8 +251,7 @@ BOOST_AUTO_TEST_CASE(testVectorNx1) { { LOG_DEBUG(<< "Vector 
Scalar Multiplication"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector{{1.0, 2.0, 3.0, 4.0, 5.0}}; maths::common::CVectorNx1 vs = vector * 3.0; LOG_DEBUG(<< "3 * v = " << vs); for (std::size_t i = 0; i < 5; ++i) { @@ -252,14 +261,21 @@ BOOST_AUTO_TEST_CASE(testVectorNx1) { { LOG_DEBUG(<< "Matrix Scalar Division"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVectorNx1 vector(v); + maths::common::CVectorNx1 vector({1.0, 2.0, 3.0, 4.0, 5.0}); maths::common::CVectorNx1 vs = vector / 4.0; LOG_DEBUG(<< "v / 4.0 = " << vs); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_EQUAL(static_cast((i + 1)) / 4.0, vs(i)); } } + { + LOG_DEBUG(<< "Mean"); + + maths::common::CVectorNx1 vector{{1.0, 2.0, 3.0, 4.0, 5.0}}; + + double expectedMean{maths::common::CBasicStatistics::mean({1.0, 2.0, 3.0, 4.0, 5.0})}; + BOOST_REQUIRE_EQUAL(expectedMean, vector.mean()); + } } BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { @@ -267,8 +283,8 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { { maths::common::CSymmetricMatrix matrix(3); LOG_DEBUG(<< "matrix = " << matrix); - BOOST_REQUIRE_EQUAL(std::size_t(3), matrix.rows()); - BOOST_REQUIRE_EQUAL(std::size_t(3), matrix.columns()); + BOOST_REQUIRE_EQUAL(3, matrix.rows()); + BOOST_REQUIRE_EQUAL(3, matrix.columns()); for (std::size_t i = 0; i < 3; ++i) { for (std::size_t j = 0; j < 3; ++j) { BOOST_REQUIRE_EQUAL(0.0, matrix(i, j)); @@ -278,8 +294,8 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { { maths::common::CSymmetricMatrix matrix(4, 3.0); LOG_DEBUG(<< "matrix = " << matrix); - BOOST_REQUIRE_EQUAL(std::size_t(4), matrix.rows()); - BOOST_REQUIRE_EQUAL(std::size_t(4), matrix.columns()); + BOOST_REQUIRE_EQUAL(4, matrix.rows()); + BOOST_REQUIRE_EQUAL(4, matrix.columns()); for (std::size_t i = 0; i < 4; ++i) { for (std::size_t j = 0; j < 4; ++j) { BOOST_REQUIRE_EQUAL(3.0, matrix(i, j)); @@ -287,21 +303,15 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { } } { - double m_[][5]{{1.1, 2.4, 1.4, 3.7, 4.0}, - {2.4, 3.2, 1.8, 0.7, 1.0}, - {1.4, 1.8, 0.8, 4.7, 3.1}, - {3.7, 0.7, 4.7, 4.7, 1.1}, - {4.0, 1.0, 3.1, 1.1, 1.0}}; - TDoubleVecVec m(5, TDoubleVec(5)); - for (std::size_t i = 0; i < 5; ++i) { - for (std::size_t j = 0; j < 5; ++j) { - m[i][j] = m_[i][j]; - } - } + TDoubleVecVec m{{1.1, 2.4, 1.4, 3.7, 4.0}, + {2.4, 3.2, 1.8, 0.7, 1.0}, + {1.4, 1.8, 0.8, 4.7, 3.1}, + {3.7, 0.7, 4.7, 4.7, 1.1}, + {4.0, 1.0, 3.1, 1.1, 1.0}}; maths::common::CSymmetricMatrix matrix(m); LOG_DEBUG(<< "matrix = " << matrix); - BOOST_REQUIRE_EQUAL(std::size_t(5), matrix.rows()); - BOOST_REQUIRE_EQUAL(std::size_t(5), matrix.columns()); + BOOST_REQUIRE_EQUAL(5, matrix.rows()); + BOOST_REQUIRE_EQUAL(5, matrix.columns()); for (std::size_t i = 0; i < 5; ++i) { for (std::size_t j = 0; j < 5; ++j) { BOOST_REQUIRE_EQUAL(m[i][j], matrix(i, j)); @@ -314,9 +324,9 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { 4.7, 4.7, 4.0, 1.0, 3.1, 1.1, 1.0}; maths::common::CSymmetricMatrix matrix(std::begin(m), std::end(m)); LOG_DEBUG(<< "matrix = " << matrix); - BOOST_REQUIRE_EQUAL(std::size_t(5), matrix.rows()); - BOOST_REQUIRE_EQUAL(std::size_t(5), matrix.columns()); - for (std::size_t i = 0u, k = 0; i < 5; ++i) { + BOOST_REQUIRE_EQUAL(5, matrix.rows()); + BOOST_REQUIRE_EQUAL(5, matrix.columns()); + for (std::size_t i = 0, k = 0; i < 5; ++i) { for (std::size_t j = 0; j <= i; ++j, ++k) { BOOST_REQUIRE_EQUAL(m[k], matrix(i, j)); BOOST_REQUIRE_EQUAL(m[k], matrix(j, i)); @@ -325,12 +335,12 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { 
BOOST_REQUIRE_EQUAL(10.8, matrix.trace()); } { - double v[]{1.0, 2.0, 3.0, 4.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.0, 2.0, 3.0, 4.0}; + maths::common::CVector vector{v}; maths::common::CSymmetricMatrix matrix(maths::common::E_OuterProduct, vector); LOG_DEBUG(<< "matrix = " << matrix); - BOOST_REQUIRE_EQUAL(std::size_t(4), matrix.rows()); - BOOST_REQUIRE_EQUAL(std::size_t(4), matrix.columns()); + BOOST_REQUIRE_EQUAL(4, matrix.rows()); + BOOST_REQUIRE_EQUAL(4, matrix.columns()); for (std::size_t i = 0; i < 4; ++i) { for (std::size_t j = 0; j < 4; ++j) { BOOST_REQUIRE_EQUAL(static_cast((i + 1) * (j + 1)), matrix(i, j)); @@ -339,12 +349,12 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { BOOST_REQUIRE_EQUAL(30.0, matrix.trace()); } { - double v[]{1.0, 2.0, 3.0, 4.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.0, 2.0, 3.0, 4.0}; + maths::common::CVector vector{v}; maths::common::CSymmetricMatrix matrix(maths::common::E_Diagonal, vector); LOG_DEBUG(<< "matrix = " << matrix); - BOOST_REQUIRE_EQUAL(std::size_t(4), matrix.rows()); - BOOST_REQUIRE_EQUAL(std::size_t(4), matrix.columns()); + BOOST_REQUIRE_EQUAL(4, matrix.rows()); + BOOST_REQUIRE_EQUAL(4, matrix.columns()); for (std::size_t i = 0; i < 4; ++i) { for (std::size_t j = 0; j < 4; ++j) { BOOST_REQUIRE_EQUAL(i == j ? vector(i) : 0.0, matrix(i, j)); @@ -361,7 +371,7 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { maths::common::CSymmetricMatrix matrix(std::begin(m), std::end(m)); maths::common::CSymmetricMatrix sum = matrix + matrix; LOG_DEBUG(<< "sum = " << sum); - for (std::size_t i = 0u, k = 0; i < 5; ++i) { + for (std::size_t i = 0, k = 0; i < 5; ++i) { for (std::size_t j = 0; j <= i; ++j, ++k) { BOOST_REQUIRE_EQUAL(2.0 * m[k], sum(i, j)); BOOST_REQUIRE_EQUAL(2.0 * m[k], sum(j, i)); @@ -387,8 +397,8 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { { LOG_DEBUG(<< "Matrix Scalar Multiplication"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.0, 2.0, 3.0, 4.0, 5.0}; + maths::common::CVector vector{v}; maths::common::CSymmetricMatrix m(maths::common::E_OuterProduct, vector); maths::common::CSymmetricMatrix ms = m * 3.0; LOG_DEBUG(<< "3 * m = " << ms); @@ -402,8 +412,8 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { { LOG_DEBUG(<< "Matrix Scalar Division"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.0, 2.0, 3.0, 4.0, 5.0}; + maths::common::CVector vector{v}; maths::common::CSymmetricMatrix m(maths::common::E_OuterProduct, vector); maths::common::CSymmetricMatrix ms = m / 4.0; LOG_DEBUG(<< "m / 4.0 = " << ms); @@ -414,6 +424,23 @@ BOOST_AUTO_TEST_CASE(testSymmetricMatrix) { } } } + { + LOG_DEBUG(<< "Mean"); + + TDoubleVecVec m{{1.1, 2.4, 1.4, 3.7, 4.0}, + {2.4, 3.2, 1.8, 0.7, 1.0}, + {1.4, 1.8, 0.8, 4.7, 3.1}, + {3.7, 0.7, 4.7, 4.7, 1.1}, + {4.0, 1.0, 3.1, 1.1, 1.0}}; + maths::common::CSymmetricMatrix matrix(m); + + double expectedMean{ + (5.0 * maths::common::CBasicStatistics::mean({1.1, 3.2, 0.8, 4.7, 1.0}) + + 20.0 * maths::common::CBasicStatistics::mean( + {2.4, 1.4, 3.7, 4.0, 1.8, 0.7, 1.0, 4.7, 3.1, 1.1})) / + 25.0}; + BOOST_REQUIRE_CLOSE(expectedMean, matrix.mean(), 1e-6); + } } BOOST_AUTO_TEST_CASE(testVector) { @@ -421,7 +448,7 @@ BOOST_AUTO_TEST_CASE(testVector) { { maths::common::CVector vector(3); LOG_DEBUG(<< "vector = " << vector); - BOOST_REQUIRE_EQUAL(std::size_t(3), vector.dimension()); + BOOST_REQUIRE_EQUAL(3, 
vector.dimension()); for (std::size_t i = 0; i < 3; ++i) { BOOST_REQUIRE_EQUAL(0.0, vector(i)); } @@ -429,25 +456,24 @@ BOOST_AUTO_TEST_CASE(testVector) { { maths::common::CVector vector(4, 3.0); LOG_DEBUG(<< "vector = " << vector); - BOOST_REQUIRE_EQUAL(std::size_t(4), vector.dimension()); + BOOST_REQUIRE_EQUAL(4, vector.dimension()); for (std::size_t i = 0; i < 4; ++i) { BOOST_REQUIRE_EQUAL(3.0, vector(i)); } } { - double v_[]{1.1, 2.4, 1.4, 3.7, 4.0}; - TDoubleVec v(std::begin(v_), std::end(v_)); + TDoubleVec v{1.1, 2.4, 1.4, 3.7, 4.0}; maths::common::CVector vector(v); - BOOST_REQUIRE_EQUAL(std::size_t(5), vector.dimension()); + BOOST_REQUIRE_EQUAL(5, vector.dimension()); LOG_DEBUG(<< "vector = " << vector); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_EQUAL(v[i], vector(i)); } } { - double v[]{1.1, 2.4, 1.4, 3.7, 4.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); - BOOST_REQUIRE_EQUAL(std::size_t(5), vector.dimension()); + TDoubleVec v{1.1, 2.4, 1.4, 3.7, 4.0}; + maths::common::CVector vector{v.begin(), v.end()}; + BOOST_REQUIRE_EQUAL(5, vector.dimension()); LOG_DEBUG(<< "vector = " << vector); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_EQUAL(v[i], vector(i)); @@ -458,8 +484,8 @@ BOOST_AUTO_TEST_CASE(testVector) { { LOG_DEBUG(<< "Sum"); - double v[]{1.1, 2.4, 1.4, 3.7, 4.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.1, 2.4, 1.4, 3.7, 4.0}; + maths::common::CVector vector{v}; maths::common::CVector sum = vector + vector; LOG_DEBUG(<< "vector = " << sum); for (std::size_t i = 0; i < 5; ++i) { @@ -469,10 +495,10 @@ BOOST_AUTO_TEST_CASE(testVector) { { LOG_DEBUG(<< "Difference"); - double v1[]{1.1, 0.4, 1.4}; - double v2[]{2.1, 0.3, 0.4}; - maths::common::CVector vector1(std::begin(v1), std::end(v1)); - maths::common::CVector vector2(std::begin(v2), std::end(v2)); + TDoubleVec v1{1.1, 0.4, 1.4}; + TDoubleVec v2{2.1, 0.3, 0.4}; + maths::common::CVector vector1{v1}; + maths::common::CVector vector2{v2}; maths::common::CVector difference = vector1 - vector2; LOG_DEBUG(<< "vector = " << difference); for (std::size_t i = 0; i < 3; ++i) { @@ -482,7 +508,7 @@ BOOST_AUTO_TEST_CASE(testVector) { { LOG_DEBUG(<< "Matrix Vector Multiplication"); - Eigen::MatrixXd randomMatrix(std::size_t(4), std::size_t(4)); + Eigen::MatrixXd randomMatrix(4, 4); Eigen::VectorXd randomVector(4); for (std::size_t t = 0; t < 20; ++t) { randomMatrix.setRandom(); @@ -508,8 +534,8 @@ BOOST_AUTO_TEST_CASE(testVector) { { LOG_DEBUG(<< "Vector Scalar Multiplication"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.0, 2.0, 3.0, 4.0, 5.0}; + maths::common::CVector vector{v}; maths::common::CVector vs = vector * 3.0; LOG_DEBUG(<< "3 * v = " << vs); for (std::size_t i = 0; i < 5; ++i) { @@ -519,14 +545,23 @@ BOOST_AUTO_TEST_CASE(testVector) { { LOG_DEBUG(<< "Matrix Scalar Division"); - double v[]{1.0, 2.0, 3.0, 4.0, 5.0}; - maths::common::CVector vector(std::begin(v), std::end(v)); + TDoubleVec v{1.0, 2.0, 3.0, 4.0, 5.0}; + maths::common::CVector vector{v}; maths::common::CVector vs = vector / 4.0; LOG_DEBUG(<< "v / 4.0 = " << vs); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_EQUAL(static_cast((i + 1)) / 4.0, vs(i)); } } + { + LOG_DEBUG(<< "Mean"); + + TDoubleVec v{1.0, 2.0, 3.0, 4.0, 5.0}; + maths::common::CVector vector{v}; + + double expectedMean{maths::common::CBasicStatistics::mean({1.0, 2.0, 3.0, 4.0, 5.0})}; + BOOST_REQUIRE_EQUAL(expectedMean, vector.mean()); + } } 
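// A small additional sketch, in the same style as the test cases above, of the
// identity the new mean() members reduce to: the component sum over the
// dimension, here computed with std::accumulate (assumes <numeric> is
// included). The test name is illustrative, not part of the patch.
BOOST_AUTO_TEST_CASE(testMeanMatchesAccumulate) {
    TDoubleVec v{1.0, 2.0, 3.0, 4.0, 5.0};
    maths::common::CVector<double> vector{v};
    double expected{std::accumulate(v.begin(), v.end(), 0.0) /
                    static_cast<double>(v.size())};
    BOOST_REQUIRE_EQUAL(expected, vector.mean());
}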
BOOST_AUTO_TEST_CASE(testNorms) { @@ -1143,10 +1178,10 @@ BOOST_AUTO_TEST_CASE(testShims) { LOG_DEBUG(<< "Test dimension"); { - BOOST_REQUIRE_EQUAL(std::size_t(4), maths::common::las::dimension(vector1)); - BOOST_REQUIRE_EQUAL(std::size_t(4), maths::common::las::dimension(vector2)); - BOOST_REQUIRE_EQUAL(std::size_t(4), maths::common::las::dimension(vector3)); - BOOST_REQUIRE_EQUAL(std::size_t(4), maths::common::las::dimension(vector4)); + BOOST_REQUIRE_EQUAL(4, maths::common::las::dimension(vector1)); + BOOST_REQUIRE_EQUAL(4, maths::common::las::dimension(vector2)); + BOOST_REQUIRE_EQUAL(4, maths::common::las::dimension(vector3)); + BOOST_REQUIRE_EQUAL(4, maths::common::las::dimension(vector4)); } LOG_DEBUG(<< "Test zero"); { diff --git a/lib/maths/time_series/CCalendarComponent.cc b/lib/maths/time_series/CCalendarComponent.cc index 21be45117b..91cabd72d8 100644 --- a/lib/maths/time_series/CCalendarComponent.cc +++ b/lib/maths/time_series/CCalendarComponent.cc @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -32,7 +33,6 @@ namespace ml { namespace maths { namespace time_series { namespace { -using TDoubleDoublePr = maths_t::TDoubleDoublePr; const core::TPersistenceTag DECOMPOSITION_COMPONENT_TAG{"a", "decomposition_component"}; const core::TPersistenceTag BUCKETING_TAG{"b", "bucketing"}; const core::TPersistenceTag LAST_INTERPOLATION_TAG{"c", "last_interpolation_time"}; @@ -167,7 +167,8 @@ CCalendarFeatureAndTZ CCalendarComponent::feature() const { return m_Bucketing.feature(); } -TDoubleDoublePr CCalendarComponent::value(core_t::TTime time, double confidence) const { +CCalendarComponent::TVector2x1 CCalendarComponent::value(core_t::TTime time, + double confidence) const { double offset{static_cast(this->feature().offset(time))}; double n{m_Bucketing.count(time)}; return this->CDecompositionComponent::value(offset, n, confidence); @@ -177,7 +178,8 @@ double CCalendarComponent::meanValue() const { return this->CDecompositionComponent::meanValue(); } -TDoubleDoublePr CCalendarComponent::variance(core_t::TTime time, double confidence) const { +CCalendarComponent::TVector2x1 +CCalendarComponent::variance(core_t::TTime time, double confidence) const { double offset{static_cast(this->feature().offset(time))}; double n{m_Bucketing.count(time)}; return this->CDecompositionComponent::variance(offset, n, confidence); diff --git a/lib/maths/time_series/CDecompositionComponent.cc b/lib/maths/time_series/CDecompositionComponent.cc index a60754e353..2327a27d5f 100644 --- a/lib/maths/time_series/CDecompositionComponent.cc +++ b/lib/maths/time_series/CDecompositionComponent.cc @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -124,7 +125,8 @@ void CDecompositionComponent::shiftLevel(double shift) { m_MeanValue += shift; } -TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double confidence) const { +CDecompositionComponent::TVector2x1 +CDecompositionComponent::value(double offset, double n, double confidence) const { // In order to compute a confidence interval we need to know // the distribution of the samples. 
In practice, as long as // they are independent, then the sample mean will be @@ -135,36 +137,37 @@ TDoubleDoublePr CDecompositionComponent::value(double offset, double n, double c double m{this->valueSpline().value(offset)}; if (confidence == 0.0) { - return {m, m}; + return TVector2x1{m}; } n = std::max(n, 1.0); - double sd{::sqrt(std::max(this->varianceSpline().value(offset), 0.0) / n)}; + double sd{std::sqrt(std::max(this->varianceSpline().value(offset), 0.0) / n)}; if (sd == 0.0) { - return {m, m}; + return TVector2x1{m}; } try { boost::math::normal normal{m, sd}; double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; - return {ql, qu}; + return TVector2x1{{ql, qu}}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", n = " << n << ", m = " << m << ", sd = " << sd << ", confidence = " << confidence); } - return {m, m}; + return TVector2x1{m}; } - return {m_MeanValue, m_MeanValue}; + return TVector2x1{m_MeanValue}; } double CDecompositionComponent::meanValue() const { return m_MeanValue; } -TDoubleDoublePr CDecompositionComponent::variance(double offset, double n, double confidence) const { +CDecompositionComponent::TVector2x1 +CDecompositionComponent::variance(double offset, double n, double confidence) const { // In order to compute a confidence interval we need to know // the distribution of the samples. In practice, as long as // they are independent, then the sample variance will be @@ -175,20 +178,20 @@ TDoubleDoublePr CDecompositionComponent::variance(double offset, double n, doubl n = std::max(n, 2.0); double v{this->varianceSpline().value(offset)}; if (confidence == 0.0) { - return {v, v}; + return TVector2x1{v}; } try { boost::math::chi_squared chi{n - 1.0}; double ql{boost::math::quantile(chi, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(chi, (100.0 + confidence) / 200.0)}; - return std::make_pair(ql * v / (n - 1.0), qu * v / (n - 1.0)); + return TVector2x1{{ql * v / (n - 1.0), qu * v / (n - 1.0)}}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", n = " << n << ", confidence = " << confidence); } - return {v, v}; + return TVector2x1{v}; } - return {m_MeanVariance, m_MeanVariance}; + return TVector2x1{m_MeanVariance}; } double CDecompositionComponent::meanVariance() const { @@ -211,7 +214,7 @@ CDecompositionComponent::TSplineCRef CDecompositionComponent::varianceSpline() c return m_Splines.spline(CPackedSplines::E_Variance); } -uint64_t CDecompositionComponent::checksum(uint64_t seed) const { +std::uint64_t CDecompositionComponent::checksum(std::uint64_t seed) const { seed = common::CChecksum::calculate(seed, m_MaxSize); seed = common::CChecksum::calculate(seed, m_BoundaryCondition); seed = common::CChecksum::calculate(seed, m_Splines); @@ -332,7 +335,7 @@ void CDecompositionComponent::CPackedSplines::interpolate(const TDoubleVec& knot LOG_TRACE(<< "curvatures = " << core::CContainerPrinter::print(m_Curvatures)); } -uint64_t CDecompositionComponent::CPackedSplines::checksum(uint64_t seed) const { +std::uint64_t CDecompositionComponent::CPackedSplines::checksum(std::uint64_t seed) const { seed = common::CChecksum::calculate(seed, m_Types); seed = common::CChecksum::calculate(seed, m_Knots); seed = common::CChecksum::calculate(seed, m_Values); diff --git a/lib/maths/time_series/CSeasonalComponent.cc b/lib/maths/time_series/CSeasonalComponent.cc index 
e18f44e647..803388fad3 100644 --- a/lib/maths/time_series/CSeasonalComponent.cc +++ b/lib/maths/time_series/CSeasonalComponent.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,6 @@ namespace ml { namespace maths { namespace time_series { namespace { -using TDoubleDoublePr = maths_t::TDoubleDoublePr; const core::TPersistenceTag DECOMPOSITION_COMPONENT_TAG{"a", "decomposition_component"}; const core::TPersistenceTag RNG_TAG{"b", "rng"}; const core::TPersistenceTag BUCKETING_TAG{"c", "bucketing"}; @@ -197,7 +197,7 @@ void CSeasonalComponent::add(core_t::TTime time, double value, double weight, do double shiftWeight; std::tie(shift, shiftWeight) = this->likelyShift(time, value); m_CurrentMeanShift.add(static_cast(shift), weight * shiftWeight); - double prediction{common::CBasicStatistics::mean(this->value(this->jitter(time), 0.0))}; + double prediction{this->value(this->jitter(time), 0.0).mean()}; m_Bucketing.add(time + m_TotalShift, value, prediction, weight, gradientLearnRate); } @@ -251,7 +251,8 @@ const CSeasonalComponentAdaptiveBucketing& CSeasonalComponent::bucketing() const return m_Bucketing; } -TDoubleDoublePr CSeasonalComponent::value(core_t::TTime time, double confidence) const { +CSeasonalComponent::TVector2x1 CSeasonalComponent::value(core_t::TTime time, + double confidence) const { time += m_TotalShift; double offset{this->time().periodic(time)}; double n{m_Bucketing.count(time)}; @@ -294,7 +295,7 @@ double CSeasonalComponent::delta(core_t::TTime time, double mean{this->CDecompositionComponent::meanValue()}; for (core_t::TTime t = time; t < time + longPeriod; t += shortPeriod) { if (time_.inWindow(t)) { - double difference{common::CBasicStatistics::mean(this->value(t, 0.0)) - mean}; + double difference{this->value(t, 0.0).mean() - mean}; bias.add(difference); amplitude = std::max(amplitude, std::fabs(difference)); if (shortDifference * difference < 0.0) { @@ -314,7 +315,8 @@ double CSeasonalComponent::delta(core_t::TTime time, return 0.0; } -TDoubleDoublePr CSeasonalComponent::variance(core_t::TTime time, double confidence) const { +CSeasonalComponent::TVector2x1 +CSeasonalComponent::variance(core_t::TTime time, double confidence) const { time += m_TotalShift; double offset{this->time().periodic(time)}; double n{m_Bucketing.count(time)}; @@ -334,7 +336,7 @@ bool CSeasonalComponent::covariances(core_t::TTime time, TMatrix& result) const time += m_TotalShift; if (const auto* r = m_Bucketing.regression(time)) { - double variance{common::CBasicStatistics::mean(this->variance(time, 0.0))}; + double variance{this->variance(time, 0.0).mean()}; return r->covariances(variance, result); } @@ -408,11 +410,10 @@ CSeasonalComponent::likelyShift(core_t::TTime time, double value) const { // If the change due to the shift is small compared to the prediction // error force it to zero. 
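// The objective below is a regularized absolute error,
//   loss(shift) = |prediction(time + shift) - value| + noise * |shift|,
// so a non-zero shift is chosen only when it reduces the prediction error by
// more than a penalty proportional to the residual noise; improvements smaller
// than the noise level therefore resolve to a zero shift.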
double noise{0.2 * std::sqrt(this->meanVariance()) / range}; - auto loss = [&](double offset) { - return std::fabs(common::CBasicStatistics::mean(this->value( - time + static_cast(offset + 0.5), 0.0)) - - value) + - noise * std::fabs(offset); + auto loss = [&](double shift) { + auto shift_ = static_cast(shift + 0.5); + return std::fabs(this->value(time + shift_, 0.0).mean() - value) + + noise * std::fabs(shift); }; return likelyShift(m_MaxTimeShiftPerPeriod, 0, loss); diff --git a/lib/maths/time_series/CTimeSeriesDecomposition.cc b/lib/maths/time_series/CTimeSeriesDecomposition.cc index 4f53fe2f80..30c5770453 100644 --- a/lib/maths/time_series/CTimeSeriesDecomposition.cc +++ b/lib/maths/time_series/CTimeSeriesDecomposition.cc @@ -29,6 +29,7 @@ #include +#include #include #include #include @@ -38,22 +39,6 @@ namespace maths { namespace time_series { namespace { -using TDoubleDoublePr = maths_t::TDoubleDoublePr; -using TVector2x1 = common::CVectorNx1; - -//! Convert a double pair to a 2x1 vector. -TVector2x1 vector2x1(const TDoubleDoublePr& p) { - TVector2x1 result; - result(0) = p.first; - result(1) = p.second; - return result; -} - -//! Convert a 2x1 vector to a double pair. -TDoubleDoublePr pair(const TVector2x1& v) { - return {v(0), v(1)}; -} - // Version 7.11 const std::string VERSION_7_11_TAG("7.11"); const core::TPersistenceTag LAST_VALUE_TIME_7_11_TAG{"a", "last_value_time"}; @@ -243,28 +228,26 @@ void CTimeSeriesDecomposition::addPoint(core_t::TTime time, auto testForSeasonality = m_Components.makeTestForSeasonality( [this](core_t::TTime time_, const TBoolVec& removedSeasonalMask) { - return common::CBasicStatistics::mean( - this->value(time_, 0.0, E_Seasonal, removedSeasonalMask)); + return this->value(time_, 0.0, E_Seasonal, removedSeasonalMask).mean(); }); - SAddValue message{ - time, - lastTime, - m_TimeShift, - value, - weights, - common::CBasicStatistics::mean(this->value(time, 0.0, E_TrendForced)), - common::CBasicStatistics::mean(this->value(time, 0.0, E_Seasonal)), - common::CBasicStatistics::mean(this->value(time, 0.0, E_Calendar)), - *this, - [this] { - auto predictor_ = this->predictor(E_All | E_TrendForced); - return [predictor = std::move(predictor_)](core_t::TTime time_) { - return predictor(time_, {}); - }; - }, - [this] { return this->predictor(E_Seasonal | E_Calendar); }, - testForSeasonality}; + SAddValue message{time, + lastTime, + m_TimeShift, + value, + weights, + this->value(time, 0.0, E_TrendForced).mean(), + this->value(time, 0.0, E_Seasonal).mean(), + this->value(time, 0.0, E_Calendar).mean(), + *this, + [this] { + auto predictor_ = this->predictor(E_All | E_TrendForced); + return [predictor = std::move(predictor_)](core_t::TTime time_) { + return predictor(time_, {}); + }; + }, + [this] { return this->predictor(E_Seasonal | E_Calendar); }, + testForSeasonality}; m_ChangePointTest.handle(message); m_Components.handle(message); @@ -293,20 +276,21 @@ double CTimeSeriesDecomposition::meanValue(core_t::TTime time) const { return m_Components.meanValue(time); } -TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, - double confidence, - int components, - const TBoolVec& removedSeasonalMask, - bool smooth) const { - TVector2x1 baseline{0.0}; +CTimeSeriesDecomposition::TVector2x1 +CTimeSeriesDecomposition::value(core_t::TTime time, + double confidence, + int components, + const TBoolVec& removedSeasonalMask, + bool smooth) const { + TVector2x1 result{0.0}; time += m_TimeShift; if ((components & E_TrendForced) != 0) { - baseline += 
vector2x1(m_Components.trend().value(time, confidence)); + result += m_Components.trend().value(time, confidence); } else if ((components & E_Trend) != 0) { if (m_Components.usingTrendForPrediction()) { - baseline += vector2x1(m_Components.trend().value(time, confidence)); + result += m_Components.trend().value(time, confidence); } } @@ -316,7 +300,7 @@ TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, if (seasonal[i].initialized() && (removedSeasonalMask.empty() || removedSeasonalMask[i] == false) && seasonal[i].time().inWindow(time)) { - baseline += vector2x1(seasonal[i].value(time, confidence)); + result += seasonal[i].value(time, confidence); } } } @@ -324,21 +308,27 @@ TDoubleDoublePr CTimeSeriesDecomposition::value(core_t::TTime time, if ((components & E_Calendar) != 0) { for (const auto& component : m_Components.calendar()) { if (component.initialized() && component.feature().inWindow(time)) { - baseline += vector2x1(component.value(time, confidence)); + result += component.value(time, confidence); } } } if (smooth) { - baseline += vector2x1(this->smooth( + result += this->smooth( [&](core_t::TTime time_) { return this->value(time_ - m_TimeShift, confidence, components & E_Seasonal, removedSeasonalMask, false); }, - time, components)); + time, components); } - return pair(baseline); + return result; +} + +CTimeSeriesDecomposition::TVector2x1 +CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, bool isNonNegative) const { + auto result = this->value(time, confidence, E_All); + return isNonNegative ? max(result, 0.0) : result; } CTimeSeriesDecomposition::TFilteredPredictor @@ -364,7 +354,7 @@ CTimeSeriesDecomposition::predictor(int components) const { if (seasonal[i].initialized() && (removedSeasonalMask.empty() || removedSeasonalMask[i] == false) && seasonal[i].time().inWindow(time)) { - baseline += common::CBasicStatistics::mean(seasonal[i].value(time, 0.0)); + baseline += seasonal[i].value(time, 0.0).mean(); } } } @@ -372,7 +362,7 @@ CTimeSeriesDecomposition::predictor(int components) const { if ((components & E_Calendar) != 0) { for (const auto& component : m_Components.calendar()) { if (component.initialized() && component.feature().inWindow(time)) { - baseline += common::CBasicStatistics::mean(component.value(time, 0.0)); + baseline += component.value(time, 0.0).mean(); } } } @@ -390,6 +380,7 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, core_t::TTime step, double confidence, double minimumScale, + bool isNonNegative, const TWriteForecastResult& writer) { if (endTime < startTime) { LOG_ERROR(<< "Bad forecast range: [" << startTime << "," << endTime << "]"); @@ -404,15 +395,15 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, TVector2x1 prediction{0.0}; for (const auto& component : m_Components.seasonal()) { if (component.initialized() && component.time().inWindow(time)) { - prediction += vector2x1(component.value(time, confidence)); + prediction += component.value(time, confidence); } } for (const auto& component : m_Components.calendar()) { if (component.initialized() && component.feature().inWindow(time)) { - prediction += vector2x1(component.value(time, confidence)); + prediction += component.value(time, confidence); } } - return pair(prediction); + return prediction; }; startTime += m_TimeShift; @@ -422,19 +413,18 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, auto forecastSeasonal = [&](core_t::TTime time) -> TDouble3Vec { m_Components.interpolateForForecast(time); - TVector2x1 
bounds{vector2x1(seasonal(time))}; + TVector2x1 bounds{seasonal(time)}; // Decompose the smoothing into shift plus stretch and ensure that the // smoothed interval between the prediction bounds remains positive length. - TDoubleDoublePr smoothing{this->smooth(seasonal, time, E_Seasonal)}; - double shift{common::CBasicStatistics::mean(smoothing)}; - double stretch{std::max(smoothing.second - smoothing.first, bounds(0) - bounds(1))}; + TVector2x1 smoothing{this->smooth(seasonal, time, E_Seasonal)}; + double shift{smoothing.mean()}; + double stretch{std::max(smoothing(1) - smoothing(0), bounds(0) - bounds(1))}; bounds += TVector2x1{{shift - stretch / 2.0, shift + stretch / 2.0}}; double variance{this->meanVariance()}; double boundsScale{std::sqrt(std::max( - minimumScale, common::CBasicStatistics::mean( - this->varianceScaleWeight(time, variance, 0.0))))}; + minimumScale, this->varianceScaleWeight(time, variance, 0.0).mean()))}; double prediction{(bounds(0) + bounds(1)) / 2.0}; double interval{boundsScale * (bounds(1) - bounds(0))}; @@ -442,60 +432,60 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime, }; m_Components.trend().forecast(startTime, endTime, step, confidence, - forecastSeasonal, writer); + isNonNegative, forecastSeasonal, writer); } double CTimeSeriesDecomposition::detrend(core_t::TTime time, double value, double confidence, - core_t::TTime maximumTimeShift, - int components) const { + bool isNonNegative, + core_t::TTime maximumTimeShift) const { if (this->initialized() == false) { return value; } - TDoubleDoublePr interval{this->value(time, confidence, (E_All ^ E_Seasonal) & components)}; - value = std::min(value - interval.first, 0.0) + std::max(value - interval.second, 0.0); - - if ((components & E_Seasonal) == 0) { - return value; - } + TVector2x1 interval{this->value(time, confidence, E_All ^ E_Seasonal)}; + auto shiftedInterval = [=](core_t::TTime shift) { + TVector2x1 result{interval + this->value(time + shift, confidence, E_Seasonal)}; + return isNonNegative ? 
max(result, 0.0) : result; + }; core_t::TTime shift{0}; if (maximumTimeShift > 0) { - auto loss = [&](double offset) { - TDoubleDoublePr seasonalInterval{this->value( - time + static_cast(offset + 0.5), confidence, E_Seasonal)}; - return std::fabs(std::min(value - seasonalInterval.first, 0.0) + - std::max(value - seasonalInterval.second, 0.0)); + auto loss = [&](double shift_) { + TVector2x1 interval_{shiftedInterval(static_cast(shift_ + 0.5))}; + return std::fabs(std::min(value - interval_(0), 0.0) + + std::max(value - interval_(1), 0.0)); }; std::tie(shift, std::ignore) = CSeasonalComponent::likelyShift(maximumTimeShift, 0, loss); } - interval = this->value(time + shift, confidence, E_Seasonal); - return std::min(value - interval.first, 0.0) + std::max(value - interval.second, 0.0); + interval = shiftedInterval(shift); + + return std::min(value - interval(0), 0.0) + std::max(value - interval(1), 0.0); } double CTimeSeriesDecomposition::meanVariance() const { return m_Components.meanVarianceScale() * m_Components.meanVariance(); } -TDoubleDoublePr CTimeSeriesDecomposition::varianceScaleWeight(core_t::TTime time, - double variance, - double confidence, - bool smooth) const { +CTimeSeriesDecomposition::TVector2x1 +CTimeSeriesDecomposition::varianceScaleWeight(core_t::TTime time, + double variance, + double confidence, + bool smooth) const { if (this->initialized() == false) { - return {1.0, 1.0}; + return TVector2x1{1.0}; } if (common::CMathsFuncs::isFinite(variance) == false) { LOG_ERROR(<< "Supplied variance is " << variance << "."); - return {1.0, 1.0}; + return TVector2x1{1.0}; } double mean{this->meanVariance()}; if (mean <= 0.0 || variance <= 0.0) { - return {1.0, 1.0}; + return TVector2x1{1.0}; } time += m_TimeShift; @@ -503,17 +493,17 @@ TDoubleDoublePr CTimeSeriesDecomposition::varianceScaleWeight(core_t::TTime time double components{0.0}; TVector2x1 scale(0.0); if (m_Components.usingTrendForPrediction()) { - scale += vector2x1(m_Components.trend().variance(confidence)); + scale += m_Components.trend().variance(confidence); } for (const auto& component : m_Components.seasonal()) { if (component.initialized() && component.time().inWindow(time)) { - scale += vector2x1(component.variance(time, confidence)); + scale += component.variance(time, confidence); components += 1.0; } } for (const auto& component : m_Components.calendar()) { if (component.initialized() && component.feature().inWindow(time)) { - scale += vector2x1(component.variance(time, confidence)); + scale += component.variance(time, confidence); components += 1.0; } } @@ -529,16 +519,23 @@ TDoubleDoublePr CTimeSeriesDecomposition::varianceScaleWeight(core_t::TTime time scale = max(TVector2x1{1.0} + bias * (scale - TVector2x1{1.0}), TVector2x1{0.0}); if (smooth) { - scale += vector2x1(this->smooth( + scale += this->smooth( [&](core_t::TTime time_) { return this->varianceScaleWeight(time_ - m_TimeShift, variance, confidence, false); }, - time, E_All)); + time, E_All); } // If anything overflowed just bail and don't scale. - return pair(common::CMathsFuncs::isFinite(scale) ? scale : TVector2x1{1.0}); + return common::CMathsFuncs::isFinite(scale) ? 
scale : TVector2x1{1.0}; +} + +CTimeSeriesDecomposition::TVector2x1 +CTimeSeriesDecomposition::varianceScaleWeight(core_t::TTime time, + double variance, + double confidence) const { + return this->varianceScaleWeight(time, variance, confidence, true); } double CTimeSeriesDecomposition::countWeight(core_t::TTime time) const { @@ -549,9 +546,10 @@ double CTimeSeriesDecomposition::winsorisationDerate(core_t::TTime time) const { return m_ChangePointTest.winsorisationDerate(time); } -CTimeSeriesDecomposition::TFloatMeanAccumulatorVec CTimeSeriesDecomposition::residuals() const { - return m_SeasonalityTest.residuals([this](core_t::TTime time) { - return common::CBasicStatistics::mean(this->value(time, 0.0)); +CTimeSeriesDecomposition::TFloatMeanAccumulatorVec +CTimeSeriesDecomposition::residuals(bool isNonNegative) const { + return m_SeasonalityTest.residuals([&](core_t::TTime time) { + return this->value(time, 0.0, isNonNegative).mean(); }); } @@ -611,15 +609,15 @@ void CTimeSeriesDecomposition::initializeMediator() { } template -TDoubleDoublePr +CTimeSeriesDecomposition::TVector2x1 CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) const { if ((components & E_Seasonal) != E_Seasonal) { - return {0.0, 0.0}; + return TVector2x1{0.0}; } auto offset = [&f, time](core_t::TTime discontinuity) { - TVector2x1 baselineMinusEps{vector2x1(f(discontinuity - 1))}; - TVector2x1 baselinePlusEps{vector2x1(f(discontinuity + 1))}; + TVector2x1 baselineMinusEps{f(discontinuity - 1)}; + TVector2x1 baselinePlusEps{f(discontinuity + 1)}; return 0.5 * std::max((1.0 - static_cast(std::abs(time - discontinuity)) / static_cast(SMOOTHING_INTERVAL)), @@ -641,15 +639,15 @@ CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) if (timeInWindow == false && inWindowBefore) { core_t::TTime discontinuity{times.startOfWindow(time - SMOOTHING_INTERVAL) + times.windowLength()}; - return pair(-offset(discontinuity)); + return -offset(discontinuity); } if (timeInWindow == false && inWindowAfter) { core_t::TTime discontinuity{component.time().startOfWindow(time + SMOOTHING_INTERVAL)}; - return pair(offset(discontinuity)); + return offset(discontinuity); } } - return {0.0, 0.0}; + return TVector2x1{0.0}; } const core_t::TTime CTimeSeriesDecomposition::SMOOTHING_INTERVAL{14400}; diff --git a/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc b/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc index b168cc93b8..e8a584b588 100644 --- a/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc @@ -168,11 +168,11 @@ void decompose(double trend, TDoubleVec x(m + n); double xhat{x0}; for (std::size_t i = 0; i < m; ++i) { - x[i] = common::CBasicStatistics::mean(seasonal[i]->value(time, 0.0)); + x[i] = seasonal[i]->value(time, 0.0).mean(); xhat += x[i]; } for (std::size_t i = m; i < m + n; ++i) { - x[i] = common::CBasicStatistics::mean(calendar[i - m]->value(time, 0.0)); + x[i] = calendar[i - m]->value(time, 0.0).mean(); xhat += x[i]; } @@ -1716,15 +1716,13 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag TDoubleVec variances(m + n + 1, 0.0); if (m_UsingTrendForPrediction) { - variances[0] = common::CBasicStatistics::mean(m_Trend.variance(0.0)); + variances[0] = m_Trend.variance(0.0).mean(); } for (std::size_t i = 1; i <= m; ++i) { - variances[i] = common::CBasicStatistics::mean( - seasonalComponents[i - 1]->variance(time, 0.0)); + variances[i] = seasonalComponents[i - 
1]->variance(time, 0.0).mean(); } for (std::size_t i = m + 1; i <= m + n; ++i) { - variances[i] = common::CBasicStatistics::mean( - calendarComponents[i - m - 1]->variance(time, 0.0)); + variances[i] = calendarComponents[i - m - 1]->variance(time, 0.0).mean(); } double variance{std::accumulate(variances.begin(), variances.end(), 0.0)}; double expectedVarianceIncrease{1.0 / static_cast(m + n + 1)}; @@ -1967,18 +1965,14 @@ CTimeSeriesDecompositionDetail::CComponents::makeTestForSeasonality(const TFilte double CTimeSeriesDecompositionDetail::CComponents::meanValue(core_t::TTime time) const { return this->initialized() - ? ((m_UsingTrendForPrediction - ? common::CBasicStatistics::mean(m_Trend.value(time, 0.0)) - : 0.0) + + ? ((m_UsingTrendForPrediction ? m_Trend.value(time, 0.0).mean() : 0.0) + meanOf(&CSeasonalComponent::meanValue, this->seasonal())) : 0.0; } double CTimeSeriesDecompositionDetail::CComponents::meanVariance() const { return this->initialized() - ? ((m_UsingTrendForPrediction - ? common::CBasicStatistics::mean(this->trend().variance(0.0)) - : 0.0) + + ? ((m_UsingTrendForPrediction ? this->trend().variance(0.0).mean() : 0.0) + meanOf(&CSeasonalComponent::meanVariance, this->seasonal())) : 0.0; } @@ -2103,7 +2097,7 @@ void CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents(const CS for (std::size_t i = 0; i < initialValues.size(); ++i, time += dt) { if (common::CBasicStatistics::count(initialValues[i]) > 0.0) { common::CBasicStatistics::moment<0>(initialValues[i]) -= - common::CBasicStatistics::mean(m_Trend.value(time, 0.0)); + m_Trend.value(time, 0.0).mean(); } } @@ -2619,7 +2613,7 @@ void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::componentsErrorsAnd for (int j{static_cast(i - 1)}; j > -1; --j) { core_t::TTime period_{components[j]->time().period()}; if (period % period_ == 0) { - double value{common::CBasicStatistics::mean(components[j]->value(time, 0.0)) - + double value{components[j]->value(time, 0.0).mean() - components[j]->meanValue()}; double delta{0.1 * components[i]->delta(time, period_, value)}; deltas[j] += delta; @@ -2636,8 +2630,7 @@ void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::appendPredictions( predictions.reserve(predictions.size() + m_Components.size()); for (const auto& component : m_Components) { if (component.time().inWindow(time)) { - predictions.push_back(common::CBasicStatistics::mean(component.value(time, 0.0)) - - component.meanValue()); + predictions.push_back(component.value(time, 0.0).mean() - component.meanValue()); } } } @@ -2924,8 +2917,7 @@ void CTimeSeriesDecompositionDetail::CComponents::CCalendar::appendPredictions( predictions.reserve(predictions.size() + m_Components.size()); for (const auto& component : m_Components) { if (component.feature().inWindow(time)) { - predictions.push_back(common::CBasicStatistics::mean(component.value(time, 0.0)) - - component.meanValue()); + predictions.push_back(component.value(time, 0.0).mean() - component.meanValue()); } } } diff --git a/lib/maths/time_series/CTimeSeriesDecompositionStub.cc b/lib/maths/time_series/CTimeSeriesDecompositionStub.cc index d86cb4696f..4c24c50cbe 100644 --- a/lib/maths/time_series/CTimeSeriesDecompositionStub.cc +++ b/lib/maths/time_series/CTimeSeriesDecompositionStub.cc @@ -57,12 +57,11 @@ double CTimeSeriesDecompositionStub::meanValue(core_t::TTime /*time*/) const { return 0.0; } -maths_t::TDoubleDoublePr CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, - double /*confidence*/, - int /*components*/, - const TBoolVec& 
/*removedSeasonalMask*/, - bool /*smooth*/) const { - return {0.0, 0.0}; +CTimeSeriesDecompositionStub::TVector2x1 +CTimeSeriesDecompositionStub::value(core_t::TTime /*time*/, + double /*confidence*/, + bool /*isNonNegative*/) const { + return TVector2x1{0.0}; } core_t::TTime CTimeSeriesDecompositionStub::maximumForecastInterval() const { @@ -74,14 +73,15 @@ void CTimeSeriesDecompositionStub::forecast(core_t::TTime /*startTime*/, core_t::TTime /*step*/, double /*confidence*/, double /*minimumScale*/, + bool /*isNonNegative*/, const TWriteForecastResult& /*writer*/) { } double CTimeSeriesDecompositionStub::detrend(core_t::TTime /*time*/, double value, double /*confidence*/, - core_t::TTime /*maximumTimeShift*/, - int /*components*/) const { + bool /*isNonNegative*/, + core_t::TTime /*maximumTimeShift*/) const { return value; } @@ -89,12 +89,11 @@ double CTimeSeriesDecompositionStub::meanVariance() const { return 0.0; } -maths_t::TDoubleDoublePr +CTimeSeriesDecompositionStub::TVector2x1 CTimeSeriesDecompositionStub::varianceScaleWeight(core_t::TTime /*time*/, double /*variance*/, - double /*confidence*/, - bool /*smooth*/) const { - return {1.0, 1.0}; + double /*confidence*/) const { + return TVector2x1{1.0}; } double CTimeSeriesDecompositionStub::countWeight(core_t::TTime /*time*/) const { @@ -106,7 +105,7 @@ double CTimeSeriesDecompositionStub::winsorisationDerate(core_t::TTime /*time*/) } CTimeSeriesDecompositionStub::TFloatMeanAccumulatorVec -CTimeSeriesDecompositionStub::residuals() const { +CTimeSeriesDecompositionStub::residuals(bool /*isNonNegative*/) const { return {}; } diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index 2c28ef0050..c0ede3ac38 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -25,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +33,6 @@ #include #include #include -#include #include #include @@ -48,22 +45,21 @@ namespace maths { namespace time_series { namespace { +using TDoubleVec = std::vector; using TSizeDoublePr = std::pair; -using TTimeDoublePr = std::pair; using TSizeVec = std::vector; -using TBool2Vec = core::CSmallVector; -using TDouble2Vec = core::CSmallVector; using TDouble4Vec = core::CSmallVector; using TDouble10Vec = core::CSmallVector; +using TDouble10VecVec = std::vector; using TDouble10Vec1Vec = core::CSmallVector; using TDouble10Vec2Vec = core::CSmallVector; using TSize10Vec = core::CSmallVector; -using TTime1Vec = core::CSmallVector; using TDoubleDoublePr = std::pair; using TSizeDoublePr10Vec = core::CSmallVector; using TCalculation2Vec = core::CSmallVector; using TTail10Vec = core::CSmallVector; using TMeanAccumulator = common::CBasicStatistics::SSampleMean::TAccumulator; +using TUnivariatePriorPtr = std::shared_ptr; using TMultivariatePriorCPtrSizePr1Vec = CTimeSeriesCorrelations::TMultivariatePriorCPtrSizePr1Vec; //! The decay rate controllers we maintain. 
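The mechanical change repeated throughout this patch is visible above: the TDoubleDoublePr/vector2x1/pair plumbing is removed, interval-valued queries return common::CVectorNx1<double, 2> (TVector2x1) directly, and value/detrend/forecast/residuals thread an isNonNegative flag through. A minimal sketch of the call-site migration, not part of the patch and assuming the maths/time_series headers and an already constructed decomposition; decomposition, now, observed and isNonNegative are illustrative placeholders:

    // Before: pair-based interval averaged via a helper.
    //   double prediction{common::CBasicStatistics::mean(decomposition.value(now, 0.0))};
    // After: TVector2x1 with (0) = lower bound and (1) = upper bound.
    auto interval = decomposition.value(now, /*confidence*/ 0.0, isNonNegative);
    double prediction{interval.mean()}; // midpoint, i.e. (interval(0) + interval(1)) / 2
    double residual{decomposition.detrend(now, observed, /*confidence*/ 0.0, isNonNegative)};
    // With isNonNegative == true the predicted interval is clamped at zero
    // before the residual is computed, which is the behavioural change this
    // patch introduces for non-negative time series.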
@@ -127,16 +123,10 @@ const std::string VERSION_7_11_TAG("7.11"); const std::string ID_6_3_TAG{"a"}; const std::string IS_NON_NEGATIVE_6_3_TAG{"b"}; const std::string IS_FORECASTABLE_6_3_TAG{"c"}; -//const std::string RNG_6_3_TAG{"d"}; Removed in 6.5 const std::string CONTROLLER_6_3_TAG{"e"}; const core::TPersistenceTag TREND_MODEL_6_3_TAG{"f", "trend_model"}; const core::TPersistenceTag RESIDUAL_MODEL_6_3_TAG{"g", "residual_model"}; const std::string ANOMALY_MODEL_6_3_TAG{"h"}; - -//const std::string RECENT_SAMPLES_6_3_TAG{"i"}; Removed in 6.5 -//const std::string CANDIDATE_CHANGE_POINT_6_3_TAG{"j"}; Removed in 7.11 -//const std::string CURRENT_CHANGE_INTERVAL_6_3_TAG{"k"}; Removed in 7.11 -//const std::string CHANGE_DETECTOR_6_3_TAG{"l"}; Removed in 7.11 const std::string MULTIBUCKET_FEATURE_6_3_TAG{"m"}; const std::string MULTIBUCKET_FEATURE_MODEL_6_3_TAG{"n"}; @@ -346,14 +336,14 @@ class CTimeSeriesAnomalyModel { private: //! The time at which the first anomalous bucket was detected. - core_t::TTime m_FirstAnomalousBucketTime = 0; + core_t::TTime m_FirstAnomalousBucketTime{0}; //! The time at which the last anomalous bucket was detected. - core_t::TTime m_LastAnomalousBucketTime = 0; + core_t::TTime m_LastAnomalousBucketTime{0}; //! The sum of the errors in our base model predictions for the //! anomaly. - double m_SumPredictionError = 0.0; + double m_SumPredictionError{0.0}; //! The mean of minus the log probabilities from our base model //! in the anomaly. @@ -385,7 +375,7 @@ class CTimeSeriesAnomalyModel { private: //! The data bucketing interval. - core_t::TTime m_BucketLength; + core_t::TTime m_BucketLength{0}; //! The current anomaly (if there is one). TOptionalAnomaly m_Anomaly; @@ -394,7 +384,7 @@ class CTimeSeriesAnomalyModel { TMultivariateNormalConjugateVec m_AnomalyFeatureModels; }; -CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel() : m_BucketLength(0) { +CTimeSeriesAnomalyModel::CTimeSeriesAnomalyModel() { m_AnomalyFeatureModels.reserve(2); m_AnomalyFeatureModels.push_back( TMultivariateNormalConjugate::nonInformativePrior(maths_t::E_ContinuousData)); @@ -682,7 +672,7 @@ CUnivariateTimeSeriesModel::TSize2Vec1Vec CUnivariateTimeSeriesModel::correlates void CUnivariateTimeSeriesModel::addBucketValue(const TTimeDouble2VecSizeTrVec& values) { for (const auto& value : values) { m_ResidualModel->adjustOffset( - {m_TrendModel->detrend(value.first, value.second[0], 0.0)}, + {m_TrendModel->detrend(value.first, value.second[0], 0.0, m_IsNonNegative)}, maths_t::CUnitWeights::SINGLE_UNIT); } } @@ -730,7 +720,7 @@ void CUnivariateTimeSeriesModel::skipTime(core_t::TTime gap) { CUnivariateTimeSeriesModel::TDouble2Vec CUnivariateTimeSeriesModel::mode(core_t::TTime time, const TDouble2VecWeightsAry& weights) const { return {m_ResidualModel->marginalLikelihoodMode(unpack(weights)) + - common::CBasicStatistics::mean(m_TrendModel->value(time))}; + m_TrendModel->value(time, 0.0, m_IsNonNegative).mean()}; } CUnivariateTimeSeriesModel::TDouble2Vec1Vec @@ -745,16 +735,19 @@ CUnivariateTimeSeriesModel::correlateModes(core_t::TTime time, if (this->correlationModels(correlated, variables, correlationModels, correlatedTimeSeriesModels)) { result.resize(correlated.size(), TDouble10Vec(2)); - double baseline[2]; - baseline[0] = common::CBasicStatistics::mean(m_TrendModel->value(time)); + baseline[0] = m_TrendModel->value(time, 0.0, m_IsNonNegative).mean(); for (std::size_t i = 0; i < correlated.size(); ++i) { - baseline[1] = common::CBasicStatistics::mean( - 
correlatedTimeSeriesModels[i]->m_TrendModel->value(time)); + baseline[1] = correlatedTimeSeriesModels[i] + ->m_TrendModel + ->value(time, 0.0, correlatedTimeSeriesModels[i]->m_IsNonNegative) + .mean(); TDouble10Vec mode(correlationModels[i].first->marginalLikelihoodMode( CMultivariateTimeSeriesModel::unpack(weights[i]))); - result[i][variables[i][0]] = baseline[0] + mode[variables[i][0]]; - result[i][variables[i][1]] = baseline[1] + mode[variables[i][1]]; + std::size_t v0{variables[i][0]}; + std::size_t v1{variables[i][1]}; + result[i][v0] = baseline[0] + mode[v0]; + result[i][v1] = baseline[1] + mode[v1]; } } @@ -767,7 +760,7 @@ CUnivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights) TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(unpack(weights))); result.reserve(modes.size()); for (auto mode : modes) { - result.push_back({mode}); + result.emplace_back(mode); } return result; } @@ -780,23 +773,26 @@ void CUnivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time, } if (value[0].size() == 1) { - value[0][0] = m_TrendModel->detrend(time[0][0], value[0][0], confidenceInterval); - } else { - TSize1Vec correlated; - TSize2Vec1Vec variables; - TMultivariatePriorCPtrSizePr1Vec correlationModels; - TModelCPtr1Vec correlatedTimeSeriesModels; - if (this->correlationModels(correlated, variables, correlationModels, - correlatedTimeSeriesModels)) { - for (std::size_t i = 0; i < variables.size(); ++i) { - if (!value[i].empty()) { - value[i][variables[i][0]] = m_TrendModel->detrend( - time[i][variables[i][0]], value[i][variables[i][0]], confidenceInterval); - value[i][variables[i][1]] = - correlatedTimeSeriesModels[i]->m_TrendModel->detrend( - time[i][variables[i][1]], value[i][variables[i][1]], - confidenceInterval); - } + value[0][0] = m_TrendModel->detrend(time[0][0], value[0][0], + confidenceInterval, m_IsNonNegative); + return; + } + + TSize1Vec correlated; + TSize2Vec1Vec variables; + TMultivariatePriorCPtrSizePr1Vec correlationModels; + TModelCPtr1Vec correlatedTimeSeriesModels; + if (this->correlationModels(correlated, variables, correlationModels, + correlatedTimeSeriesModels)) { + for (std::size_t i = 0; i < variables.size(); ++i) { + if (value[i].empty() == false) { + std::size_t v0{variables[i][0]}; + std::size_t v1{variables[i][1]}; + value[i][v0] = m_TrendModel->detrend( + time[i][v0], value[i][v0], confidenceInterval, m_IsNonNegative); + value[i][v1] = correlatedTimeSeriesModels[i]->m_TrendModel->detrend( + time[i][v1], value[i][v1], confidenceInterval, + correlatedTimeSeriesModels[i]->m_IsNonNegative); } } } @@ -807,15 +803,16 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& correlatedValue, TDouble2Vec hint) const { double correlateCorrection{0.0}; - if (!correlatedValue.empty()) { + if (correlatedValue.empty() == false) { TSize1Vec correlated{correlatedValue[0].first}; TSize2Vec1Vec variables; TMultivariatePriorCPtrSizePr1Vec correlationModel; - TModelCPtr1Vec correlatedModel; - if (m_Correlations->correlationModels(m_Id, correlated, variables, - correlationModel, correlatedModel)) { - double sample{correlatedModel[0]->m_TrendModel->detrend( - time, correlatedValue[0].second, 0.0)}; + TModelCPtr1Vec correlatedTimeSeriesModels; + if (m_Correlations->correlationModels(m_Id, correlated, variables, correlationModel, + correlatedTimeSeriesModels)) { + double sample{correlatedTimeSeriesModels[0]->m_TrendModel->detrend( + time, correlatedValue[0].second, 0.0, + correlatedTimeSeriesModels[0]->m_IsNonNegative)}; 
TSize10Vec marginalize{variables[0][1]}; TSizeDoublePr10Vec condition{{variables[0][1], sample}}; const common::CMultivariatePrior* joint{correlationModel[0].first}; @@ -830,11 +827,11 @@ CUnivariateTimeSeriesModel::predict(core_t::TTime time, double trend{0.0}; if (m_TrendModel->initialized()) { - trend = common::CBasicStatistics::mean(m_TrendModel->value(time)); + trend = m_TrendModel->value(time, 0.0, m_IsNonNegative).mean(); } if (hint.size() == 1) { - hint[0] = m_TrendModel->detrend(time, hint[0], 0.0); + hint[0] = m_TrendModel->detrend(time, hint[0], 0.0, m_IsNonNegative); } double median{ @@ -854,11 +851,11 @@ CUnivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, double confidenceInterval, const TDouble2VecWeightsAry& weights_) const { if (m_ResidualModel->isNonInformative()) { - return TDouble2Vec3Vec(); + return {}; } double trend{m_TrendModel->initialized() - ? common::CBasicStatistics::mean(m_TrendModel->value(time, confidenceInterval)) + ? m_TrendModel->value(time, 0.0, m_IsNonNegative).mean() : 0.0}; TDoubleWeightsAry weights(unpack(weights_)); @@ -915,7 +912,8 @@ bool CUnivariateTimeSeriesModel::forecast(core_t::TTime firstDataTime, }; m_TrendModel->forecast(startTime, endTime, bucketLength, confidenceInterval, - this->params().minimumSeasonalVarianceScale(), writer); + this->params().minimumSeasonalVarianceScale(), + m_IsNonNegative, writer); return true; } @@ -949,8 +947,8 @@ bool CUnivariateTimeSeriesModel::uncorrelatedProbability( double pu; maths_t::ETail tail; core_t::TTime time{time_[0][0]}; - TDouble1Vec sample{m_TrendModel->detrend(time, value[0][0], - params.seasonalConfidenceInterval())}; + TDouble1Vec sample{m_TrendModel->detrend( + time, value[0][0], params.seasonalConfidenceInterval(), m_IsNonNegative)}; if (m_ResidualModel->probabilityOfLessLikelySamples(calculation, sample, weights, pl, pu, tail)) { LOG_TRACE(<< "P(" << sample << " | weight = " << weights @@ -970,7 +968,7 @@ bool CUnivariateTimeSeriesModel::uncorrelatedProbability( double pMultiBucket{1.0}; TDouble1Vec feature; std::tie(feature, std::ignore) = m_MultibucketFeature->value(); - if (feature.size() > 0) { + if (feature.empty() == false) { for (auto calculation_ : expand(calculation)) { maths_t::ETail dummy; if (m_MultibucketFeatureModel->probabilityOfLessLikelySamples( @@ -1073,11 +1071,16 @@ bool CUnivariateTimeSeriesModel::correlatedProbability( trendModels[v0] = m_TrendModel; trendModels[v1] = correlatedTimeSeriesModels[correlateIndex]->m_TrendModel; const auto& correlationModel = correlationModels[correlateIndex].first; - - sample[0][0] = trendModels[0]->detrend( - time[i][0], value[i][0], params.seasonalConfidenceInterval()); - sample[0][1] = trendModels[1]->detrend( - time[i][1], value[i][1], params.seasonalConfidenceInterval()); + bool isNonNegative[2]; + isNonNegative[v0] = m_IsNonNegative; + isNonNegative[v1] = correlatedTimeSeriesModels[correlateIndex]->m_IsNonNegative; + + sample[0][0] = trendModels[0]->detrend(time[i][0], value[i][0], + params.seasonalConfidenceInterval(), + isNonNegative[0]); + sample[0][1] = trendModels[1]->detrend(time[i][1], value[i][1], + params.seasonalConfidenceInterval(), + isNonNegative[1]); weights[0] = CMultivariateTimeSeriesModel::unpack(params.weights()[i]); if (correlationModel->probabilityOfLessLikelySamples( @@ -1159,13 +1162,13 @@ void CUnivariateTimeSeriesModel::countWeights(core_t::TTime time, double countVarianceScale, TDouble2VecWeightsAry& trendWeights, TDouble2VecWeightsAry& residuaWeights) const { - if 
(m_TrendModel->seasonalComponents().size() > 0) { + if (m_TrendModel->seasonalComponents().empty() == false) { countVarianceScale = 1.0; } TDouble2Vec seasonalWeight; this->seasonalWeight(0.0, time, seasonalWeight); - double sample{m_TrendModel->detrend(time, value[0], 0.0)}; + double sample{m_TrendModel->detrend(time, value[0], 0.0, m_IsNonNegative)}; auto weights = maths_t::CUnitWeights::UNIT; maths_t::setCount(std::min(residualCountWeight / trendCountWeight, 1.0), weights); maths_t::setSeasonalVarianceScale(seasonalWeight[0], weights); @@ -1208,9 +1211,8 @@ void CUnivariateTimeSeriesModel::addCountWeights(core_t::TTime time, void CUnivariateTimeSeriesModel::seasonalWeight(double confidence, core_t::TTime time, TDouble2Vec& weight) const { - double scale{m_TrendModel - ->varianceScaleWeight(time, m_ResidualModel->marginalLikelihoodVariance(), confidence) - .second}; + double scale{m_TrendModel->varianceScaleWeight( + time, m_ResidualModel->marginalLikelihoodVariance(), confidence)(1)}; weight.assign(1, std::max(scale, this->params().minimumSeasonalVarianceScale())); } @@ -1471,7 +1473,8 @@ CUnivariateTimeSeriesModel::updateResidualModels(const common::CModelAddSamplesP TTimeDouble2VecSizeTrVec samples) { for (auto& residual : samples) { - residual.second[0] = m_TrendModel->detrend(residual.first, residual.second[0], 0.0); + residual.second[0] = m_TrendModel->detrend( + residual.first, residual.second[0], 0.0, m_IsNonNegative); } // We add the samples in value order since it makes clustering more stable. @@ -1604,7 +1607,7 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent(TFloatMeanAc m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); // Reinitialize the residual model with any values we've been given. - if (residuals.size() > 0) { + if (residuals.empty() == false) { maths_t::TDoubleWeightsAry1Vec weights(1); double buckets{std::accumulate(residuals.begin(), residuals.end(), 0.0, [](auto partialBuckets, const auto& residual) { @@ -1648,12 +1651,12 @@ bool CUnivariateTimeSeriesModel::correlationModels(TSize1Vec& correlated, TSize2Vec1Vec& variables, TMultivariatePriorCPtrSizePr1Vec& correlationModels, TModelCPtr1Vec& correlatedTimeSeriesModels) const { - if (m_Correlations) { + if (m_Correlations != nullptr) { correlated = m_Correlations->correlated(m_Id); m_Correlations->correlationModels(m_Id, correlated, variables, correlationModels, correlatedTimeSeriesModels); } - return correlated.size() > 0; + return correlated.empty() == false; } CTimeSeriesCorrelations::CTimeSeriesCorrelations(double minimumSignificantCorrelation, @@ -1721,7 +1724,7 @@ void CTimeSeriesCorrelations::processSamples() { SSampleData* samples2{&i2->second}; std::size_t n1{samples1->s_Times.size()}; std::size_t n2{samples2->s_Times.size()}; - std::size_t indices[] = {0, 1}; + std::size_t indices[]{0, 1}; if (n1 < n2) { std::swap(samples1, samples2); std::swap(n1, n2); @@ -1747,7 +1750,7 @@ void CTimeSeriesCorrelations::processSamples() { } for (std::size_t j1 = 0; j1 < n1; ++j1) { - std::size_t j2{0u}; + std::size_t j2{0}; if (n2 > 1) { std::size_t tag{samples1->s_Tags[j1]}; core_t::TTime time{samples1->s_Times[j1]}; @@ -1791,7 +1794,6 @@ void CTimeSeriesCorrelations::processSamples() { } void CTimeSeriesCorrelations::refresh(const CTimeSeriesCorrelateModelAllocator& allocator) { - using TDoubleVec = std::vector; using TSizeSizePrVec = std::vector; if (m_Correlations.changed()) { @@ -2103,7 +2105,7 @@ bool CTimeSeriesCorrelations::correlationModels(std::size_t id, 
correlatedTimeSeriesModels.push_back(m_TimeSeriesModels[correlate]); } - return correlationModels.size() > 0; + return correlationModels.empty() == false; } void CTimeSeriesCorrelations::refreshLookup() { @@ -2262,7 +2264,7 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, TDouble2Vec result(dimension); TDouble10Vec mode(m_ResidualModel->marginalLikelihoodMode(unpack(weights))); for (std::size_t d = 0; d < dimension; ++d) { - result[d] = mode[d] + common::CBasicStatistics::mean(m_TrendModel[d]->value(time)); + result[d] = mode[d] + m_TrendModel[d]->value(time, 0.0, m_IsNonNegative).mean(); } return result; } @@ -2290,7 +2292,8 @@ void CMultivariateTimeSeriesModel::detrend(const TTime2Vec1Vec& time_, std::size_t dimension{this->dimension()}; core_t::TTime time{time_[0][0]}; for (std::size_t d = 0; d < dimension; ++d) { - value[0][d] = m_TrendModel[d]->detrend(time, value[0][d], confidenceInterval); + value[0][d] = m_TrendModel[d]->detrend(time, value[0][d], + confidenceInterval, m_IsNonNegative); } } @@ -2298,13 +2301,11 @@ CMultivariateTimeSeriesModel::TDouble2Vec CMultivariateTimeSeriesModel::predict(core_t::TTime time, const TSizeDoublePr1Vec& /*correlated*/, TDouble2Vec hint) const { - using TUnivariatePriorPtr = std::shared_ptr; - std::size_t dimension{this->dimension()}; if (hint.size() == dimension) { for (std::size_t d = 0; d < dimension; ++d) { - hint[d] = m_TrendModel[d]->detrend(time, hint[d], 0.0); + hint[d] = m_TrendModel[d]->detrend(time, hint[d], 0.0, m_IsNonNegative); } } @@ -2316,10 +2317,10 @@ CMultivariateTimeSeriesModel::predict(core_t::TTime time, for (std::size_t d = 0; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { double trend{0.0}; if (m_TrendModel[d]->initialized()) { - trend = common::CBasicStatistics::mean(m_TrendModel[d]->value(time)); + trend = m_TrendModel[d]->value(time, 0.0, m_IsNonNegative).mean(); } double median{mean[d]}; - if (!m_ResidualModel->isNonInformative()) { + if (m_ResidualModel->isNonInformative() == false) { TUnivariatePriorPtr marginal{ m_ResidualModel->univariate(marginalize, NOTHING_TO_CONDITION).first}; median = hint.empty() @@ -2342,11 +2343,9 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, const TDouble2VecWeightsAry& weights_) const { if (m_ResidualModel->isNonInformative()) { - return TDouble2Vec3Vec(); + return {}; } - using TUnivariatePriorPtr = std::shared_ptr; - std::size_t dimension{this->dimension()}; TSize10Vec marginalize(dimension - 1); @@ -2357,7 +2356,7 @@ CMultivariateTimeSeriesModel::confidenceInterval(core_t::TTime time, maths_t::TDoubleWeightsAry weights{maths_t::CUnitWeights::UNIT}; for (std::size_t d = 0; d < dimension; --marginalize[std::min(d, dimension - 2)], ++d) { double trend{m_TrendModel[d]->initialized() - ? common::CBasicStatistics::mean(m_TrendModel[d]->value(time, confidenceInterval)) + ? 
m_TrendModel[d]->value(time, 0.0, m_IsNonNegative).mean() : 0.0}; for (std::size_t i = 0; i < maths_t::NUMBER_WEIGHT_STYLES; ++i) { @@ -2417,7 +2416,7 @@ bool CMultivariateTimeSeriesModel::probability(const common::CModelProbabilityPa TDouble10Vec1Vec sample{TDouble10Vec(dimension)}; for (std::size_t d = 0; d < dimension; ++d) { sample[0][d] = m_TrendModel[d]->detrend( - time, value[0][d], params.seasonalConfidenceInterval()); + time, value[0][d], params.seasonalConfidenceInterval(), m_IsNonNegative); } maths_t::TDouble10VecWeightsAry1Vec weights{unpack(params.weights()[0])}; @@ -2460,7 +2459,7 @@ bool CMultivariateTimeSeriesModel::probability(const common::CModelProbabilityPa if (m_MultibucketFeatureModel != nullptr && params.useMultibucketFeatures()) { TDouble10Vec1Vec feature; std::tie(feature, std::ignore) = m_MultibucketFeature->value(); - if (feature.size() > 0) { + if (feature.empty() == false) { TDouble10Vec2Vec pMultiBucket[2]{{{1.0}, {1.0}}, {{1.0}, {1.0}}}; for (auto calculation_ : expand(calculation)) { TDouble10Vec2Vec pl; @@ -2559,8 +2558,8 @@ void CMultivariateTimeSeriesModel::countWeights(core_t::TTime time, TDouble2Vec countVarianceScales(dimension, 1.0); TDouble10Vec sample(dimension); for (std::size_t d = 0; d < dimension; ++d) { - sample[d] = m_TrendModel[d]->detrend(time, value[d], 0.0); - if (m_TrendModel[d]->seasonalComponents().size() == 0) { + sample[d] = m_TrendModel[d]->detrend(time, value[d], 0.0, m_IsNonNegative); + if (m_TrendModel[d]->seasonalComponents().empty()) { trendCountWeights[d] /= countVarianceScale; countVarianceScales[d] = countVarianceScale; } @@ -2615,8 +2614,8 @@ void CMultivariateTimeSeriesModel::seasonalWeight(double confidence, weight.resize(dimension); TDouble10Vec variances(m_ResidualModel->marginalLikelihoodVariances()); for (std::size_t d = 0; d < dimension; ++d) { - double scale{ - m_TrendModel[d]->varianceScaleWeight(time, variances[d], confidence).second}; + double scale{m_TrendModel[d]->varianceScaleWeight(time, variances[d], + confidence)(1)}; weight[d] = std::max(scale, this->params().minimumSeasonalVarianceScale()); } } @@ -2847,7 +2846,7 @@ CMultivariateTimeSeriesModel::updateTrend(const common::CModelAddSamplesParams& if (result == E_Reset) { TFloatMeanAccumulatorVec10Vec window(dimension); for (std::size_t d = 0; d < dimension; ++d) { - window[d] = m_TrendModel[d]->residuals(); + window[d] = m_TrendModel[d]->residuals(m_IsNonNegative); } this->reinitializeStateGivenNewComponent(std::move(window)); } @@ -2863,7 +2862,8 @@ void CMultivariateTimeSeriesModel::updateResidualModels(const common::CModelAddS for (auto& residual : samples) { core_t::TTime time{residual.first}; for (std::size_t d = 0; d < dimension; ++d) { - residual.second[d] = m_TrendModel[d]->detrend(time, residual.second[d], 0.0); + residual.second[d] = m_TrendModel[d]->detrend(time, residual.second[d], + 0.0, m_IsNonNegative); } } @@ -2967,9 +2967,6 @@ void CMultivariateTimeSeriesModel::appendPredictionErrors(double interval, void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent(TFloatMeanAccumulatorVec10Vec residuals) { - using TDoubleVec = std::vector; - using TDouble10VecVec = std::vector; - if (m_Controllers != nullptr) { m_ResidualModel->decayRate(m_ResidualModel->decayRate() / (*m_Controllers)[E_ResidualControl].multiplier()); @@ -2984,7 +2981,7 @@ void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent(TFloatMean // Reinitialize the residual model with any values we've been given. 
m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); - if (residuals.size() > 0) { + if (residuals.empty() == false) { std::size_t dimension{this->dimension()}; TDouble10VecVec samples; diff --git a/lib/maths/time_series/CTrendComponent.cc b/lib/maths/time_series/CTrendComponent.cc index d1b62d5e1e..2a5cf37027 100644 --- a/lib/maths/time_series/CTrendComponent.cc +++ b/lib/maths/time_series/CTrendComponent.cc @@ -42,7 +42,7 @@ namespace ml { namespace maths { namespace time_series { namespace { -using TDoubleDoublePr = std::pair; +using TVector2x1 = CTrendComponent::TVector2x1; const double TIME_SCALES[]{144.0, 72.0, 36.0, 12.0, 4.0, 1.0, 0.25, 0.05}; const std::size_t NUMBER_MODELS{std::size(TIME_SCALES)}; @@ -68,20 +68,20 @@ double scaleTime(core_t::TTime time, core_t::TTime origin) { } //! Get the \p confidence interval for \p prediction and \p variance. -TDoubleDoublePr confidenceInterval(double prediction, double variance, double confidence) { +TVector2x1 confidenceInterval(double prediction, double variance, double confidence) { if (variance > 0.0) { try { boost::math::normal normal{prediction, std::sqrt(variance)}; double ql{boost::math::quantile(normal, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(normal, (100.0 + confidence) / 200.0)}; - return {ql, qu}; + return TVector2x1{{ql, qu}}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", prediction = " << prediction << ", variance = " << variance << ", confidence = " << confidence); } } - return {prediction, prediction}; + return TVector2x1{prediction}; } common::CNaiveBayesFeatureDensityFromPrior naiveBayesExemplar(double decayRate) { @@ -311,7 +311,7 @@ void CTrendComponent::dontShiftLevel(core_t::TTime time, double value) { } void CTrendComponent::linearScale(core_t::TTime time, double scale) { - double shift{(scale - 1.0) * common::CBasicStatistics::mean(this->value(time, 0.0))}; + double shift{(scale - 1.0) * this->value(time, 0.0).mean()}; this->shiftLevel(shift); } @@ -330,7 +330,7 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) { // Update the models. 
- double prediction{common::CBasicStatistics::mean(this->value(time, 0.0))}; + double prediction{this->value(time, 0.0).mean()}; double count{this->count()}; if (count > 0.0) { @@ -343,7 +343,7 @@ void CTrendComponent::add(core_t::TTime time, double value, double weight) { double scaledTime{scaleTime(time, m_RegressionOrigin)}; for (auto& model : m_TrendModels) { - TVector mse; + TVector3x1 mse; for (std::size_t order = 1; order <= TRegression::N; ++order) { mse(order - 1) = value - model.s_Regression.predict(order, scaledTime, MAX_CONDITION); } @@ -384,10 +384,9 @@ void CTrendComponent::propagateForwardsByTime(core_t::TTime interval) { m_MagnitudeOfLevelChangeModel.propagateForwardsByTime(interval_); } -CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, - double confidence) const { +CTrendComponent::TVector2x1 CTrendComponent::value(core_t::TTime time, double confidence) const { if (this->initialized() == false) { - return {0.0, 0.0}; + return TVector2x1{0.0}; } double a{this->weightOfPrediction(time)}; @@ -420,7 +419,7 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::value(core_t::TTime time, return confidenceInterval(prediction, variance, confidence); } - return {prediction, prediction}; + return TVector2x1{prediction}; } CTrendComponent::TPredictor CTrendComponent::predictor() const { @@ -469,10 +468,10 @@ core_t::TTime CTrendComponent::maximumForecastInterval() const { return static_cast(interval * timescale); } -CTrendComponent::TDoubleDoublePr CTrendComponent::variance(double confidence) const { +CTrendComponent::TVector2x1 CTrendComponent::variance(double confidence) const { if (this->initialized() == false) { - return {0.0, 0.0}; + return TVector2x1{0.0}; } double variance{m_PredictionErrorVariance}; @@ -483,20 +482,21 @@ CTrendComponent::TDoubleDoublePr CTrendComponent::variance(double confidence) co boost::math::chi_squared chi{df}; double ql{boost::math::quantile(chi, (100.0 - confidence) / 200.0)}; double qu{boost::math::quantile(chi, (100.0 + confidence) / 200.0)}; - return {ql * variance / df, qu * variance / df}; + return TVector2x1{{ql * variance / df, qu * variance / df}}; } catch (const std::exception& e) { LOG_ERROR(<< "Failed calculating confidence interval: " << e.what() << ", df = " << df << ", confidence = " << confidence); } } - return {variance, variance}; + return TVector2x1{variance}; } void CTrendComponent::forecast(core_t::TTime startTime, core_t::TTime endTime, core_t::TTime step, double confidence, + bool isNonNegative, const TSeasonalForecast& seasonal, const TWriteForecastResult& writer) const { if (endTime < startTime) { @@ -518,10 +518,10 @@ void CTrendComponent::forecast(core_t::TTime startTime, TDoubleVec factors; this->smoothingFactors(step, factors); - TDoubleVec modelWeights(this->initialForecastModelWeights()); - TDoubleVec errorWeights(this->initialForecastErrorWeights()); + TDoubleVec modelWeights(this->initialForecastModelWeights(NUMBER_MODELS)); + TDoubleVec errorWeights(this->initialForecastModelWeights(NUMBER_MODELS + 1)); TRegressionArrayVec models(NUMBER_MODELS); - TMatrixVec modelCovariances(NUMBER_MODELS); + TMatrix3x3Vec modelCovariances(NUMBER_MODELS); TDoubleVec mse(NUMBER_MODELS); for (std::size_t i = 0; i < NUMBER_MODELS; ++i) { // Note in the following we multiply the bias by the sample count @@ -552,7 +552,7 @@ void CTrendComponent::forecast(core_t::TTime startTime, TDoubleVec variances(NUMBER_MODELS + 1); for (core_t::TTime time = startTime; time < endTime; time += step) { double 
scaledDt{scaleTime(time, startTime)}; - TVector times({0.0, scaledDt, scaledDt * scaledDt}); + TVector3x1 times({0.0, scaledDt, scaledDt * scaledDt}); double a{this->weightOfPrediction(time)}; double b{1.0 - a}; @@ -574,21 +574,23 @@ void CTrendComponent::forecast(core_t::TTime startTime, for (std::size_t j = 0; j < NUMBER_MODELS; ++j) { variance_.add(variances[j], errorWeights[j]); } + double variance{a * common::CBasicStatistics::mean(variance_) + + b * common::CBasicStatistics::variance(m_ValueMoments)}; - double prediction{this->value(modelWeights, models, - scaleTime(time, m_RegressionOrigin))}; + TVector2x1 trend{confidenceInterval( + this->value(modelWeights, models, scaleTime(time, m_RegressionOrigin)), + variance, confidence)}; TDouble3Vec seasonal_(seasonal(time)); - TDouble3Vec level_(level.forecast(time, seasonal_[1] + prediction, confidence)); + TDouble3Vec level_(level.forecast(time, seasonal_[1] + trend.mean(), confidence)); - double ql; - double qu; - double variance{a * common::CBasicStatistics::mean(variance_) + - b * common::CBasicStatistics::variance(m_ValueMoments)}; - std::tie(ql, qu) = confidenceInterval(0.0, variance, confidence); + TDouble3Vec forecast{level_[0] + trend(0) + seasonal_[0], + level_[1] + trend.mean() + seasonal_[1], + level_[2] + trend(1) + seasonal_[2]}; + forecast[0] = isNonNegative ? std::max(forecast[0], 0.0) : forecast[0]; + forecast[1] = isNonNegative ? std::max(forecast[1], 0.0) : forecast[1]; + forecast[2] = isNonNegative ? std::max(forecast[2], 0.0) : forecast[2]; - writer(time, {level_[0] + seasonal_[0] + prediction + ql, - level_[1] + seasonal_[1] + prediction, - level_[2] + seasonal_[2] + prediction + qu}); + writer(time, forecast); } } @@ -671,22 +673,12 @@ CTrendComponent::TSizeVec CTrendComponent::selectModelOrdersForForecasting() con return result; } -CTrendComponent::TDoubleVec CTrendComponent::initialForecastModelWeights() const { - TDoubleVec result(NUMBER_MODELS); - for (std::size_t i = 0; i < NUMBER_MODELS; ++i) { - result[i] = std::exp(static_cast(NUMBER_MODELS / 2) - - static_cast(i)); - } - return result; -} - -CTrendComponent::TDoubleVec CTrendComponent::initialForecastErrorWeights() const { - TDoubleVec result(NUMBER_MODELS + 1); - for (std::size_t i = 0; i < NUMBER_MODELS; ++i) { +CTrendComponent::TDoubleVec CTrendComponent::initialForecastModelWeights(std::size_t n) const { + TDoubleVec result(n); + for (std::size_t i = 0; i < n; ++i) { result[i] = std::exp(static_cast(NUMBER_MODELS / 2) - static_cast(i)); } - result[NUMBER_MODELS] = result[NUMBER_MODELS - 1] / std::exp(1.0); return result; } @@ -764,7 +756,7 @@ bool CTrendComponent::SModel::acceptRestoreTraverser(core::CStateRestoreTraverse // immediately after upgrade. These values will be aged out reasonably // quickly. 
- TVector mse; + TVector3x1 mse; for (std::size_t order = TRegression::N, scale = 1; order > 0; --order, scale *= 3) { mse(order - 1) = static_cast(scale) * diff --git a/lib/maths/time_series/unittest/CCalendarComponentTest.cc b/lib/maths/time_series/unittest/CCalendarComponentTest.cc index ab7553b24e..b494db269f 100644 --- a/lib/maths/time_series/unittest/CCalendarComponentTest.cc +++ b/lib/maths/time_series/unittest/CCalendarComponentTest.cc @@ -142,9 +142,7 @@ BOOST_FIXTURE_TEST_CASE(testFit, CTestFixture) { TMeanAccumulator mae; for (core_t::TTime time = 15724800 - timeZoneOffset; time < 15724800 - timeZoneOffset + DAY; time += HOUR) { - mae.add(std::fabs( - (maths::common::CBasicStatistics::mean(component.value(time, 0.0)) - trend(time)) / - trend(time))); + mae.add(std::fabs((component.value(time, 0.0).mean() - trend(time)) / trend(time))); } BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mae) < 0.06); } diff --git a/lib/maths/time_series/unittest/CSeasonalComponentTest.cc b/lib/maths/time_series/unittest/CSeasonalComponentTest.cc index e52e43f84b..5c27c05008 100644 --- a/lib/maths/time_series/unittest/CSeasonalComponentTest.cc +++ b/lib/maths/time_series/unittest/CSeasonalComponentTest.cc @@ -109,10 +109,6 @@ void generateSeasonalValues(test::CRandomNumbers& rng, } } -double mean(const TDoubleDoublePr& x) { - return (x.first + x.second) / 2.0; -} - const core_t::TTime FIVE_MINUTES{5 * core::constants::MINUTE}; const core_t::TTime TWO_HOURS{2 * core::constants::HOUR}; } @@ -171,9 +167,9 @@ BOOST_AUTO_TEST_CASE(testInitialize) { TMeanAccumulator meanError; for (std::size_t i = 20; i < 30; ++i) { - meanError.add(std::fabs(maths::common::CBasicStatistics::mean(component.value( - 2 * static_cast(i), 0.0)) - - static_cast(i % 10))); + meanError.add( + std::fabs(component.value(2 * static_cast(i), 0.0).mean() - + static_cast(i % 10))); } BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.3); } @@ -283,20 +279,21 @@ BOOST_AUTO_TEST_CASE(testConstant) { double error1 = 0.0; double error2 = 0.0; for (std::size_t j = 0; j < function.size(); ++j) { - TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); + auto interval = seasonal.value(time + function[j].first, 70.0); double f = function[j].second + residualMean; - double e = mean(interval) - f; + double e = interval.mean() - f; error1 += std::fabs(e); - error2 += std::max(std::max(interval.first - f, f - interval.second), 0.0); + error2 += std::max(std::max(interval(0) - f, f - interval(1)), 0.0); } if (d > 1) { - LOG_TRACE(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " - << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_TRACE( + << "f(0) = " << seasonal.value(time, 0.0).mean() << ", f(T) = " + << seasonal.value(time + core::constants::DAY - 1, 0.0).mean()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + seasonal.value(time, 0.0).mean(), + seasonal.value(time + core::constants::DAY - 1, 0.0).mean(), 0.1); } error1 /= static_cast(function.size()); error2 /= static_cast(function.size()); @@ -360,19 +357,20 @@ BOOST_AUTO_TEST_CASE(testStablePeriodic) { double error1 = 0.0; double error2 = 0.0; for (std::size_t j = 0; j < function.size(); ++j) { - TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); + auto interval = seasonal.value(time + function[j].first, 70.0); double f = residualMean + function[j].second; - double e = mean(interval) - f; + double e 
= interval.mean() - f; error1 += std::fabs(e); - error2 += std::max(std::max(interval.first - f, f - interval.second), 0.0); + error2 += std::max(std::max(interval(0) - f, f - interval(1)), 0.0); } if (d > 1) { - LOG_TRACE(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " - << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_TRACE( + << "f(0) = " << seasonal.value(time, 0.0).mean() << ", f(T) = " + << seasonal.value(time + core::constants::DAY - 1, 0.0).mean()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + seasonal.value(time, 0.0).mean(), + seasonal.value(time + core::constants::DAY - 1, 0.0).mean(), 0.1); } error1 /= static_cast(function.size()); @@ -452,20 +450,21 @@ BOOST_AUTO_TEST_CASE(testStablePeriodic) { double error1 = 0.0; double error2 = 0.0; for (std::size_t j = 0; j < function.size(); ++j) { - TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); + auto interval = seasonal.value(time + function[j].first, 70.0); double f = residualMean + function[j].second; - double e = mean(interval) - f; + double e = interval.mean() - f; error1 += std::fabs(e); - error2 += std::max(std::max(interval.first - f, f - interval.second), 0.0); + error2 += std::max(std::max(interval(0) - f, f - interval(1)), 0.0); } if (d > 1) { - LOG_TRACE(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " - << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_TRACE( + << "f(0) = " << seasonal.value(time, 0.0).mean() << ", f(T) = " + << seasonal.value(time + core::constants::DAY - 1, 0.0).mean()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + seasonal.value(time, 0.0).mean(), + seasonal.value(time + core::constants::DAY - 1, 0.0).mean(), 0.1); } error1 /= static_cast(function.size()); @@ -556,20 +555,21 @@ BOOST_AUTO_TEST_CASE(testTimeVaryingPeriodic) { double error1 = 0.0; double error2 = 0.0; for (std::size_t j = 0; j < function.size(); ++j) { - TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); + auto interval = seasonal.value(time + function[j].first, 70.0); double f = residualMean + scale * function[j].second; - double e = mean(interval) - f; + double e = interval.mean() - f; error1 += std::fabs(e); - error2 += std::max(std::max(interval.first - f, f - interval.second), 0.0); + error2 += std::max(std::max(interval(0) - f, f - interval(1)), 0.0); } if (d > 1) { - LOG_TRACE(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " - << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_TRACE( + << "f(0) = " << seasonal.value(time, 0.0).mean() << ", f(T) = " + << seasonal.value(time + core::constants::DAY - 1, 0.0).mean()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.4); + seasonal.value(time, 0.0).mean(), + seasonal.value(time + core::constants::DAY - 1, 0.0).mean(), 0.4); } error1 /= static_cast(function.size()); @@ -634,8 +634,7 @@ BOOST_AUTO_TEST_CASE(testWindowed) { for (core_t::TTime t = 0; t < core::constants::DAY; t += bucketLength) { double rt{weekendComponent.time().regression(time + t)}; error += std::fabs( - maths::common::CBasicStatistics::mean( - weekendComponent.value(time + t, 0.0)) - + weekendComponent.value(time + t, 0.0).mean() - weekendComponent.bucketing().regression(time + t)->predict(rt)); } error /= 48.0; 
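The unit tests adopt the same convention: their local mean(TDoubleDoublePr) helpers are deleted and the interval components are read directly off the returned TVector2x1. A minimal sketch of the resulting assertion pattern, not part of the patch; seasonal, t and expected are illustrative placeholders:

    auto interval = seasonal.value(t, /*confidence*/ 70.0);
    double midpoint{interval.mean()}; // replaces (interval.first + interval.second) / 2.0
    double absoluteError{std::fabs(midpoint - expected)};
    // The coverage penalty is non-zero only when expected lies outside
    // [interval(0), interval(1)], i.e. outside the 70% confidence interval.
    double coverageError{
        std::max(std::max(interval(0) - expected, expected - interval(1)), 0.0)};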
@@ -653,8 +652,7 @@ BOOST_AUTO_TEST_CASE(testWindowed) { for (core_t::TTime t = 0; t < core::constants::DAY; t += bucketLength) { double rt{weekdayComponent.time().regression(time + t)}; error += std::fabs( - maths::common::CBasicStatistics::mean( - weekdayComponent.value(time + t, 0.0)) - + weekdayComponent.value(time + t, 0.0).mean() - weekdayComponent.bucketing().regression(time + t)->predict(rt)); } error /= 48.0; @@ -696,7 +694,7 @@ BOOST_AUTO_TEST_CASE(testVeryLowVariation) { double totalError1 = 0.0; double totalError2 = 0.0; core_t::TTime time = startTime; - for (std::size_t i = 0u, d = 0; i < n; ++i) { + for (std::size_t i = 0, d = 0; i < n; ++i) { seasonal.addPoint(samples[i].first, samples[i].second + residuals[i]); if (samples[i].first >= time + core::constants::DAY) { @@ -707,20 +705,21 @@ BOOST_AUTO_TEST_CASE(testVeryLowVariation) { double error1 = 0.0; double error2 = 0.0; for (std::size_t j = 0; j < function.size(); ++j) { - TDoubleDoublePr interval = seasonal.value(time + function[j].first, 70.0); + auto interval = seasonal.value(time + function[j].first, 70.0); double f = residualMean + function[j].second; - double e = mean(interval) - f; + double e = interval.mean() - f; error1 += std::fabs(e); - error2 += std::max(std::max(interval.first - f, f - interval.second), 0.0); + error2 += std::max(std::max(interval(0) - f, f - interval(1)), 0.0); } if (d > 1) { - LOG_TRACE(<< "f(0) = " << mean(seasonal.value(time, 0.0)) << ", f(T) = " - << mean(seasonal.value(time + core::constants::DAY - 1, 0.0))); + LOG_TRACE( + << "f(0) = " << seasonal.value(time, 0.0).mean() << ", f(T) = " + << seasonal.value(time + core::constants::DAY - 1, 0.0).mean()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - mean(seasonal.value(time, 0.0)), - mean(seasonal.value(time + core::constants::DAY - 1, 0.0)), 0.1); + seasonal.value(time, 0.0).mean(), + seasonal.value(time + core::constants::DAY - 1, 0.0).mean(), 0.1); } error1 /= static_cast(function.size()); error2 /= static_cast(function.size()); @@ -769,14 +768,14 @@ BOOST_AUTO_TEST_CASE(testVariance) { core_t::TTime t = (i * core::constants::DAY) / 48; double v_ = 80.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast(i) / 48.0); - TDoubleDoublePr vv = seasonal.variance(t, 98.0); - double v = (vv.first + vv.second) / 2.0; - LOG_TRACE(<< "v_ = " << v_ << ", v = " << core::CContainerPrinter::print(vv) + auto interval = seasonal.variance(t, 98.0); + double v = interval.mean(); + LOG_TRACE(<< "v_ = " << v_ << ", v = " << interval << ", relative error = " << std::fabs(v - v_) / v_); BOOST_REQUIRE_CLOSE_ABSOLUTE(v_, v, 0.5 * v_); - BOOST_TEST_REQUIRE(v_ > vv.first); - BOOST_TEST_REQUIRE(v_ < vv.second); + BOOST_TEST_REQUIRE(v_ > interval(0)); + BOOST_TEST_REQUIRE(v_ < interval(1)); error.add(std::fabs(v - v_) / v_); } @@ -823,12 +822,8 @@ BOOST_AUTO_TEST_CASE(testPrecession) { for (core_t::TTime time = TWO_HOURS; time < core::constants::WEEK; time += FIVE_MINUTES) { seasonalWithShift.addPoint(time, trend(time)); seasonalWithoutShift.addPoint(time, trend(time)); - double errorWithShift{ - maths::common::CBasicStatistics::mean(seasonalWithShift.value(time, 0.0)) - - trend(time)}; - double errorWithoutShift{maths::common::CBasicStatistics::mean( - seasonalWithoutShift.value(time, 0.0)) - - trend(time)}; + double errorWithShift{seasonalWithShift.value(time, 0.0).mean() - trend(time)}; + double errorWithoutShift{seasonalWithoutShift.value(time, 0.0).mean() - trend(time)}; meanErrorWithShift.add(std::fabs(errorWithShift)); 
meanErrorWithoutShift.add(std::fabs(errorWithoutShift)); } @@ -883,12 +878,8 @@ BOOST_AUTO_TEST_CASE(testWithRandomShifts) { for (core_t::TTime time = TWO_HOURS; time < core::constants::WEEK; time += FIVE_MINUTES) { seasonalWithShift.addPoint(time, trend(time)); seasonalWithoutShift.addPoint(time, trend(time)); - double errorWithShift{ - maths::common::CBasicStatistics::mean(seasonalWithShift.value(time, 0.0)) - - trend(time)}; - double errorWithoutShift{maths::common::CBasicStatistics::mean( - seasonalWithoutShift.value(time, 0.0)) - - trend(time)}; + double errorWithShift{seasonalWithShift.value(time, 0.0).mean() - trend(time)}; + double errorWithoutShift{seasonalWithoutShift.value(time, 0.0).mean() - trend(time)}; meanErrorWithShift.add(std::fabs(errorWithShift)); meanErrorWithoutShift.add(std::fabs(errorWithoutShift)); diff --git a/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc index 4c31890804..f973ea89c3 100644 --- a/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc @@ -61,10 +61,6 @@ using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::T using TFloatMeanAccumulatorVec = std::vector::TAccumulator>; -double mean(const TDoubleDoublePr& x) { - return (x.first + x.second) / 2.0; -} - class CDebugGenerator { public: static const bool ENABLED{false}; @@ -213,17 +209,17 @@ BOOST_FIXTURE_TEST_CASE(testSuperpositionOfSines, CTestFixture) { double percentileError = 0.0; for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = std::fabs(trend[t / HALF_HOUR] - mean(prediction)); + auto prediction = decomposition.value(t, 70.0, false); + double residual = std::fabs(trend[t / HALF_HOUR] - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR])); percentileError += - std::max(std::max(prediction.first - trend[t / HALF_HOUR], - trend[t / HALF_HOUR] - prediction.second), + std::max(std::max(prediction(0) - trend[t / HALF_HOUR], + trend[t / HALF_HOUR] - prediction(1)), 0.0); - debug.addPrediction(t, mean(prediction), residual); + debug.addPrediction(t, prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -302,15 +298,15 @@ BOOST_FIXTURE_TEST_CASE(testDistortedPeriodicProblemCase, CTestFixture) { static_cast(t / HOUR) < timeseries.size(); t += HOUR) { double actual = timeseries[t / HOUR].second; - TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = std::fabs(actual - mean(prediction)); + auto prediction = decomposition.value(t, 70.0, false); + double residual = std::fabs(actual - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(actual); maxValue = std::max(maxValue, std::fabs(actual)); percentileError += std::max( - std::max(prediction.first - actual, actual - prediction.second), 0.0); - debug.addPrediction(t, mean(prediction), residual); + std::max(prediction(0) - actual, actual - prediction(1)), 0.0); + debug.addPrediction(t, prediction.mean(), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -392,17 +388,17 @@ BOOST_FIXTURE_TEST_CASE(testMinimizeLongComponents, CTestFixture) { double percentileError 
= 0.0; for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = std::fabs(trend[t / HALF_HOUR] - mean(prediction)); + auto prediction = decomposition.value(t, 70.0, false); + double residual = std::fabs(trend[t / HALF_HOUR] - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR])); percentileError += - std::max(std::max(prediction.first - trend[t / HALF_HOUR], - trend[t / HALF_HOUR] - prediction.second), + std::max(std::max(prediction(0) - trend[t / HALF_HOUR], + trend[t / HALF_HOUR] - prediction(1)), 0.0); - debug.addPrediction(t, mean(prediction), residual); + debug.addPrediction(t, prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -499,16 +495,16 @@ BOOST_FIXTURE_TEST_CASE(testWeekend, CTestFixture) { double percentileError = 0.0; for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr prediction = decomposition.value(t, 70.0); + auto prediction = decomposition.value(t, 70.0, false); double actual = trend[(t - offset) / HALF_HOUR]; - double residual = std::fabs(actual - mean(prediction)); + double residual = std::fabs(actual - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(actual); maxValue = std::max(maxValue, std::fabs(actual)); percentileError += std::max( - std::max(prediction.first - actual, actual - prediction.second), 0.0); - debug.addPrediction(t, mean(prediction), residual); + std::max(prediction(0) - actual, actual - prediction(1)), 0.0); + debug.addPrediction(t, prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -586,9 +582,9 @@ BOOST_FIXTURE_TEST_CASE(testNanHandling, CTestFixture) { [&componentsModifiedAfter](TFloatMeanAccumulatorVec) { ++componentsModifiedAfter; }); - auto value = decomposition.value(time); - BOOST_TEST_REQUIRE(maths::common::CMathsFuncs::isFinite(value.first)); - BOOST_TEST_REQUIRE(maths::common::CMathsFuncs::isFinite(value.second)); + auto value = decomposition.value(time, 0.0, false); + BOOST_TEST_REQUIRE(maths::common::CMathsFuncs::isFinite(value(0))); + BOOST_TEST_REQUIRE(maths::common::CMathsFuncs::isFinite(value(1))); } // The call to 'addPoint' that results in the removal of the component @@ -652,18 +648,18 @@ BOOST_FIXTURE_TEST_CASE(testSinglePeriodicity, CTestFixture) { double percentileError = 0.0; for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) { - TDoubleDoublePr prediction = decomposition.value(t, 70.0); + auto prediction = decomposition.value(t, 70.0, false); double residual = - std::fabs(trend[t / HALF_HOUR] + noiseMean - mean(prediction)); + std::fabs(trend[t / HALF_HOUR] + noiseMean - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[t / HALF_HOUR]); maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR])); percentileError += std::max( - std::max(prediction.first - (trend[t / HALF_HOUR] + noiseMean), - (trend[t / HALF_HOUR] + noiseMean) - prediction.second), + std::max(prediction(0) - (trend[t / HALF_HOUR] + noiseMean), + (trend[t / HALF_HOUR] + noiseMean) - prediction(1)), 0.0); - debug.addPrediction(t, mean(prediction), residual); + debug.addPrediction(t, prediction.mean(), residual); } 
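+        // (percentileError, accumulated above, measures only the out-of-band
+        // error: each term is max(lower - actual, actual - upper, 0), so
+        // samples covered by the predicted 70% interval contribute nothing.)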
LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -757,17 +753,16 @@ BOOST_FIXTURE_TEST_CASE(testSeasonalOnset, CTestFixture) { double maxValue = 0.0; double percentileError = 0.0; for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HOUR) { - TDoubleDoublePr prediction = decomposition.value(t, 70.0); - double residual = std::fabs(trend[t / HOUR] - mean(prediction)); + auto prediction = decomposition.value(t, 70.0, false); + double residual = std::fabs(trend[t / HOUR] - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[t / HOUR]); maxValue = std::max(maxValue, std::fabs(trend[t / HOUR])); - percentileError += - std::max(std::max(prediction.first - trend[t / HOUR], - trend[t / HOUR] - prediction.second), - 0.0); - debug.addPrediction(t, mean(prediction), residual); + percentileError += std::max(std::max(prediction(0) - trend[t / HOUR], + trend[t / HOUR] - prediction(1)), + 0.0); + debug.addPrediction(t, prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " @@ -846,16 +841,14 @@ BOOST_FIXTURE_TEST_CASE(testVarianceScale, CTestFixture) { variance = 10.0; } double expectedScale = variance / meanVariance; - TDoubleDoublePr interval = - decomposition.varianceScaleWeight(time + t, meanVariance, 70.0); + auto interval = decomposition.varianceScaleWeight(time + t, meanVariance, 70.0); LOG_TRACE(<< "time = " << t << ", expectedScale = " << expectedScale - << ", scale = " << core::CContainerPrinter::print(interval)); - double scale = (interval.first + interval.second) / 2.0; + << ", scale = " << interval); + double scale = interval.mean(); error.add(std::fabs(scale - expectedScale)); meanScale.add(scale); - percentileError.add(std::max(std::max(interval.first - expectedScale, - expectedScale - interval.second), - 0.0)); + percentileError.add(std::max( + std::max(interval(0) - expectedScale, expectedScale - interval(1)), 0.0)); } LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(error)); @@ -899,16 +892,14 @@ BOOST_FIXTURE_TEST_CASE(testVarianceScale, CTestFixture) { variance = 10.0; } double expectedScale = variance / meanVariance; - TDoubleDoublePr interval = - decomposition.varianceScaleWeight(time + t, meanVariance, 70.0); + auto interval = decomposition.varianceScaleWeight(time + t, meanVariance, 70.0); LOG_TRACE(<< "time = " << t << ", expectedScale = " << expectedScale - << ", scale = " << core::CContainerPrinter::print(interval)); - double scale = (interval.first + interval.second) / 2.0; + << ", scale = " << interval); + double scale = interval.mean(); error.add(std::fabs(scale - expectedScale)); meanScale.add(scale); - percentileError.add(std::max(std::max(interval.first - expectedScale, - expectedScale - interval.second), - 0.0)); + percentileError.add(std::max( + std::max(interval(0) - expectedScale, expectedScale - interval(1)), 0.0)); } LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(error)); @@ -948,12 +939,10 @@ BOOST_FIXTURE_TEST_CASE(testVarianceScale, CTestFixture) { TMeanAccumulator meanScale; double meanVariance = decomposition.meanVariance(); for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) { - TDoubleDoublePr interval = decomposition.varianceScaleWeight( - times.back() + t, meanVariance, 70.0); - LOG_TRACE(<< "time = " << t - << ", scale = " << core::CContainerPrinter::print(interval)); - double scale = (interval.first + interval.second) / 2.0; - meanScale.add(scale); + auto interval = 
decomposition.varianceScaleWeight(times.back() + t, + meanVariance, 70.0); + LOG_TRACE(<< "time = " << t << ", scale = " << interval); + meanScale.add(interval.mean()); } LOG_DEBUG(<< "mean scale = " << maths::common::CBasicStatistics::mean(meanScale)); @@ -1005,18 +994,19 @@ BOOST_FIXTURE_TEST_CASE(testSpikeyDataProblemCase, CTestFixture) { double percentileError = 0.0; for (std::size_t j = 0; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr prediction = - decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); + auto prediction = + decomposition.value(lastWeekTimeseries[j].first, 70.0, false); + double residual = + std::fabs(lastWeekTimeseries[j].second - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max( - std::max(prediction.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - prediction.second), - 0.0); - debug.addPrediction(lastWeekTimeseries[j].first, mean(prediction), residual); + percentileError += + std::max(std::max(prediction(0) - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction(1)), + 0.0); + debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " @@ -1049,7 +1039,7 @@ BOOST_FIXTURE_TEST_CASE(testSpikeyDataProblemCase, CTestFixture) { } } }); - model.addSamples({decomposition.detrend(time, value, 70.0)}, + model.addSamples({decomposition.detrend(time, value, 70.0, false)}, maths_t::CUnitWeights::SINGLE_UNIT); debug.addValue(time, value); } @@ -1073,15 +1063,15 @@ BOOST_FIXTURE_TEST_CASE(testSpikeyDataProblemCase, CTestFixture) { double ub; maths_t::ETail tail; model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, {decomposition.detrend(time, value, 0.0)}, + maths_t::E_TwoSided, {decomposition.detrend(time, value, 0.0, false)}, {maths_t::seasonalVarianceScaleWeight(std::max( - decomposition.varianceScaleWeight(time, variance, 70.0).second, 0.25))}, + decomposition.varianceScaleWeight(time, variance, 70.0)(1), 0.25))}, lb, ub, tail); double pScaled = (lb + ub) / 2.0; pMinScaled = std::min(pMinScaled, pScaled); model.probabilityOfLessLikelySamples( - maths_t::E_TwoSided, {decomposition.detrend(time, value, 0.0)}, + maths_t::E_TwoSided, {decomposition.detrend(time, value, 0.0, false)}, maths_t::CUnitWeights::SINGLE_UNIT, lb, ub, tail); double pUnscaled = (lb + ub) / 2.0; pMinUnscaled = std::min(pMinUnscaled, pUnscaled); @@ -1132,18 +1122,19 @@ BOOST_FIXTURE_TEST_CASE(testVeryLargeValuesProblemCase, CTestFixture) { double percentileError = 0.0; for (std::size_t j = 0; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr prediction = - decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); + auto prediction = + decomposition.value(lastWeekTimeseries[j].first, 70.0, false); + double residual = + std::fabs(lastWeekTimeseries[j].second - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max( - std::max(prediction.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - prediction.second), - 0.0); - 
debug.addPrediction(lastWeekTimeseries[j].first, mean(prediction), residual); + percentileError += + std::max(std::max(prediction(0) - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction(1)), + 0.0); + debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -1181,7 +1172,7 @@ BOOST_FIXTURE_TEST_CASE(testVeryLargeValuesProblemCase, CTestFixture) { double variance = decomposition.meanVariance(); core_t::TTime time = maths::common::CIntegerTools::floor(endTime, DAY); for (core_t::TTime t = time; t < time + WEEK; t += TEN_MINS) { - scale.add(mean(decomposition.varianceScaleWeight(t, variance, 70.0))); + scale.add(decomposition.varianceScaleWeight(t, variance, 70.0).mean()); } LOG_DEBUG(<< "scale = " << maths::common::CBasicStatistics::mean(scale)); @@ -1230,18 +1221,19 @@ BOOST_FIXTURE_TEST_CASE(testMixedSmoothAndSpikeyDataProblemCase, CTestFixture) { double percentileError = 0.0; for (std::size_t j = 0; j < lastWeekTimeseries.size(); ++j) { - TDoubleDoublePr prediction = - decomposition.value(lastWeekTimeseries[j].first, 70.0); - double residual = std::fabs(lastWeekTimeseries[j].second - mean(prediction)); + auto prediction = + decomposition.value(lastWeekTimeseries[j].first, 70.0, false); + double residual = + std::fabs(lastWeekTimeseries[j].second - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); - percentileError += std::max( - std::max(prediction.first - lastWeekTimeseries[j].second, - lastWeekTimeseries[j].second - prediction.second), - 0.0); - debug.addPrediction(lastWeekTimeseries[j].first, mean(prediction), residual); + percentileError += + std::max(std::max(prediction(0) - lastWeekTimeseries[j].second, + lastWeekTimeseries[j].second - prediction(1)), + 0.0); + debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1305,8 +1297,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalPeriodicityWithMissingValues, CTestFixture) { value += noise[0]; decomposition.addPoint(time, value); debug.addValue(time, value); - double prediction = maths::common::CBasicStatistics::mean( - decomposition.value(time, 0.0)); + double prediction = decomposition.value(time, 0.0, false).mean(); if (decomposition.initialized()) { error.add(std::fabs(value - prediction) / std::fabs(value)); } @@ -1352,8 +1343,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalPeriodicityWithMissingValues, CTestFixture) { value += noise[0]; decomposition.addPoint(time, value); debug.addValue(time, value); - double prediction = maths::common::CBasicStatistics::mean( - decomposition.value(time, 0.0)); + double prediction = decomposition.value(time, 0.0, false).mean(); if (decomposition.initialized()) { error.add(std::fabs(value - prediction) / std::fabs(value)); } @@ -1411,13 +1401,13 @@ BOOST_FIXTURE_TEST_CASE(testLongTermTrend, CTestFixture) { double maxValue = 0.0; for (std::size_t j = i - 48; j < i; ++j) { - TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); - double residual = std::fabs(trend[j] - mean(prediction)); + auto prediction = decomposition.value(times[j], 70.0, false); + double residual = std::fabs(trend[j] - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, 
std::fabs(trend[j])); - debug.addPrediction(times[j], mean(prediction), residual); + debug.addPrediction(times[j], prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " @@ -1487,13 +1477,13 @@ BOOST_FIXTURE_TEST_CASE(testLongTermTrend, CTestFixture) { double maxValue = 0.0; for (std::size_t j = i - 48; j < i; ++j) { - TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); - double residual = std::fabs(trend[j] - mean(prediction)); + auto prediction = decomposition.value(times[j], 70.0, false); + double residual = std::fabs(trend[j] - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, std::fabs(trend[j])); - debug.addPrediction(times[j], mean(prediction), residual); + debug.addPrediction(times[j], prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " @@ -1562,13 +1552,13 @@ BOOST_FIXTURE_TEST_CASE(testLongTermTrendAndPeriodicity, CTestFixture) { double maxValue = 0.0; for (std::size_t j = i - 48; j < i; ++j) { - TDoubleDoublePr prediction = decomposition.value(times[j], 70.0); - double residual = std::fabs(trend[j] - mean(prediction)); + auto prediction = decomposition.value(times[j], 70.0, false); + double residual = std::fabs(trend[j] - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, std::fabs(trend[j])); - debug.addPrediction(times[j], mean(prediction), residual); + debug.addPrediction(times[j], prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " @@ -1643,7 +1633,8 @@ BOOST_FIXTURE_TEST_CASE(testNonDiurnal, CTestFixture) { double maxValue = 0.0; for (std::size_t j = i - 12; j < i; ++j) { - double prediction = mean(decomposition.value(times[j], 70.0)); + double prediction = + decomposition.value(times[j], 70.0, false).mean(); double residual = std::fabs(trend[j] - prediction); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1717,7 +1708,8 @@ BOOST_FIXTURE_TEST_CASE(testNonDiurnal, CTestFixture) { double maxValue = 0.0; for (std::size_t j = i - 288; j < i; ++j) { - double prediction = mean(decomposition.value(times[j], 70.0)); + double prediction = + decomposition.value(times[j], 70.0, false).mean(); double residual = std::fabs(trend[j] - prediction); sumResidual += residual; maxResidual = std::max(maxResidual, residual); @@ -1781,8 +1773,8 @@ BOOST_FIXTURE_TEST_CASE(testPrecession, CTestFixture) { rng.generateNormalSamples(0.0, 0.1, 1, noise); decomposition.addPoint(time, trend + noise[0]); if (decomposition.initialized()) { - double prediction = mean(decomposition.value(time, 0.0)); - double residual = decomposition.detrend(time, trend, 0.0, FIVE_MINS); + double prediction = decomposition.value(time, 0.0, false).mean(); + double residual = decomposition.detrend(time, trend, 0.0, false, FIVE_MINS); sumResidual += std::fabs(residual); maxResidual = std::max(maxResidual, std::fabs(residual)); sumValue += std::fabs(trend); @@ -1839,8 +1831,8 @@ BOOST_FIXTURE_TEST_CASE(testRandomShifts, CTestFixture) { decomposition.addPoint(time, trend(time) + noise[0]); if (decomposition.initialized()) { - double prediction = mean(decomposition.value(time, 0.0)); - double residual = decomposition.detrend(time, trend(time), 0.0, FIVE_MINS); + double prediction = decomposition.value(time, 0.0, false).mean(); + double residual = decomposition.detrend(time, trend(time), 0.0, false, 
FIVE_MINS); sumResidual += residual; maxResidual = std::max(maxResidual, std::fabs(residual)); sumValue += std::fabs(trend(time)); @@ -1889,7 +1881,7 @@ BOOST_FIXTURE_TEST_CASE(testYearly, CTestFixture) { decomposition.addPoint(time, trend + noise[0]); if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; - TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0)}; + TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0, false)}; double multiplier{controller.multiplier(prediction, {predictionError}, 2 * HOUR, 1.0, 0.0005)}; decomposition.decayRate(multiplier * decomposition.decayRate()); @@ -1905,8 +1897,7 @@ BOOST_FIXTURE_TEST_CASE(testYearly, CTestFixture) { static_cast(time) / static_cast(YEAR))) + 7.5 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); - double prediction = - maths::common::CBasicStatistics::mean(decomposition.value(time, 0.0)); + double prediction = decomposition.value(time, 0.0, false).mean(); double error = std::fabs((prediction - trend) / trend); LOG_TRACE(<< "error = " << error); maxError = std::max(maxError, error); @@ -1966,8 +1957,7 @@ BOOST_FIXTURE_TEST_CASE(testWithOutliers, CTestFixture) { if (newComponents) { TMeanAccumulator error; for (core_t::TTime endTime = time + DAY; time < endTime; time += TEN_MINS) { - double prediction = maths::common::CBasicStatistics::mean( - decomposition.value(time, 0.0)); + double prediction = decomposition.value(time, 0.0, false).mean(); error.add(std::fabs(prediction - trend(time)) / trend(time)); debug.addValue(time, value); debug.addPrediction(time, prediction, trend(time) - prediction); @@ -2028,11 +2018,9 @@ BOOST_FIXTURE_TEST_CASE(testCalendar, CTestFixture) { std::size_t largeErrorCount = 0; for (core_t::TTime time_ = time - DAY; time_ < time; time_ += TEN_MINS) { - double prediction = - maths::common::CBasicStatistics::mean(decomposition.value(time_)); + double prediction = decomposition.value(time_, 0.0, false).mean(); double variance = - 4.0 * maths::common::CBasicStatistics::mean( - decomposition.varianceScaleWeight(time_, 4.0, 0.0)); + 4.0 * decomposition.varianceScaleWeight(time_, 4.0, 0.0).mean(); double actual = trend(time_); if (std::fabs(prediction - actual) / std::sqrt(variance) > 3.0) { LOG_TRACE(<< " prediction = " << prediction); @@ -2072,7 +2060,8 @@ BOOST_FIXTURE_TEST_CASE(testConditionOfTrend, CTestFixture) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); if (time > 10 * WEEK) { - BOOST_TEST_REQUIRE(std::fabs(decomposition.detrend(time, trend(time), 0.0)) < 3.0); + BOOST_TEST_REQUIRE( + std::fabs(decomposition.detrend(time, trend(time), 0.0, false)) < 3.0); } } } @@ -2123,13 +2112,13 @@ BOOST_FIXTURE_TEST_CASE(testComponentLifecycle, CTestFixture) { if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{ - decomposition.detrend(time, trend(time) + noise[0], 0.0)}; + decomposition.detrend(time, trend(time) + noise[0], 0.0, false)}; double multiplier{controller.multiplier(prediction, {predictionError}, FIVE_MINS, 1.0, 0.0001)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } - double prediction = mean(decomposition.value(time, 0.0)); + double prediction = decomposition.value(time, 0.0, false).mean(); if (time > 24 * WEEK) { errors[3].add(std::fabs(prediction - trend(time)) / trend(time)); } else if (time > 18 * WEEK && time < 21 * WEEK) { @@ -2170,13 +2159,13 @@ 
BOOST_FIXTURE_TEST_CASE(testStability, CTestFixture) { if (decomposition.initialized()) { TDouble1Vec mean{decomposition.meanValue(time)}; - TDouble1Vec predictionError{decomposition.detrend(time, trend(time), 0.0)}; + TDouble1Vec predictionError{decomposition.detrend(time, trend(time), 0.0, false)}; double multiplier{controller.multiplier(mean, {predictionError}, HALF_HOUR, 1.0, 0.0005)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } - double prediction{mean(decomposition.value(time, 0.0))}; + double prediction{decomposition.value(time, 0.0, false).mean()}; debug.addPrediction(time, prediction, trend(time) - prediction); if (time > 20 * WEEK) { @@ -2222,13 +2211,13 @@ BOOST_FIXTURE_TEST_CASE(testRemoveSeasonal, CTestFixture) { if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{ - decomposition.detrend(time, trend(time) + noise[0], 0.0)}; + decomposition.detrend(time, trend(time) + noise[0], 0.0, false)}; double multiplier{controller.multiplier( prediction, {predictionError}, FIVE_MINS, 1.0, 0.0001)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } - double prediction{mean(decomposition.value(time, 0.0))}; + double prediction{decomposition.value(time, 0.0, false).mean()}; debug.addPrediction(time, prediction, trend(time) + noise[0] - prediction); } @@ -2272,16 +2261,16 @@ BOOST_FIXTURE_TEST_CASE(testFastAndSlowSeasonality, CTestFixture) { if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{ - decomposition.detrend(time, trend(time) + noise[0], 0.0)}; + decomposition.detrend(time, trend(time) + noise[0], 0.0, false)}; double multiplier{controller.multiplier(prediction, {predictionError}, FIVE_MINS, 1.0, 0.0001)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } - double prediction{mean(decomposition.value(time, 0.0))}; + double prediction{decomposition.value(time, 0.0, false).mean()}; debug.addPrediction(time, prediction, trend(time) + noise[0] - prediction); if (time > 4 * DAY) { - double error{(std::fabs(decomposition.detrend(time, trend(time), 0.0, FIVE_MINS))) / + double error{(std::fabs(decomposition.detrend(time, trend(time), 0.0, false, FIVE_MINS))) / std::fabs(trend(time))}; BOOST_TEST_REQUIRE(error < 0.25); meanError.add(error); diff --git a/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc b/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc index 341005ce48..8e0344e2eb 100644 --- a/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc @@ -255,7 +255,7 @@ void reinitializeResidualModel(double learnRate, std::size_t dimension{residualModel.dimension()}; TFloatMeanAccumulatorVecVec residuals(dimension); for (std::size_t d = 0; d < dimension; ++d) { - residuals[d] = trends[d]->residuals(); + residuals[d] = trends[d]->residuals(false); } if (residuals.size() > 0) { TDouble10Vec1Vec samples; @@ -445,7 +445,7 @@ BOOST_AUTO_TEST_CASE(testMode) { core_t::TTime time{0}; for (auto sample : samples) { trend.addPoint(time, sample); - TDouble1Vec sample_{trend.detrend(time, sample, 0.0)}; + TDouble1Vec sample_{trend.detrend(time, sample, 0.0, false)}; prior.addSamples(sample_, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); time += bucketLength; @@ -458,7 +458,7 @@ BOOST_AUTO_TEST_CASE(testMode) { {core::make_triple(time, TDouble2Vec{sample}, TAG)}); time += bucketLength; } - double 
expectedMode{maths::common::CBasicStatistics::mean(trend.value(time)) + + double expectedMode{trend.value(time, 0.0, false).mean() + prior.marginalLikelihoodMode()}; TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(1))); @@ -493,13 +493,13 @@ BOOST_AUTO_TEST_CASE(testMode) { {core::make_triple(time, TDouble2Vec{sample}, TAG)}); trend.addPoint(time, sample, maths_t::CUnitWeights::UNIT, makeComponentDetectedCallback(params.learnRate(), prior)); - prior.addSamples({trend.detrend(time, sample, 0.0)}, + prior.addSamples({trend.detrend(time, sample, 0.0, false)}, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); time += bucketLength; } - double expectedMode{maths::common::CBasicStatistics::mean(trend.value(time)) + + double expectedMode{trend.value(time, 0.0, false).mean() + prior.marginalLikelihoodMode()}; TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(1))); @@ -532,7 +532,7 @@ BOOST_AUTO_TEST_CASE(testMode) { TDouble10Vec1Vec detrended{TDouble10Vec(3)}; for (std::size_t i = 0; i < sample.size(); ++i) { trends[i]->addPoint(time, sample[i]); - detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); + detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0, false); } prior.addSamples(detrended, maths_t::CUnitWeights::singleUnit(3)); @@ -549,8 +549,7 @@ BOOST_AUTO_TEST_CASE(testMode) { TDouble2Vec expectedMode(prior.marginalLikelihoodMode( maths_t::CUnitWeights::unit(3))); for (std::size_t i = 0; i < trends.size(); ++i) { - expectedMode[i] += - maths::common::CBasicStatistics::mean(trends[i]->value(time)); + expectedMode[i] += trends[i]->value(time, 0.0, false).mean(); } TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(3))); @@ -606,7 +605,7 @@ BOOST_AUTO_TEST_CASE(testMode) { [&reinitialize](TFloatMeanAccumulatorVec) { reinitialize = true; }); - detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); + detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0, false); } if (reinitialize) { reinitializeResidualModel(params.learnRate(), trends, prior); @@ -621,8 +620,7 @@ BOOST_AUTO_TEST_CASE(testMode) { TDouble2Vec expectedMode(prior.marginalLikelihoodMode( maths_t::CUnitWeights::unit(3))); for (std::size_t i = 0; i < trends.size(); ++i) { - expectedMode[i] += - maths::common::CBasicStatistics::mean(trends[i]->value(time)); + expectedMode[i] += trends[i]->value(time, 0.0, false).mean(); } TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit(3))); @@ -872,7 +870,7 @@ BOOST_AUTO_TEST_CASE(testAddSamples) { makeComponentDetectedCallback(params.learnRate(), prior, &controllers)); - double detrended{trend.detrend(time, sample, 0.0)}; + double detrended{trend.detrend(time, sample, 0.0, false)}; prior.addSamples({detrended}, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); @@ -950,7 +948,7 @@ BOOST_AUTO_TEST_CASE(testAddSamples) { [&reinitialize](TFloatMeanAccumulatorVec) { reinitialize = true; }); - detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0); + detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0, false); mean[i] = trends[i]->meanValue(time); hasTrend |= true; } @@ -1024,7 +1022,7 @@ BOOST_AUTO_TEST_CASE(testPredict) { trend.addPoint(time, sample, maths_t::CUnitWeights::UNIT, makeComponentDetectedCallback(params.learnRate(), prior)); - prior.addSamples({trend.detrend(time, sample, 0.0)}, + prior.addSamples({trend.detrend(time, sample, 0.0, false)}, maths_t::CUnitWeights::SINGLE_UNIT); prior.propagateForwardsByTime(1.0); @@ -1035,7 +1033,7 @@ BOOST_AUTO_TEST_CASE(testPredict) { 
for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) { double trend_{10.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time_) / 86400.0)}; - double expected{maths::common::CBasicStatistics::mean(trend.value(time_)) + + double expected{trend.value(time_, 0.0, false).mean() + maths::common::CBasicStatistics::mean( prior.marginalLikelihoodConfidenceInterval(0.0))}; double predicted{model.predict(time_)[0]}; @@ -1127,7 +1125,7 @@ BOOST_AUTO_TEST_CASE(testPredict) { [&reinitialize](TFloatMeanAccumulatorVec) { reinitialize = true; }); - detrended.push_back(trends[i]->detrend(time, sample[i], 0.0)); + detrended.push_back(trends[i]->detrend(time, sample[i], 0.0, false)); } if (reinitialize) { reinitializeResidualModel(params.learnRate(), trends, prior); @@ -1150,10 +1148,9 @@ BOOST_AUTO_TEST_CASE(testPredict) { static_cast(time_) / 86400.0)}; maths::common::CMultivariatePrior::TUnivariatePriorPtr margin{ prior.univariate(marginalize, condition).first}; - double expected{ - maths::common::CBasicStatistics::mean(trends[i]->value(time_)) + - maths::common::CBasicStatistics::mean( - margin->marginalLikelihoodConfidenceInterval(0.0))}; + double expected{trends[i]->value(time_, 0.0, false).mean() + + maths::common::CBasicStatistics::mean( + margin->marginalLikelihoodConfidenceInterval(0.0))}; double predicted{model.predict(time_)[i]}; --marginalize[std::min(i, marginalize.size() - 1)]; LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted @@ -1300,12 +1297,13 @@ BOOST_AUTO_TEST_CASE(testProbability) { for (std::size_t i = 0; i < weight.size(); ++i) { weight_[i] = weight[i][0]; } - double lb[2], ub[2]; + double lb[2]; + double ub[2]; model0.residualModel().probabilityOfLessLikelySamples( calculation, sample, {weight_}, lb[0], ub[0], expectedTail[0]); model1.residualModel().probabilityOfLessLikelySamples( calculation, - {model1.trendModel().detrend(time, sample[0], confidence)}, + {model1.trendModel().detrend(time, sample[0], confidence, false)}, {weight_}, lb[1], ub[1], expectedTail[1]); expectedProbability[0] = (lb[0] + ub[0]) / 2.0; expectedProbability[1] = (lb[1] + ub[1]) / 2.0; @@ -1403,7 +1401,7 @@ BOOST_AUTO_TEST_CASE(testProbability) { TDouble10Vec detrended; for (std::size_t j = 0; j < sample.size(); ++j) { detrended.push_back(model1.trendModel()[j]->detrend( - time, sample[j], confidence)); + time, sample[j], confidence, false)); } model1.residualModel().probabilityOfLessLikelySamples( calculation, {detrended}, {weight_}, lb[1], ub[1], @@ -1517,11 +1515,8 @@ BOOST_AUTO_TEST_CASE(testWeights) { static_cast(time_) / 86400.0), 2.0)}; - double expectedScale{ - model.trendModel() - .varianceScaleWeight( - time_, model.residualModel().marginalLikelihoodVariance(), 0.0) - .second}; + double expectedScale{model.trendModel().varianceScaleWeight( + time_, model.residualModel().marginalLikelihoodVariance(), 0.0)(1)}; model.seasonalWeight(0.0, time_, scale); LOG_DEBUG(<< "expected weight = " << expectedScale << ", scale = " << scale @@ -1591,11 +1586,8 @@ BOOST_AUTO_TEST_CASE(testWeights) { model.seasonalWeight(0.0, time_, scale); for (std::size_t i = 0; i < 3; ++i) { - double expectedScale{ - model.trendModel()[i] - ->varianceScaleWeight( - time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0) - .second}; + double expectedScale{model.trendModel()[i]->varianceScaleWeight( + time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0)(1)}; LOG_DEBUG(<< "expected weight = " << expectedScale << ", weight = " << scale << " (data 
weight = " << dataScale << ")"); BOOST_REQUIRE_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE), diff --git a/lib/maths/time_series/unittest/CTrendComponentTest.cc b/lib/maths/time_series/unittest/CTrendComponentTest.cc index 6ca4006a7c..af9f91bf26 100644 --- a/lib/maths/time_series/unittest/CTrendComponentTest.cc +++ b/lib/maths/time_series/unittest/CTrendComponentTest.cc @@ -183,7 +183,7 @@ auto trainModel(ITR beginValues, ITR endValues) { component.add(time, *value); component.propagateForwardsByTime(BUCKET_LENGTH); - double prediction{maths::common::CBasicStatistics::mean(component.value(time, 0.0))}; + double prediction{component.value(time, 0.0).mean()}; controller.multiplier({prediction}, {{*value - prediction}}, BUCKET_LENGTH, 0.3, 0.012); component.decayRate(0.012 * controller.multiplier()); @@ -203,7 +203,7 @@ auto forecastErrors(ITR actual, core_t::TTime interval(std::distance(actual, endActual) * BUCKET_LENGTH); TDouble3VecVec forecast; - component.forecast(time, time + interval, BUCKET_LENGTH, 95.0, + component.forecast(time, time + interval, BUCKET_LENGTH, 95.0, false, [](core_t::TTime) { return TDouble3Vec(3, 0.0); }, [&forecast](core_t::TTime, const TDouble3Vec& value) { forecast.push_back(value); @@ -248,10 +248,10 @@ BOOST_AUTO_TEST_CASE(testValueAndVariance) { TMeanVarAccumulator normalisedResiduals; for (core_t::TTime time = start; time < end; time += BUCKET_LENGTH) { double value{values[(time - start) / BUCKET_LENGTH]}; - double prediction{maths::common::CBasicStatistics::mean(component.value(time, 0.0))}; + double prediction{component.value(time, 0.0).mean()}; if (time > start + BUCKET_LENGTH) { - double variance{maths::common::CBasicStatistics::mean(component.variance(0.0))}; + double variance{component.variance(0.0).mean()}; normalisedResiduals.add((value - prediction) / std::sqrt(variance)); } @@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE(testDecayRate) { regression.add(static_cast(time) / 604800.0, value); double expectedPrediction{regression.predict(static_cast(time) / 604800.0)}; - double prediction{maths::common::CBasicStatistics::mean(component.value(time, 0.0))}; + double prediction{component.value(time, 0.0).mean()}; error.add(std::fabs(prediction - expectedPrediction)); level.add(value); diff --git a/lib/model/CInterimBucketCorrector.cc b/lib/model/CInterimBucketCorrector.cc index a925985f5e..9f1466884c 100644 --- a/lib/model/CInterimBucketCorrector.cc +++ b/lib/model/CInterimBucketCorrector.cc @@ -123,10 +123,9 @@ double CInterimBucketCorrector::estimateBucketCompleteness(core_t::TTime time, std::uint64_t count_) const { double count{static_cast(count_)}; core_t::TTime bucketMidPoint{this->calcBucketMidPoint(time)}; - double bucketCount{ - m_FinalCountTrend.initialized() - ? maths::common::CBasicStatistics::mean(m_FinalCountTrend.value(bucketMidPoint)) - : maths::common::CBasicStatistics::mean(m_FinalCountMean)}; + double bucketCount{m_FinalCountTrend.initialized() + ? m_FinalCountTrend.value(bucketMidPoint, 0.0, true).mean() + : maths::common::CBasicStatistics::mean(m_FinalCountMean)}; return bucketCount > 0.0 ? 
maths::common::CTools::truncate(count / bucketCount, 0.0, 1.0) : 1.0; From 9205e5b4ac686e576e8dd398044a452a7436d323 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Sun, 15 May 2022 16:33:36 +0100 Subject: [PATCH 02/12] Small tidy up --- lib/maths/time_series/CTimeSeriesDecomposition.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/maths/time_series/CTimeSeriesDecomposition.cc b/lib/maths/time_series/CTimeSeriesDecomposition.cc index 30c5770453..f4f7ac8fa9 100644 --- a/lib/maths/time_series/CTimeSeriesDecomposition.cc +++ b/lib/maths/time_series/CTimeSeriesDecomposition.cc @@ -333,18 +333,21 @@ CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, bool isNo CTimeSeriesDecomposition::TFilteredPredictor CTimeSeriesDecomposition::predictor(int components) const { + CTrendComponent::TPredictor trend_{m_Components.trend().predictor()}; + return [ components, trend = std::move(trend_), this ](core_t::TTime time, const TBoolVec& removedSeasonalMask) { - double baseline{0.0}; + + double result{0.0}; time += m_TimeShift; if ((components & E_TrendForced) != 0) { - baseline += trend(time); + result += trend(time); } else if ((components & E_Trend) != 0) { if (m_Components.usingTrendForPrediction()) { - baseline += trend(time); + result += trend(time); } } @@ -354,7 +357,7 @@ CTimeSeriesDecomposition::predictor(int components) const { if (seasonal[i].initialized() && (removedSeasonalMask.empty() || removedSeasonalMask[i] == false) && seasonal[i].time().inWindow(time)) { - baseline += seasonal[i].value(time, 0.0).mean(); + result += seasonal[i].value(time, 0.0).mean(); } } } @@ -362,12 +365,12 @@ CTimeSeriesDecomposition::predictor(int components) const { if ((components & E_Calendar) != 0) { for (const auto& component : m_Components.calendar()) { if (component.initialized() && component.feature().inWindow(time)) { - baseline += component.value(time, 0.0).mean(); + result += component.value(time, 0.0).mean(); } } } - return baseline; + return result; }; } From d26267aa2762315cfcabc1c1e82ce04bc8694d22 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 16 May 2022 09:29:20 +0100 Subject: [PATCH 03/12] Testing --- .../time_series/CTimeSeriesDecomposition.h | 9 +-- .../CTimeSeriesDecompositionDetail.h | 4 +- .../CTimeSeriesDecompositionInterface.h | 1 - .../time_series/CTimeSeriesDecomposition.cc | 52 ++++++++++-------- .../CTimeSeriesDecompositionDetail.cc | 11 ++-- .../unittest/CTimeSeriesDecompositionTest.cc | 55 ++++++++++++++++--- 6 files changed, 88 insertions(+), 44 deletions(-) diff --git a/include/maths/time_series/CTimeSeriesDecomposition.h b/include/maths/time_series/CTimeSeriesDecomposition.h index cd9e4e703c..7ffa6c5ea3 100644 --- a/include/maths/time_series/CTimeSeriesDecomposition.h +++ b/include/maths/time_series/CTimeSeriesDecomposition.h @@ -240,11 +240,7 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition core::CStateRestoreTraverser& traverser); //! Get the predicted value of the time series at \p time. - TVector2x1 value(core_t::TTime time, - double confidence, - int components, - const TBoolVec& removedSeasonalMask = {}, - bool smooth = true) const; + TVector2x1 value(core_t::TTime time, double confidence, int components, bool smooth = true) const; //! Compute the variance scale weight to apply at \p time. TVector2x1 @@ -253,7 +249,8 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition //! The correction to produce a smooth join between periodic //! repeats and partitions. 
template - TVector2x1 smooth(const F& f, core_t::TTime time, int components) const; + auto smooth(const F& f, core_t::TTime time, int components) const + -> decltype(f(time)); private: //! The time over which discontinuities between weekdays diff --git a/include/maths/time_series/CTimeSeriesDecompositionDetail.h b/include/maths/time_series/CTimeSeriesDecompositionDetail.h index db1fcd1704..dd802c5148 100644 --- a/include/maths/time_series/CTimeSeriesDecompositionDetail.h +++ b/include/maths/time_series/CTimeSeriesDecompositionDetail.h @@ -52,6 +52,7 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionDetail public: using TDoubleVec = std::vector; using TMakePredictor = std::function; + using TFilteredPredictor = std::function; using TMakeFilteredPredictor = std::function; using TChangePointUPtr = std::unique_ptr; @@ -586,7 +587,8 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionDetail void useTrendForPrediction(); //! Get a factory for the seasonal components test. - TMakeTestForSeasonality makeTestForSeasonality(const TFilteredPredictor& predictor) const; + TMakeTestForSeasonality + makeTestForSeasonality(const TMakeFilteredPredictor& makePredictor) const; //! Get the mean value of the baseline in the vicinity of \p time. double meanValue(core_t::TTime time) const; diff --git a/include/maths/time_series/CTimeSeriesDecompositionInterface.h b/include/maths/time_series/CTimeSeriesDecompositionInterface.h index a65db1c192..78539ca974 100644 --- a/include/maths/time_series/CTimeSeriesDecompositionInterface.h +++ b/include/maths/time_series/CTimeSeriesDecompositionInterface.h @@ -45,7 +45,6 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionTypes { public: using TBoolVec = std::vector; using TPredictor = std::function; - using TFilteredPredictor = std::function; using TFloatMeanAccumulator = common::CBasicStatistics::SSampleMean::TAccumulator; using TFloatMeanAccumulatorVec = std::vector; diff --git a/lib/maths/time_series/CTimeSeriesDecomposition.cc b/lib/maths/time_series/CTimeSeriesDecomposition.cc index f4f7ac8fa9..46f688cd8d 100644 --- a/lib/maths/time_series/CTimeSeriesDecomposition.cc +++ b/lib/maths/time_series/CTimeSeriesDecomposition.cc @@ -226,10 +226,17 @@ void CTimeSeriesDecomposition::addPoint(core_t::TTime time, m_LastValueTime = std::max(m_LastValueTime, time); this->propagateForwardsTo(time); - auto testForSeasonality = m_Components.makeTestForSeasonality( - [this](core_t::TTime time_, const TBoolVec& removedSeasonalMask) { - return this->value(time_, 0.0, E_Seasonal, removedSeasonalMask).mean(); - }); + auto testForSeasonality = m_Components.makeTestForSeasonality([this]() { + return [ predictor = this->predictor(E_Seasonal), + this ](core_t::TTime time_, const TBoolVec& removedSeasonalMask) { + return predictor(time_, removedSeasonalMask) + + this->smooth( + [&](core_t::TTime shiftedTime) { + return predictor(shiftedTime - m_TimeShift, removedSeasonalMask); + }, + time_ + m_TimeShift, E_Seasonal); + }; + }); SAddValue message{time, lastTime, @@ -241,8 +248,8 @@ void CTimeSeriesDecomposition::addPoint(core_t::TTime time, this->value(time, 0.0, E_Calendar).mean(), *this, [this] { - auto predictor_ = this->predictor(E_All | E_TrendForced); - return [predictor = std::move(predictor_)](core_t::TTime time_) { + return [predictor = this->predictor(E_All | E_TrendForced)]( + core_t::TTime time_) { return predictor(time_, {}); }; }, @@ -277,11 +284,8 @@ double CTimeSeriesDecomposition::meanValue(core_t::TTime time) const { } CTimeSeriesDecomposition::TVector2x1 
-CTimeSeriesDecomposition::value(core_t::TTime time, - double confidence, - int components, - const TBoolVec& removedSeasonalMask, - bool smooth) const { +CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, int components, bool smooth) const { + TVector2x1 result{0.0}; time += m_TimeShift; @@ -295,12 +299,9 @@ CTimeSeriesDecomposition::value(core_t::TTime time, } if ((components & E_Seasonal) != 0) { - const auto& seasonal = m_Components.seasonal(); - for (std::size_t i = 0; i < seasonal.size(); ++i) { - if (seasonal[i].initialized() && - (removedSeasonalMask.empty() || removedSeasonalMask[i] == false) && - seasonal[i].time().inWindow(time)) { - result += seasonal[i].value(time, confidence); + for (const auto& component : m_Components.seasonal()) { + if (component.initialized() && component.time().inWindow(time)) { + result += component.value(time, confidence); } } } @@ -317,7 +318,7 @@ CTimeSeriesDecomposition::value(core_t::TTime time, result += this->smooth( [&](core_t::TTime time_) { return this->value(time_ - m_TimeShift, confidence, - components & E_Seasonal, removedSeasonalMask, false); + components & E_Seasonal, false); }, time, components); } @@ -612,15 +613,18 @@ void CTimeSeriesDecomposition::initializeMediator() { } template -CTimeSeriesDecomposition::TVector2x1 -CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) const { +auto CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) const + -> decltype(f(time)) { + + using TResultType = decltype(f(time)); + if ((components & E_Seasonal) != E_Seasonal) { - return TVector2x1{0.0}; + return TResultType{0.0}; } auto offset = [&f, time](core_t::TTime discontinuity) { - TVector2x1 baselineMinusEps{f(discontinuity - 1)}; - TVector2x1 baselinePlusEps{f(discontinuity + 1)}; + auto baselineMinusEps = f(discontinuity - 1); + auto baselinePlusEps = f(discontinuity + 1); return 0.5 * std::max((1.0 - static_cast(std::abs(time - discontinuity)) / static_cast(SMOOTHING_INTERVAL)), @@ -650,7 +654,7 @@ CTimeSeriesDecomposition::smooth(const F& f, core_t::TTime time, int components) } } - return TVector2x1{0.0}; + return TResultType{0.0}; } const core_t::TTime CTimeSeriesDecomposition::SMOOTHING_INTERVAL{14400}; diff --git a/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc b/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc index e8a584b588..820a20bcc9 100644 --- a/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/time_series/CTimeSeriesDecompositionDetail.cc @@ -1927,10 +1927,11 @@ void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction() { } CTimeSeriesDecompositionDetail::TMakeTestForSeasonality -CTimeSeriesDecompositionDetail::CComponents::makeTestForSeasonality(const TFilteredPredictor& predictor) const { - return [predictor, this](const CExpandingWindow& window, core_t::TTime minimumPeriod, - std::size_t minimumResolutionToTestModelledComponent, - const TFilteredPredictor& preconditioner) { +CTimeSeriesDecompositionDetail::CComponents::makeTestForSeasonality( + const TMakeFilteredPredictor& makePredictor) const { + return [makePredictor, this](const CExpandingWindow& window, core_t::TTime minimumPeriod, + std::size_t minimumResolutionToTestModelledComponent, + const TFilteredPredictor& preconditioner) { core_t::TTime valuesStartTime{window.beginValuesTime()}; core_t::TTime windowBucketStartTime{window.bucketStartTime()}; core_t::TTime windowBucketLength{window.bucketLength()}; @@ -1950,7 +1951,7 @@ 
CTimeSeriesDecompositionDetail::CComponents::makeTestForSeasonality(const TFilte
     test.minimumPeriod(minimumPeriod)
         .minimumModelSize(2 * m_SeasonalComponentSize / 3)
         .maximumModelSize(2 * m_SeasonalComponentSize)
-        .modelledSeasonalityPredictor(predictor);
+        .modelledSeasonalityPredictor(makePredictor());
     std::ptrdiff_t maximumNumberComponents{MAXIMUM_COMPONENTS};
     for (const auto& component : this->seasonal()) {
         test.addModelledSeasonality(component.time(), minimumResolutionToTestModelledComponent,
diff --git a/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc
index f973ea89c3..0478fc36e6 100644
--- a/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc
+++ b/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc
@@ -23,11 +23,9 @@
 #include
 #include
 #include
-#include
 #include
 #include
-#include
 #include
 #include
@@ -1007,7 +1005,7 @@ BOOST_FIXTURE_TEST_CASE(testSpikeyDataProblemCase, CTestFixture) {
                              lastWeekTimeseries[j].second - prediction(1)),
                     0.0);
             debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual);
-            }
+        }

         LOG_TRACE(<< "'sum residual' / 'sum value' = "
                   << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
@@ -1135,7 +1133,7 @@ BOOST_FIXTURE_TEST_CASE(testVeryLargeValuesProblemCase, CTestFixture) {
                              lastWeekTimeseries[j].second - prediction(1)),
                     0.0);
             debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual);
-            }
+        }

         LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
         LOG_DEBUG(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
@@ -1234,7 +1232,7 @@ BOOST_FIXTURE_TEST_CASE(testMixedSmoothAndSpikeyDataProblemCase, CTestFixture) {
                              lastWeekTimeseries[j].second - prediction(1)),
                     0.0);
             debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual);
-            }
+        }

         LOG_DEBUG(<< "'sum residual' / 'sum value' = "
                   << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
@@ -1437,13 +1435,13 @@ BOOST_FIXTURE_TEST_CASE(testLongTermTrend, CTestFixture) {
     LOG_DEBUG(<< "Saw Tooth Not Periodic");
     {
         core_t::TTime drops[]{0, 30 * DAY, 50 * DAY, 60 * DAY,
-                              85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY};
+                              85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY};

         times.clear();
         trend.clear();
@@ -2228,7 +2226,8 @@ BOOST_FIXTURE_TEST_CASE(testRemoveSeasonal, CTestFixture) {

 BOOST_FIXTURE_TEST_CASE(testFastAndSlowSeasonality, CTestFixture) {

-    // Test we have good modelling of the fast component after detecting a slow periodic component.
+    // Test we have good modelling of the fast component after detecting a slow
+    // periodic component.

     test::CRandomNumbers rng;
@@ -2283,6 +2282,48 @@ BOOST_FIXTURE_TEST_CASE(testFastAndSlowSeasonality, CTestFixture) {
     BOOST_TEST_REQUIRE(2, decomposition.seasonalComponents().size());
 }

+BOOST_FIXTURE_TEST_CASE(testNonNegative, CTestFixture) {
+
+    // Test that if we state the time series is non-negative we never predict
+    // a negative value for it.
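+    // Concretely, the assertions below require both end points of the
+    // interval returned by value(time, 0.0, true) to be >= 0.0 and measure
+    // the detrend() residual against the clamped, non-negative prediction.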
+ + test::CRandomNumbers rng; + + auto trend = [](core_t::TTime time) { + return std::max(15.0 - 0.5 * static_cast(time) / static_cast(DAY), 1.0) + + std::sin(boost::math::double_constants::two_pi * + static_cast(time) / static_cast(DAY)); + }; + + maths::time_series::CTimeSeriesDecomposition decomposition(0.012, FIVE_MINS); + CDebugGenerator debug; + + TMeanAccumulator meanError; + + TDoubleVec noise; + for (core_t::TTime time = 0; time < 6 * WEEK; time += FIVE_MINS) { + rng.generateNormalSamples(0.0, 0.1, 1, noise); + + decomposition.addPoint(time, trend(time) + noise[0]); + debug.addValue(time, trend(time) + noise[0]); + + auto prediction = decomposition.value(time, 0.0, true); + BOOST_TEST_REQUIRE(prediction(0) >= 0.0); + BOOST_TEST_REQUIRE(prediction(1) >= 0.0); + debug.addPrediction(time, prediction.mean(), + trend(time) + noise[0] - prediction.mean()); + + if (time > 4 * DAY && trend(time) > 1.0) { + double error{(std::fabs(decomposition.detrend(time, trend(time), 0.0, true, FIVE_MINS))) / + std::fabs(trend(time))}; + BOOST_TEST_REQUIRE(error < 0.8); + meanError.add(error); + } + } + + BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.1); +} + BOOST_FIXTURE_TEST_CASE(testSwap, CTestFixture) { // Test that swapping preserves checksums. From aa3ec6d81418c579a9bddf982428c983070b6fdb Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 16 May 2022 18:20:54 +0100 Subject: [PATCH 04/12] More testing --- include/maths/common/CModel.h | 4 +- include/maths/time_series/CTimeSeriesModel.h | 8 +- lib/maths/common/CModel.cc | 8 +- lib/maths/common/unittest/CModelTest.cc | 5 +- lib/maths/time_series/CTimeSeriesModel.cc | 16 ++-- .../time_series/unittest/CForecastTest.cc | 13 ++- .../unittest/CTimeSeriesDecompositionTest.cc | 10 +- .../unittest/CTimeSeriesModelTest.cc | 94 ++++++++++++++++--- lib/model/CEventRateModel.cc | 4 +- lib/model/CEventRatePopulationModel.cc | 4 +- lib/model/CIndividualModel.cc | 4 +- lib/model/CMetricModel.cc | 4 +- lib/model/CMetricPopulationModel.cc | 4 +- lib/model/unittest/CEventRateModelTest.cc | 16 ++-- .../unittest/CEventRatePopulationModelTest.cc | 4 +- lib/model/unittest/CMetricModelTest.cc | 4 +- .../unittest/CMetricPopulationModelTest.cc | 8 +- lib/model/unittest/CModelToolsTest.cc | 5 +- .../CProbabilityAndInfluenceCalculatorTest.cc | 5 +- 19 files changed, 146 insertions(+), 74 deletions(-) diff --git a/include/maths/common/CModel.h b/include/maths/common/CModel.h index 30299491b4..bc9b54119e 100644 --- a/include/maths/common/CModel.h +++ b/include/maths/common/CModel.h @@ -105,12 +105,12 @@ class MATHS_COMMON_EXPORT CModelAddSamplesParams { public: //! Set whether or not the data are integer valued. - CModelAddSamplesParams& integer(bool integer); + CModelAddSamplesParams& isInteger(bool isInteger); //! Get the data type. maths_t::EDataType type() const; //! Set whether or not the data are non-negative. - CModelAddSamplesParams& nonNegative(bool nonNegative); + CModelAddSamplesParams& isNonNegative(bool isNonNegative); //! Get the whether the data are non-negative. bool isNonNegative() const; diff --git a/include/maths/time_series/CTimeSeriesModel.h b/include/maths/time_series/CTimeSeriesModel.h index 98828a0d6c..fc876da054 100644 --- a/include/maths/time_series/CTimeSeriesModel.h +++ b/include/maths/time_series/CTimeSeriesModel.h @@ -278,10 +278,10 @@ class MATHS_TIME_SERIES_EXPORT CUnivariateTimeSeriesModel : public common::CMode std::size_t m_Id; //! True if the data are non-negative. 
- bool m_IsNonNegative; + bool m_IsNonNegative{false}; //! True if the model can be forecast. - bool m_IsForecastable; + bool m_IsForecastable{true}; //! These control the trend and residual model decay rates (see //! CDecayRateController for more details). @@ -308,7 +308,7 @@ class MATHS_TIME_SERIES_EXPORT CUnivariateTimeSeriesModel : public common::CMode TAnomalyModelPtr m_AnomalyModel; //! Models the correlations between time series. - CTimeSeriesCorrelations* m_Correlations; + CTimeSeriesCorrelations* m_Correlations{nullptr}; }; //! \brief Manages the creation correlate models. @@ -730,7 +730,7 @@ class MATHS_TIME_SERIES_EXPORT CMultivariateTimeSeriesModel : public common::CMo private: //! True if the data are non-negative. - bool m_IsNonNegative; + bool m_IsNonNegative{false}; //! These control the trend and residual model decay rates (see //! CDecayRateController for more details). diff --git a/lib/maths/common/CModel.cc b/lib/maths/common/CModel.cc index 02bffa17cf..967a7fd061 100644 --- a/lib/maths/common/CModel.cc +++ b/lib/maths/common/CModel.cc @@ -87,8 +87,8 @@ core_t::TTime CModelParams::maximumTimeToTestForChange() const { //////// CModelAddSamplesParams //////// -CModelAddSamplesParams& CModelAddSamplesParams::integer(bool integer) { - m_Type = integer ? maths_t::E_IntegerData : maths_t::E_ContinuousData; +CModelAddSamplesParams& CModelAddSamplesParams::isInteger(bool isInteger) { + m_Type = isInteger ? maths_t::E_IntegerData : maths_t::E_ContinuousData; return *this; } @@ -96,8 +96,8 @@ maths_t::EDataType CModelAddSamplesParams::type() const { return m_Type; } -CModelAddSamplesParams& CModelAddSamplesParams::nonNegative(bool nonNegative) { - m_IsNonNegative = nonNegative; +CModelAddSamplesParams& CModelAddSamplesParams::isNonNegative(bool isNonNegative) { + m_IsNonNegative = isNonNegative; return *this; } diff --git a/lib/maths/common/unittest/CModelTest.cc b/lib/maths/common/unittest/CModelTest.cc index 6e421a8ce3..4bdec6c828 100644 --- a/lib/maths/common/unittest/CModelTest.cc +++ b/lib/maths/common/unittest/CModelTest.cc @@ -51,7 +51,10 @@ BOOST_AUTO_TEST_CASE(testAll) { TDouble2VecWeightsAryVec trendWeights{weight1}; TDouble2VecWeightsAryVec priorWeights{weight2}; maths::common::CModelAddSamplesParams params; - params.integer(true).propagationInterval(1.5).trendWeights(trendWeights).priorWeights(priorWeights); + params.isInteger(true) + .propagationInterval(1.5) + .trendWeights(trendWeights) + .priorWeights(priorWeights); BOOST_REQUIRE_EQUAL(maths_t::E_IntegerData, params.type()); BOOST_REQUIRE_EQUAL(1.5, params.propagationInterval()); BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(trendWeights), diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index c0ede3ac38..5397ba757e 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -593,16 +593,15 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel( const TDecayRateController2Ary* controllers, const TMultibucketFeature* multibucketFeature, bool modelAnomalies) - : common::CModel(params), m_Id(id), m_IsNonNegative(false), m_IsForecastable(true), - m_TrendModel(trendModel.clone()), m_ResidualModel(residualModel.clone()), + : common::CModel(params), m_Id(id), m_TrendModel(trendModel.clone()), + m_ResidualModel(residualModel.clone()), m_MultibucketFeature(multibucketFeature != nullptr ? multibucketFeature->clone() : nullptr), m_MultibucketFeatureModel(multibucketFeature != nullptr ? 
residualModel.clone() : nullptr),
       m_AnomalyModel(modelAnomalies
                          ? std::make_unique<CTimeSeriesAnomalyModel>(
                                params.bucketLength(), params.decayRate())
-                          : nullptr),
-      m_Correlations(nullptr) {
+                          : nullptr) {
     if (controllers != nullptr) {
         m_Controllers = std::make_unique<TDecayRateController2Ary>(*controllers);
     }
@@ -610,8 +609,7 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(
 
 CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const common::SModelRestoreParams& params,
                                                        core::CStateRestoreTraverser& traverser)
-    : common::CModel(params.s_Params), m_IsForecastable(false),
-      m_Correlations(nullptr) {
+    : common::CModel(params.s_Params), m_IsForecastable(false) {
     if (traverser.traverseSubLevel([&](auto& traverser_) {
             return this->acceptRestoreTraverser(params, traverser_);
         }) == false) {
@@ -1410,8 +1408,7 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const CUnivariateTimeSeri
                          : nullptr),
       m_AnomalyModel(!isForForecast && other.m_AnomalyModel != nullptr
                          ? std::make_unique<CTimeSeriesAnomalyModel>(*other.m_AnomalyModel)
-                         : nullptr),
-      m_Correlations(nullptr) {
+                         : nullptr) {
     if (!isForForecast && other.m_Controllers != nullptr) {
         m_Controllers = std::make_unique<TDecayRateController2Ary>(*other.m_Controllers);
     }
@@ -2128,8 +2125,7 @@ CMultivariateTimeSeriesModel::CMultivariateTimeSeriesModel(
     const TDecayRateController2Ary* controllers,
     const TMultibucketFeature* multibucketFeature,
     bool modelAnomalies)
-    : common::CModel(params), m_IsNonNegative(false),
-      m_ResidualModel(residualModel.clone()),
+    : common::CModel(params), m_ResidualModel(residualModel.clone()),
       m_MultibucketFeature(multibucketFeature != nullptr ? multibucketFeature->clone()
                                                          : nullptr),
       m_MultibucketFeatureModel(multibucketFeature != nullptr ? residualModel.clone() : nullptr),
diff --git a/lib/maths/time_series/unittest/CForecastTest.cc b/lib/maths/time_series/unittest/CForecastTest.cc
index 8d2b3b6246..8da489984e 100644
--- a/lib/maths/time_series/unittest/CForecastTest.cc
+++ b/lib/maths/time_series/unittest/CForecastTest.cc
@@ -197,7 +197,7 @@ class CTest {
 
         for (std::size_t i = 0; i < noise.size(); ++i, time += m_BucketLength) {
             maths::common::CModelAddSamplesParams params;
-            params.integer(false)
+            params.isInteger(false)
                 .propagationInterval(1.0)
                 .trendWeights(weights)
                 .priorWeights(weights);
@@ -442,8 +442,8 @@ BOOST_AUTO_TEST_CASE(testNonNegative) {
     rng.generateNormalSamples(2.0, 3.0, 48, noise);
     for (auto value = noise.begin(); value != noise.end(); ++value, time += bucketLength) {
         maths::common::CModelAddSamplesParams params;
-        params.integer(false)
-            .nonNegative(true)
+        params.isInteger(false)
+            .isNonNegative(true)
             .propagationInterval(1.0)
             .trendWeights(weights)
             .priorWeights(weights);
@@ -526,7 +526,7 @@ BOOST_AUTO_TEST_CASE(testFinancialIndex) {
     TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
     for (std::size_t i = 0; i < n; ++i) {
         maths::common::CModelAddSamplesParams params;
-        params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights);
+        params.isInteger(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights);
         model.addSamples(
             params, {core::make_triple(timeseries[i].first,
                                        TDouble2Vec{timeseries[i].second}, TAG)});
@@ -588,7 +588,10 @@ BOOST_AUTO_TEST_CASE(testTruncation) {
 
     for (core_t::TTime time = 0; time < dataEndTime; time += bucketLength) {
         maths::common::CModelAddSamplesParams params;
-        params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights);
+        params.isInteger(false)
+            .propagationInterval(1.0)
+            .trendWeights(weights)
+            .priorWeights(weights);
 
         double yi{static_cast<double>(time)};
model.addSamples(params, {core::make_triple(time, TDouble2Vec{yi}, TAG)}); } diff --git a/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc index 0478fc36e6..3387acfae8 100644 --- a/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc @@ -1005,7 +1005,7 @@ BOOST_FIXTURE_TEST_CASE(testSpikeyDataProblemCase, CTestFixture) { lastWeekTimeseries[j].second - prediction(1)), 0.0); debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); - } + } LOG_TRACE(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); @@ -1133,7 +1133,7 @@ BOOST_FIXTURE_TEST_CASE(testVeryLargeValuesProblemCase, CTestFixture) { lastWeekTimeseries[j].second - prediction(1)), 0.0); debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); - } + } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); LOG_DEBUG(<< "'max residual' / 'max value' = " << maxResidual / maxValue); @@ -1232,7 +1232,7 @@ BOOST_FIXTURE_TEST_CASE(testMixedSmoothAndSpikeyDataProblemCase, CTestFixture) { lastWeekTimeseries[j].second - prediction(1)), 0.0); debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); - } + } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); @@ -1435,7 +1435,7 @@ BOOST_FIXTURE_TEST_CASE(testLongTermTrend, CTestFixture) { LOG_DEBUG(<< "Saw Tooth Not Periodic"); { core_t::TTime drops[]{0, 30 * DAY, 50 * DAY, 60 * DAY, - 85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY}; + 85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY}; times.clear(); trend.clear(); @@ -2284,7 +2284,7 @@ BOOST_FIXTURE_TEST_CASE(testFastAndSlowSeasonality, CTestFixture) { BOOST_FIXTURE_TEST_CASE(testNonNegative, CTestFixture) { - // Test if we state the tie series is non-negative then we never predict + // Test if we specify the time series is non-negative then we never predict // a negative value for it. test::CRandomNumbers rng; diff --git a/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc b/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc index 8e0344e2eb..95da8e9916 100644 --- a/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/time_series/unittest/CTimeSeriesModelTest.cc @@ -81,7 +81,7 @@ using TSetWeightsFunc = void (*)(double, std::size_t, TDouble2VecWeightsAry&); const double MINIMUM_SEASONAL_SCALE{0.25}; const double MINIMUM_SIGNIFICANT_CORRELATION{0.4}; const double DECAY_RATE{0.0005}; -const std::size_t TAG{0u}; +const std::size_t TAG{0}; //! \brief Implements the allocator for new correlate priors. 
class CTimeSeriesCorrelateModelAllocator @@ -127,7 +127,7 @@ addSampleParams(double interval, const TDouble2VecWeightsAryVec& trendWeights, const TDouble2VecWeightsAryVec& residualWeights) { maths::common::CModelAddSamplesParams params; - params.integer(false) + params.isInteger(false) .propagationInterval(interval) .trendWeights(trendWeights) .priorWeights(residualWeights); @@ -211,7 +211,7 @@ auto makeComponentDetectedCallback(double learnRate, } residualModel.setToNonInformative(0.0, residualModel.decayRate()); - if (residuals.size() > 0) { + if (residuals.empty() == false) { maths_t::TDoubleWeightsAry1Vec weights(1); double buckets{ std::accumulate(residuals.begin(), residuals.end(), 0.0, @@ -257,7 +257,7 @@ void reinitializeResidualModel(double learnRate, for (std::size_t d = 0; d < dimension; ++d) { residuals[d] = trends[d]->residuals(false); } - if (residuals.size() > 0) { + if (residuals.empty() == false) { TDouble10Vec1Vec samples; TDoubleVec weights; double time{0.0}; @@ -292,19 +292,18 @@ void reinitializeResidualModel(double learnRate, } } -class CChangeDebug { +class CDebug { public: static const bool ENABLED{false}; public: - explicit CChangeDebug(std::string file = "results.py") - : m_File{std::move(file)} { + explicit CDebug(std::string file = "results.py") : m_File{std::move(file)} { if (ENABLED) { m_ModelBounds.resize(3); m_Forecast.resize(3); } } - ~CChangeDebug() { + ~CDebug() { if (ENABLED) { std::ofstream file; file.open(m_File); @@ -1394,7 +1393,8 @@ BOOST_AUTO_TEST_CASE(testProbability) { for (std::size_t i = 0; i < weight.size(); ++i) { weight_[i] = weight[i]; } - double lb[2], ub[2]; + double lb[2]; + double ub[2]; model0.residualModel().probabilityOfLessLikelySamples( calculation, {TDouble10Vec(sample)}, {weight_}, lb[0], ub[0], expectedTail[0]); @@ -2032,7 +2032,7 @@ BOOST_AUTO_TEST_CASE(testStepChangeDiscontinuities) { maths::time_series::CUnivariateTimeSeriesModel model{ modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; - CChangeDebug debug("prior_reinitialization.py"); + CDebug debug("prior_reinitialization.py"); core_t::TTime time{0}; TDoubleVec samples; @@ -2058,7 +2058,7 @@ BOOST_AUTO_TEST_CASE(testStepChangeDiscontinuities) { maths::time_series::CUnivariateTimeSeriesModel model{ modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers}; - CChangeDebug debug("piecewise_constant.py"); + CDebug debug("piecewise_constant.py"); // Add some data to the model. @@ -2128,7 +2128,7 @@ BOOST_AUTO_TEST_CASE(testStepChangeDiscontinuities) { auto controllers = decayRateControllers(1); maths::time_series::CUnivariateTimeSeriesModel model{ modelParams(bucketLength), 0, trend, univariateNormal(), &controllers}; - CChangeDebug debug("saw_tooth.py"); + CDebug debug("saw_tooth.py"); // Add some data to the model. 
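
The makeComponentDetectedCallback helper earlier in this file's diff is where the interesting work in these tests happens: when the decomposition reports a new component, the residual model is discarded and re-seeded from the residuals the trend test hands back. A rough sketch of that re-seeding step, assuming `residuals` is the mean-accumulator vector returned by residuals(), `prior` is any maths::common::CPrior and `learnRate` is the model parameter of the same name; this mirrors the shape of the test helper rather than quoting the exact library code:

    // Start from scratch, keeping only the decay rate.
    prior.setToNonInformative(0.0, prior.decayRate());
    maths_t::TDoubleWeightsAry1Vec weight(1);
    double buckets{0.0};
    for (const auto& residual : residuals) {
        double count{maths::common::CBasicStatistics::count(residual)};
        if (count > 0.0) {
            // Each residual bucket is added with its count scaled by the learn rate.
            buckets += count / learnRate;
            weight[0] = maths_t::countWeight(count / learnRate);
            prior.addSamples({maths::common::CBasicStatistics::mean(residual)}, weight);
        }
    }
    // Age the prior as if these samples had arrived over the implied number of buckets.
    prior.propagateForwardsByTime(buckets);
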
@@ -2219,7 +2219,7 @@ BOOST_AUTO_TEST_CASE(testLinearScaling) {
         auto controllers = decayRateControllers(1);
         maths::time_series::CUnivariateTimeSeriesModel model{
             modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
-        CChangeDebug debug;
+        CDebug debug;
 
         core_t::TTime time{0};
         TDoubleVec samples;
@@ -2297,7 +2297,7 @@ BOOST_AUTO_TEST_CASE(testDaylightSaving) {
     auto controllers = decayRateControllers(1);
     maths::time_series::CUnivariateTimeSeriesModel model{
         modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
-    CChangeDebug debug;
+    CDebug debug;
 
     core_t::TTime time{0};
     TDoubleVec samples;
@@ -2354,6 +2354,72 @@ BOOST_AUTO_TEST_CASE(testDaylightSaving) {
     }
 }
 
+BOOST_AUTO_TEST_CASE(testNonNegative) {
+    // Test that after a non-negative time series drops to zero we always predict
+    // a high probability for zero values using the one-sided above calculation.
+
+    TDouble2VecWeightsAryVec trendWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
+    TDouble2VecWeightsAryVec residualWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
+    auto updateModel = [&](core_t::TTime time, double value,
+                           maths::time_series::CUnivariateTimeSeriesModel& model) {
+        model.countWeights(time, {value}, 1.0, 1.0, 0.0, 1.0, trendWeights[0],
+                           residualWeights[0]);
+        auto params = addSampleParams(1.0, trendWeights, residualWeights);
+        params.isNonNegative(true);
+        model.addSamples(params, {core::make_triple(time, TDouble2Vec{value}, TAG)});
+    };
+
+    test::CRandomNumbers rng;
+
+    core_t::TTime day{core::constants::DAY};
+    core_t::TTime week{core::constants::WEEK};
+
+    auto trend = [&](core_t::TTime time) {
+        return (time > 10 * day ? -2.0
+                                : std::max(15.0 - 0.5 * static_cast<double>(time) /
+                                                      static_cast<double>(day),
+                                           1.0)) +
+               std::sin(boost::math::double_constants::two_pi *
+                        static_cast<double>(time) / static_cast<double>(day));
+    };
+
+    core_t::TTime bucketLength{600};
+    maths::time_series::CTimeSeriesDecomposition trendModel{24.0 * DECAY_RATE, bucketLength};
+    auto controllers = decayRateControllers(1);
+    maths::time_series::CUnivariateTimeSeriesModel timeSeriesModel{
+        modelParams(bucketLength), 0, trendModel,
+        univariateNormal(DECAY_RATE / 3.0), &controllers};
+    CDebug debug;
+
+    core_t::TTime time{0};
+    TDoubleVec noise;
+    rng.generateNormalSamples(0.0, 0.1, 3 * week / bucketLength, noise);
+    TDouble2VecWeightsAry probabilityCalculationWeights{
+        maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
+
+    double pMinAfterDrop{1.0};
+    for (auto sample : noise) {
+        sample = std::max(trend(time) + sample, 0.0);
+        updateModel(time, sample, timeSeriesModel);
+
+        if (time > 10 * day) {
+            maths::common::CModelProbabilityParams params;
+            params.addCalculation(maths_t::E_OneSidedAbove)
+                .seasonalConfidenceInterval(50.0)
+                .addWeights(probabilityCalculationWeights);
+            maths::common::SModelProbabilityResult result;
+            timeSeriesModel.probability(params, {{time}}, {{sample}}, result);
+            pMinAfterDrop = std::min(pMinAfterDrop, result.s_Probability);
+        }
+
+        debug.addValueAndPrediction(time, sample, timeSeriesModel);
+        time += bucketLength;
+    }
+
+    LOG_DEBUG(<< "pMinAfterDrop = " << pMinAfterDrop);
+    BOOST_TEST_REQUIRE(pMinAfterDrop > 0.3);
+}
+
 BOOST_AUTO_TEST_CASE(testSkipAnomalyModelUpdate) {
     // We test that, when parameters dictate to skip updating the anomaly model,
     // probabilities become smaller despite seeing many anomalies.
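
The new test pins down the user-visible consequence of the clamping added to CTimeSeriesDecomposition: once the fitted trend heads below zero on a series flagged non-negative, the prediction interval is floored at zero, so observed zeros stop registering as one-sided-above anomalies. A small worked example of the clamped detrend, with invented numbers; the residual formula matches the loss lambda in CTimeSeriesDecomposition::detrend shown later in this series:

    // Suppose the unclamped prediction interval at some time is [-2.5, -0.5]
    // but the series is flagged non-negative, so both ends are floored at zero.
    double lower{std::max(-2.5, 0.0)}; // == 0.0
    double upper{std::max(-0.5, 0.0)}; // == 0.0
    double value{0.0};                 // the observed zero
    // Distance outside [lower, upper], which drives the probability calculation.
    double residual{std::min(value - lower, 0.0) + std::max(value - upper, 0.0)}; // == 0.0
    // Without the clamp the residual would be 0.0 - (-0.5) = 0.5, i.e. the zero
    // would look anomalously *high* under the E_OneSidedAbove calculation.
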
diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index 58b5439553..6c4ae3542b 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -352,8 +352,8 @@ void CEventRateModel::sample(core_t::TTime startTime, }; maths::common::CModelAddSamplesParams params; - params.integer(true) - .nonNegative(true) + params.isInteger(true) + .isNonNegative(true) .propagationInterval(scaledInterval) .trendWeights(trendWeights) .priorWeights(priorWeights) diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc index ef0cccc2ec..8ac619e8d2 100644 --- a/lib/model/CEventRatePopulationModel.cc +++ b/lib/model/CEventRatePopulationModel.cc @@ -508,8 +508,8 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, }; maths::common::CModelAddSamplesParams params; - params.integer(true) - .nonNegative(true) + params.isInteger(true) + .isNonNegative(true) .propagationInterval(this->propagationTime(cid, sampleTime)) .trendWeights(attribute.second.s_TrendWeights) .priorWeights(attribute.second.s_ResidualWeights) diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 8906ba79de..ba98f4add3 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -409,8 +409,8 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr maths_t::CUnitWeights::unit(dimension)}; maths_t::setCount(TDouble2Vec(dimension, 50.0), weights[0]); maths::common::CModelAddSamplesParams params; - params.integer(true) - .nonNegative(true) + params.isInteger(true) + .isNonNegative(true) .propagationInterval(1.0) .trendWeights(weights) .priorWeights(weights); diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index 3dc5ce2712..8691fe9d85 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -331,8 +331,8 @@ void CMetricModel::sample(core_t::TTime startTime, }; maths::common::CModelAddSamplesParams params; - params.integer(data_.second.s_IsInteger) - .nonNegative(data_.second.s_IsNonNegative) + params.isInteger(data_.second.s_IsInteger) + .isNonNegative(data_.second.s_IsNonNegative) .propagationInterval(scaledInterval) .trendWeights(trendWeights) .priorWeights(priorWeights) diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc index a174292e94..ab9dadfebc 100644 --- a/lib/model/CMetricPopulationModel.cc +++ b/lib/model/CMetricPopulationModel.cc @@ -495,8 +495,8 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, }; maths::common::CModelAddSamplesParams params; - params.integer(attribute.second.s_IsInteger) - .nonNegative(attribute.second.s_IsNonNegative) + params.isInteger(attribute.second.s_IsInteger) + .isNonNegative(attribute.second.s_IsNonNegative) .propagationInterval(this->propagationTime(cid, latest)) .trendWeights(attribute.second.s_TrendWeights) .priorWeights(attribute.second.s_ResidualWeights) diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 4c02aec6ad..788f360fba 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -231,8 +231,8 @@ BOOST_FIXTURE_TEST_CASE(testCountSample, CTestFixture) { LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); - std::size_t i{0u}; - std::size_t j{0u}; + std::size_t i{0}; + std::size_t j{0}; for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength, ++j) { 
core_t::TTime bucketEndTime = bucketStartTime + bucketLength; @@ -248,8 +248,8 @@ BOOST_FIXTURE_TEST_CASE(testCountSample, CTestFixture) { model->sample(bucketStartTime, bucketEndTime, m_ResourceMonitor); maths::common::CModelAddSamplesParams params_; - params_.integer(true) - .nonNegative(true) + params_.isInteger(true) + .isNonNegative(true) .propagationInterval(1.0) .trendWeights(weights) .priorWeights(weights); @@ -326,8 +326,8 @@ BOOST_FIXTURE_TEST_CASE(testNonZeroCountSample, CTestFixture) { LOG_DEBUG(<< "startTime = " << startTime << ", endTime = " << endTime << ", # events = " << eventTimes.size()); - std::size_t i{0u}; - std::size_t j{0u}; + std::size_t i{0}; + std::size_t j{0}; for (core_t::TTime bucketStartTime = startTime; bucketStartTime < endTime; bucketStartTime += bucketLength) { core_t::TTime bucketEndTime = bucketStartTime + bucketLength; @@ -344,8 +344,8 @@ BOOST_FIXTURE_TEST_CASE(testNonZeroCountSample, CTestFixture) { if (*model->currentBucketCount(0, bucketStartTime) > 0) { maths::common::CModelAddSamplesParams params_; - params_.integer(true) - .nonNegative(true) + params_.isInteger(true) + .isNonNegative(true) .propagationInterval(1.0) .trendWeights(weights) .priorWeights(weights); diff --git a/lib/model/unittest/CEventRatePopulationModelTest.cc b/lib/model/unittest/CEventRatePopulationModelTest.cc index 23896e050e..0720fdee86 100644 --- a/lib/model/unittest/CEventRatePopulationModelTest.cc +++ b/lib/model/unittest/CEventRatePopulationModelTest.cc @@ -371,8 +371,8 @@ BOOST_FIXTURE_TEST_CASE(testFeatures, CTestFixture) { samples.emplace_back(startTime + bucketLength / 2, sample, 0); } maths::common::CModelAddSamplesParams params_; - params_.integer(true) - .nonNegative(true) + params_.isInteger(true) + .isNonNegative(true) .propagationInterval(1.0) .trendWeights(trendWeights) .priorWeights(residualWeights); diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index 3b3c6518d1..07ed01771a 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -191,8 +191,8 @@ BOOST_FIXTURE_TEST_CASE(testSample, CTestFixture) { maths::common::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights( numberSamples, maths_t::CUnitWeights::unit(1)); maths::common::CModelAddSamplesParams params_; - params_.integer(false) - .nonNegative(true) + params_.isInteger(false) + .isNonNegative(true) .propagationInterval(1.0) .trendWeights(weights) .priorWeights(weights); diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc index 3b5b3006aa..75594d8fed 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.cc +++ b/lib/model/unittest/CMetricPopulationModelTest.cc @@ -365,7 +365,7 @@ BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) { TSizeSizePrDoubleVecMap expectedSampleTimes; TSizeSizePrDoubleVecMap expectedSamples[3]; TSizeMathsModelPtrMap expectedPopulationModels[3]; - bool nonNegative = true; + bool isNonNegative = true; for (const auto& message : messages) { if (message.s_Time >= startTime + bucketLength) { @@ -408,8 +408,8 @@ BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) { attribute.second.s_Values, attribute.second.s_TrendWeights, attribute.second.s_ResidualWeights); maths::common::CModelAddSamplesParams params_; - params_.integer(false) - .nonNegative(nonNegative) + params_.isInteger(false) + .isNonNegative(isNonNegative) .propagationInterval(1.0) .trendWeights(attribute.second.s_TrendWeights) 
                .priorWeights(attribute.second.s_ResidualWeights);
@@ -442,7 +442,7 @@ BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) {
             CEventData eventData = this->addArrival(message, m_Gatherer);
             std::size_t pid = *eventData.personId();
             std::size_t cid = *eventData.attributeId();
-            nonNegative &= message.s_Dbl1Vec.get()[0] < 0.0;
+            isNonNegative &= message.s_Dbl1Vec.get()[0] >= 0.0;
 
             double sampleCount = m_Gatherer->sampleCount(cid);
             if (sampleCount > 0.0) {
diff --git a/lib/model/unittest/CModelToolsTest.cc b/lib/model/unittest/CModelToolsTest.cc
index 3087f7b545..1331422956 100644
--- a/lib/model/unittest/CModelToolsTest.cc
+++ b/lib/model/unittest/CModelToolsTest.cc
@@ -200,7 +200,10 @@ BOOST_AUTO_TEST_CASE(testProbabilityCache) {
         rng.random_shuffle(samples.begin(), samples.end());
         for (auto sample : samples) {
             maths::common::CModelAddSamplesParams params;
-            params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights);
+            params.isInteger(false)
+                .propagationInterval(1.0)
+                .trendWeights(weights)
+                .priorWeights(weights);
             model.addSamples(
                 params, {core::make_triple(time_, TDouble2Vec(1, sample), TAG)});
         }
diff --git a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc
index 5fa6c0319b..126f4a55c8 100644
--- a/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc
+++ b/lib/model/unittest/CProbabilityAndInfluenceCalculatorTest.cc
@@ -117,7 +117,7 @@ addSamples(core_t::TTime bucketLength, const SAMPLES& samples, maths::common::CM
     TDouble2VecWeightsAryVec weights{
         maths_t::CUnitWeights::unit<TDouble2Vec>(dimension(samples[0]))};
     maths::common::CModelAddSamplesParams params;
-    params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights);
+    params.isInteger(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights);
     core_t::TTime time{0};
     for (const auto& sample_ : samples) {
         model.addSamples(params, {sample(time, sample_)});
@@ -280,7 +280,8 @@ void testProbabilityAndGetInfluences(model_t::EFeature feature,
 
     double probability;
     BOOST_TEST_REQUIRE(calculator.calculate(probability, influences));
 
-    double pj, pe;
+    double pj;
+    double pe;
     BOOST_TEST_REQUIRE(pJoint.calculate(pj));
     BOOST_TEST_REQUIRE(pExtreme.calculate(pe));
 
From 287195804696f3c2b3ccff0c56badd1599cfcedc Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Mon, 16 May 2022 18:41:28 +0100
Subject: [PATCH 05/12] Docs

---
 docs/CHANGELOG.asciidoc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index abaf9175a8..b545ef3d39 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -35,6 +35,7 @@
 * Upgrade PyTorch to version 1.11. (See {ml-pull}2233[#2233], {ml-pull}2235[#2235]
   and {ml-pull}2238[#2238].)
 * Upgrade zlib to version 1.2.12 on Windows. (See {ml-pull}2253[#2253].)
+* Address the root cause of anomalies where both the actual and typical values are zero. (See {ml-pull}2270[#2270].)
=== Bug Fixes From 62fe2f15bce8044194423dd670df195326fca852 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 16 May 2022 20:11:16 +0100 Subject: [PATCH 06/12] Windows build --- lib/maths/time_series/CTimeSeriesDecomposition.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/maths/time_series/CTimeSeriesDecomposition.cc b/lib/maths/time_series/CTimeSeriesDecomposition.cc index 46f688cd8d..ff9db2eacd 100644 --- a/lib/maths/time_series/CTimeSeriesDecomposition.cc +++ b/lib/maths/time_series/CTimeSeriesDecomposition.cc @@ -226,8 +226,9 @@ void CTimeSeriesDecomposition::addPoint(core_t::TTime time, m_LastValueTime = std::max(m_LastValueTime, time); this->propagateForwardsTo(time); - auto testForSeasonality = m_Components.makeTestForSeasonality([this]() { - return [ predictor = this->predictor(E_Seasonal), + auto testForSeasonality = m_Components.makeTestForSeasonality([this] { + auto predictor_ = this->predictor(E_Seasonal); + return [ predictor = std::move(predictor_), this ](core_t::TTime time_, const TBoolVec& removedSeasonalMask) { return predictor(time_, removedSeasonalMask) + this->smooth( @@ -248,8 +249,8 @@ void CTimeSeriesDecomposition::addPoint(core_t::TTime time, this->value(time, 0.0, E_Calendar).mean(), *this, [this] { - return [predictor = this->predictor(E_All | E_TrendForced)]( - core_t::TTime time_) { + auto predictor_ = this->predictor(E_All | E_TrendForced); + return [predictor = std::move(predictor_)](core_t::TTime time_) { return predictor(time_, {}); }; }, From 5734348ea7c052caa51c0c070a8ab4f77a02f2e0 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 18 May 2022 10:04:19 +0100 Subject: [PATCH 07/12] Require smoothing is explicit for private function to avoid accidentally calling the wrong implementation --- .../maths/time_series/CTimeSeriesDecomposition.h | 2 +- .../time_series/CTimeSeriesDecompositionStub.h | 4 +--- lib/maths/time_series/CTimeSeriesDecomposition.cc | 14 +++++++------- lib/maths/time_series/CTimeSeriesModel.cc | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/maths/time_series/CTimeSeriesDecomposition.h b/include/maths/time_series/CTimeSeriesDecomposition.h index 7ffa6c5ea3..55a413837a 100644 --- a/include/maths/time_series/CTimeSeriesDecomposition.h +++ b/include/maths/time_series/CTimeSeriesDecomposition.h @@ -240,7 +240,7 @@ class MATHS_TIME_SERIES_EXPORT EMPTY_BASE_OPT CTimeSeriesDecomposition core::CStateRestoreTraverser& traverser); //! Get the predicted value of the time series at \p time. - TVector2x1 value(core_t::TTime time, double confidence, int components, bool smooth = true) const; + TVector2x1 value(core_t::TTime time, double confidence, int components, bool smooth) const; //! Compute the variance scale weight to apply at \p time. TVector2x1 diff --git a/include/maths/time_series/CTimeSeriesDecompositionStub.h b/include/maths/time_series/CTimeSeriesDecompositionStub.h index 0578b9ed37..657cc927c5 100644 --- a/include/maths/time_series/CTimeSeriesDecompositionStub.h +++ b/include/maths/time_series/CTimeSeriesDecompositionStub.h @@ -62,9 +62,7 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesDecompositionStub double meanValue(core_t::TTime time) const override; //! Returns zero vector. - TVector2x1 value(core_t::TTime time, - double confidence = 0.0, - bool isNonNegative = false) const override; + TVector2x1 value(core_t::TTime time, double confidence, bool isNonNegative) const override; //! Returns 0. 
core_t::TTime maximumForecastInterval() const override; diff --git a/lib/maths/time_series/CTimeSeriesDecomposition.cc b/lib/maths/time_series/CTimeSeriesDecomposition.cc index ff9db2eacd..767372447f 100644 --- a/lib/maths/time_series/CTimeSeriesDecomposition.cc +++ b/lib/maths/time_series/CTimeSeriesDecomposition.cc @@ -244,9 +244,9 @@ void CTimeSeriesDecomposition::addPoint(core_t::TTime time, m_TimeShift, value, weights, - this->value(time, 0.0, E_TrendForced).mean(), - this->value(time, 0.0, E_Seasonal).mean(), - this->value(time, 0.0, E_Calendar).mean(), + this->value(time, 0.0, E_TrendForced, true).mean(), + this->value(time, 0.0, E_Seasonal, true).mean(), + this->value(time, 0.0, E_Calendar, true).mean(), *this, [this] { auto predictor_ = this->predictor(E_All | E_TrendForced); @@ -329,7 +329,7 @@ CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, int compo CTimeSeriesDecomposition::TVector2x1 CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, bool isNonNegative) const { - auto result = this->value(time, confidence, E_All); + auto result = this->value(time, confidence, E_All, true); return isNonNegative ? max(result, 0.0) : result; } @@ -449,9 +449,9 @@ double CTimeSeriesDecomposition::detrend(core_t::TTime time, return value; } - TVector2x1 interval{this->value(time, confidence, E_All ^ E_Seasonal)}; - auto shiftedInterval = [=](core_t::TTime shift) { - TVector2x1 result{interval + this->value(time + shift, confidence, E_Seasonal)}; + TVector2x1 interval{this->value(time, confidence, E_All ^ E_Seasonal, true)}; + auto shiftedInterval = [&](core_t::TTime shift) { + TVector2x1 result{interval + this->value(time + shift, confidence, E_Seasonal, true)}; return isNonNegative ? max(result, 0.0) : result; }; diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index 5397ba757e..715c4e475d 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -609,7 +609,7 @@ CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel( CUnivariateTimeSeriesModel::CUnivariateTimeSeriesModel(const common::SModelRestoreParams& params, core::CStateRestoreTraverser& traverser) - : common::CModel(params.s_Params), m_IsForecastable(false) { + : common::CModel(params.s_Params) { if (traverser.traverseSubLevel([&](auto& traverser_) { return this->acceptRestoreTraverser(params, traverser_); }) == false) { From ca7d731b23746bc18c2dfc349d0dcd7c9b618217 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 18 May 2022 11:53:19 +0100 Subject: [PATCH 08/12] Type fix --- lib/maths/time_series/CCalendarCyclicTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/maths/time_series/CCalendarCyclicTest.cc b/lib/maths/time_series/CCalendarCyclicTest.cc index 00f3034eda..6f15bbc414 100644 --- a/lib/maths/time_series/CCalendarCyclicTest.cc +++ b/lib/maths/time_series/CCalendarCyclicTest.cc @@ -183,9 +183,9 @@ void CCalendarCyclicTest::add(core_t::TTime time, double error, double weight) { bool isVeryLarge{100.0 * (1.0 - this->survivalFunction(error)) >= VERY_LARGE_ERROR_PERCENTILE}; m_CurrentBucketErrorStats.s_LargeErrorCount += - std::min(std::numeric_limits::max() - + std::min(std::numeric_limits::max() - m_CurrentBucketErrorStats.s_LargeErrorCount, - static_cast(isVeryLarge ? (1 << 17) + 1 : 1)); + static_cast(isVeryLarge ? 
(1 << 17) + 1 : 1)); m_CurrentBucketErrorStats.s_LargeErrorSum += this->winsorise(error); } } From 91438a9e5c9e522997ef0f58a4d00aef7480921c Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 18 May 2022 21:36:39 +0100 Subject: [PATCH 09/12] Workaround compiler bug --- lib/maths/time_series/CTimeSeriesModel.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc index 715c4e475d..e6cafb0ccc 100644 --- a/lib/maths/time_series/CTimeSeriesModel.cc +++ b/lib/maths/time_series/CTimeSeriesModel.cc @@ -758,7 +758,7 @@ CUnivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights) TDouble1Vec modes(m_ResidualModel->marginalLikelihoodModes(unpack(weights))); result.reserve(modes.size()); for (auto mode : modes) { - result.emplace_back(mode); + result.push_back({mode}); } return result; } @@ -2268,7 +2268,7 @@ CMultivariateTimeSeriesModel::mode(core_t::TTime time, CMultivariateTimeSeriesModel::TDouble2Vec1Vec CMultivariateTimeSeriesModel::correlateModes(core_t::TTime /*time*/, const TDouble2VecWeightsAry1Vec& /*weights*/) const { - return TDouble2Vec1Vec(); + return {}; } CMultivariateTimeSeriesModel::TDouble2Vec1Vec @@ -2277,7 +2277,7 @@ CMultivariateTimeSeriesModel::residualModes(const TDouble2VecWeightsAry& weights TDouble2Vec1Vec result; result.reserve(modes.size()); for (const auto& mode : modes) { - result.push_back(TDouble2Vec(mode)); + result.push_back(mode); } return result; } From 772b6faefe9769cc68138f609fe7e5ac0596d419 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 19 May 2022 10:22:25 +0100 Subject: [PATCH 10/12] Formatting --- lib/maths/time_series/CCalendarCyclicTest.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/maths/time_series/CCalendarCyclicTest.cc b/lib/maths/time_series/CCalendarCyclicTest.cc index 6f15bbc414..b32586b704 100644 --- a/lib/maths/time_series/CCalendarCyclicTest.cc +++ b/lib/maths/time_series/CCalendarCyclicTest.cc @@ -182,10 +182,10 @@ void CCalendarCyclicTest::add(core_t::TTime time, double error, double weight) { if (error >= largeError) { bool isVeryLarge{100.0 * (1.0 - this->survivalFunction(error)) >= VERY_LARGE_ERROR_PERCENTILE}; - m_CurrentBucketErrorStats.s_LargeErrorCount += - std::min(std::numeric_limits::max() - - m_CurrentBucketErrorStats.s_LargeErrorCount, - static_cast(isVeryLarge ? (1 << 17) + 1 : 1)); + m_CurrentBucketErrorStats.s_LargeErrorCount += std::min( + std::numeric_limits::max() - + m_CurrentBucketErrorStats.s_LargeErrorCount, + static_cast(isVeryLarge ? (1 << 17) + 1 : 1)); m_CurrentBucketErrorStats.s_LargeErrorSum += this->winsorise(error); } } From d24fc98b3391e103bd8bbbae506ede5658495050 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 19 May 2022 10:27:53 +0100 Subject: [PATCH 11/12] Tidy up --- include/maths/time_series/CTimeSeriesTestForSeasonality.h | 2 +- lib/maths/time_series/CTimeSeriesDecomposition.cc | 8 ++++---- lib/maths/time_series/CTimeSeriesTestForSeasonality.cc | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/maths/time_series/CTimeSeriesTestForSeasonality.h b/include/maths/time_series/CTimeSeriesTestForSeasonality.h index 6e40064f27..7783ee02fd 100644 --- a/include/maths/time_series/CTimeSeriesTestForSeasonality.h +++ b/include/maths/time_series/CTimeSeriesTestForSeasonality.h @@ -243,7 +243,7 @@ class MATHS_TIME_SERIES_EXPORT CTimeSeriesTestForSeasonality { std::size_t size); //! 
     //! Add a predictor for the currently modelled seasonal components.
-    void modelledSeasonalityPredictor(const TPredictor& predictor);
+    void modelledSeasonalityPredictor(TPredictor predictor);
 
     //! Fit and remove any seasonality we're modelling and can't test.
     void prepareWindowForDecompose();
diff --git a/lib/maths/time_series/CTimeSeriesDecomposition.cc b/lib/maths/time_series/CTimeSeriesDecomposition.cc
index 767372447f..ab7ff90eb1 100644
--- a/lib/maths/time_series/CTimeSeriesDecomposition.cc
+++ b/lib/maths/time_series/CTimeSeriesDecomposition.cc
@@ -336,7 +336,7 @@ CTimeSeriesDecomposition::value(core_t::TTime time, double confidence, bool isNo
 
 CTimeSeriesDecomposition::TFilteredPredictor
 CTimeSeriesDecomposition::predictor(int components) const {
-    CTrendComponent::TPredictor trend_{m_Components.trend().predictor()};
+    auto trend_ = m_Components.trend().predictor();
     return [ components, trend = std::move(trend_),
              this ](core_t::TTime time, const TBoolVec& removedSeasonalMask) {
@@ -396,7 +396,7 @@ void CTimeSeriesDecomposition::forecast(core_t::TTime startTime,
         return;
     }
 
-    auto seasonal = [this, confidence](core_t::TTime time) {
+    auto seasonal = [this, confidence](core_t::TTime time) -> TVector2x1 {
         TVector2x1 prediction{0.0};
         for (const auto& component : m_Components.seasonal()) {
             if (component.initialized() && component.time().inWindow(time)) {
@@ -449,14 +450,14 @@ double CTimeSeriesDecomposition::detrend(core_t::TTime time,
     }
 
     TVector2x1 interval{this->value(time, confidence, E_All ^ E_Seasonal, true)};
-    auto shiftedInterval = [&](core_t::TTime shift) {
+    auto shiftedInterval = [&](core_t::TTime shift) -> TVector2x1 {
         TVector2x1 result{interval + this->value(time + shift, confidence, E_Seasonal, true)};
         return isNonNegative ? max(result, 0.0) : result;
     };
 
     core_t::TTime shift{0};
     if (maximumTimeShift > 0) {
-        auto loss = [&](double shift_) {
+        auto loss = [&](double shift_) -> double {
             TVector2x1 interval_{shiftedInterval(static_cast<core_t::TTime>(shift_ + 0.5))};
             return std::fabs(std::min(value - interval_(0), 0.0) +
                              std::max(value - interval_(1), 0.0));
diff --git a/lib/maths/time_series/CTimeSeriesTestForSeasonality.cc b/lib/maths/time_series/CTimeSeriesTestForSeasonality.cc
index de3cb09862..62df739e68 100644
--- a/lib/maths/time_series/CTimeSeriesTestForSeasonality.cc
+++ b/lib/maths/time_series/CTimeSeriesTestForSeasonality.cc
@@ -301,8 +301,8 @@ void CTimeSeriesTestForSeasonality::addModelledSeasonality(const CSeasonalTime&
     }
 }
 
-void CTimeSeriesTestForSeasonality::modelledSeasonalityPredictor(const TPredictor& predictor) {
-    m_ModelledPredictor = predictor;
+void CTimeSeriesTestForSeasonality::modelledSeasonalityPredictor(TPredictor predictor) {
+    m_ModelledPredictor = std::move(predictor);
 }
 
 void CTimeSeriesTestForSeasonality::prepareWindowForDecompose() {
 
From 44ddc14320809228500abdbccb5c82a3c3eecdf5 Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Thu, 19 May 2022 10:58:07 +0100
Subject: [PATCH 12/12] Review comment

---
 lib/maths/time_series/CTimeSeriesModel.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/maths/time_series/CTimeSeriesModel.cc b/lib/maths/time_series/CTimeSeriesModel.cc
index e6cafb0ccc..3511c8b014 100644
--- a/lib/maths/time_series/CTimeSeriesModel.cc
+++ b/lib/maths/time_series/CTimeSeriesModel.cc
@@ -123,10 +123,15 @@ const std::string VERSION_7_11_TAG("7.11");
 const std::string ID_6_3_TAG{"a"};
 const std::string IS_NON_NEGATIVE_6_3_TAG{"b"};
 const std::string IS_FORECASTABLE_6_3_TAG{"c"};
+//const std::string RNG_6_3_TAG{"d"}; Removed in 6.5
 const std::string CONTROLLER_6_3_TAG{"e"};
 const core::TPersistenceTag TREND_MODEL_6_3_TAG{"f", "trend_model"};
 const core::TPersistenceTag RESIDUAL_MODEL_6_3_TAG{"g", "residual_model"};
 const std::string ANOMALY_MODEL_6_3_TAG{"h"};
+//const std::string RECENT_SAMPLES_6_3_TAG{"i"}; Removed in 6.5
+//const std::string CANDIDATE_CHANGE_POINT_6_3_TAG{"j"}; Removed in 7.11
+//const std::string CURRENT_CHANGE_INTERVAL_6_3_TAG{"k"}; Removed in 7.11
+//const std::string CHANGE_DETECTOR_6_3_TAG{"l"}; Removed in 7.11
 const std::string MULTIBUCKET_FEATURE_6_3_TAG{"m"};
 const std::string MULTIBUCKET_FEATURE_MODEL_6_3_TAG{"n"};
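
The retired tags above are kept as comments so their single-letter names are never reused for new fields; a restore from pre-7.11 state can then simply skip letters it no longer recognises. A hedged sketch of the restore loop these names feed, using the RESTORE_BUILT_IN macro as it appears elsewhere in ml-cpp and abridging both the signature and the member list:

    // Sketch only: the real acceptRestoreTraverser also takes restore
    // parameters and handles the remaining tags.
    do {
        const std::string& name{traverser.name()};
        RESTORE_BUILT_IN(ID_6_3_TAG, m_Id)
        RESTORE_BUILT_IN(IS_NON_NEGATIVE_6_3_TAG, m_IsNonNegative)
        RESTORE_BUILT_IN(IS_FORECASTABLE_6_3_TAG, m_IsForecastable)
        // Tags "d" and "i" through "l" written by older versions fall through
        // here and are ignored, which is why those letters must stay retired.
    } while (traverser.next());
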