Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

Improve and use periodic boundary condition for seasonal component modeling ({pull}84[#84])
Improve robustness w.r.t. outliers of detection and initialisation of seasonal components ({pull}90[#90])
Improve behavior when there are abrupt changes in the seasonal components present in a time series ({pull}91[#91])
Explicit change point detection and modelling ({pull}92[#92])

=== Bug Fixes
Expand Down
4 changes: 2 additions & 2 deletions include/maths/CAdaptiveBucketing.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ class MATHS_EXPORT CAdaptiveBucketing {

//! An IIR low pass filter for the total desired end point displacement
//! in refine.
TFloatMeanAccumulator m_LpForce;
TFloatMeanAccumulator m_MeanDesiredDisplacement;

//! The total desired end point displacement in refine.
TFloatMeanAccumulator m_Force;
TFloatMeanAccumulator m_MeanAbsDesiredDisplacement;
};
}
}
Expand Down
4 changes: 0 additions & 4 deletions include/maths/CCalendarComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,6 @@ class MATHS_EXPORT CCalendarComponent : private CDecompositionComponent {
//! Get the mean variance of the component residuals.
double meanVariance() const;

//! Get the maximum ratio between a residual variance and the mean
//! residual variance.
double heteroscedasticity() const;

//! Get a checksum for this object.
uint64_t checksum(uint64_t seed = 0) const;

Expand Down
4 changes: 0 additions & 4 deletions include/maths/CDecompositionComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,6 @@ class MATHS_EXPORT CDecompositionComponent {
//! Get the mean variance of the function residuals.
double meanVariance() const;

//! Get the maximum ratio between a residual variance and the mean
//! residual variance.
double heteroscedasticity() const;

//! Get the maximum size to use for the bucketing.
std::size_t maxSize() const;

Expand Down
4 changes: 0 additions & 4 deletions include/maths/CSeasonalComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,6 @@ class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent {
//! Get the mean variance of the component residuals.
double meanVariance() const;

//! Get the maximum ratio between a residual variance and the mean
//! residual variance.
double heteroscedasticity() const;

//! Get the covariance matrix of the regression parameters at \p time.
//!
//! \param[in] time The time of interest.
Expand Down
193 changes: 155 additions & 38 deletions include/maths/CTimeSeriesDecompositionDetail.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
virtual void handle(const SDetectedCalendar& message);

//! Start using the trend for prediction.
void useTrendForPrediction(void);
void useTrendForPrediction();

//! Test to see if using the trend improves prediction accuracy.
bool shouldUseTrendForPrediction();

//! Apply \p shift to the level at \p time and \p value.
void shiftLevel(core_t::TTime time, double value, double shift);
Expand Down Expand Up @@ -440,6 +443,63 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
using TCalendarComponentPtrVec = std::vector<CCalendarComponent*>;
using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;

//! \brief Manages the setting of the error gain when updating
//! the components with a value.
//!
//! DESCRIPTION:\n
//! The gain is the scale applied to the error in the prediction
//! when updating the components with a new value. If we think it
//! is safe, we use a large gain since this improves prediction
//! accuracy. However, this can also lead to instability if, for
//! example, the seasonal components present in the time series
//! suddenly change. When instability occurs it manifests as the
//! amplitude of all the components growing.
//!
//! This object therefore monitors the sum of the absolute component
//! amplitudes and decreases the gain when it detects that this is
//! significantly increasing.
//!
//! The state held is a time origin, a mean accumulator of the summed
//! amplitudes and a regression model of those amplitudes; see the
//! private members below.
class MATHS_EXPORT CGainController {
public:
//! Initialize by reading state from \p traverser.
//!
//! \param[in] traverser The source of the persisted state.
//! \return NOTE(review): presumably true if the state was restored
//! successfully and false otherwise - confirm against the definition.
bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser);

//! Persist state by passing information to \p inserter.
//!
//! \param[in,out] inserter The sink which receives the state.
void acceptPersistInserter(core::CStatePersistInserter& inserter) const;

//! Clear all state.
void clear();

//! Get the gain to use when updating the components with a new value.
//!
//! \return The scale to apply to the prediction error (see the class
//! description for how this is used).
double gain() const;

//! Add seed predictions \p predictions.
//!
//! \param[in] predictions The component predictions used to seed
//! the state. NOTE(review): unlike add() no time is supplied, so
//! presumably this does not update the regression model - confirm.
void seed(const TDoubleVec& predictions);

//! Add the predictions \p predictions at \p time.
//!
//! \param[in] time The time of the predictions.
//! \param[in] predictions The component predictions at \p time.
void add(core_t::TTime time, const TDoubleVec& predictions);

//! Age by \p factor.
//!
//! \param[in] factor The ageing factor. NOTE(review): the valid range
//! and which members are aged are not visible here - confirm against
//! the definition.
void age(double factor);

//! Shift the mean prediction error regression model time origin
//! to \p time.
//!
//! NOTE(review): the only regression model declared in this class is
//! m_MeanSumAmplitudesTrend, which is documented as modelling the
//! absolute component predictions rather than a prediction error;
//! presumably that is the model meant here - confirm and align the
//! wording.
//!
//! \param[in] time The new time origin.
void shiftOrigin(core_t::TTime time);

//! Get a checksum for this object.
//!
//! \param[in] seed The seed for the checksum.
uint64_t checksum(uint64_t seed) const;

private:
//! The type of the regression model used for the amplitudes' trend.
//! NOTE(review): the template argument 1 presumably selects a first
//! order (linear) fit - confirm against CRegression.
using TRegression = CRegression::CLeastSquaresOnline<1>;

private:
//! The origin for the mean prediction error regression model.
//! NOTE(review): presumably the time origin of m_MeanSumAmplitudesTrend
//! (see shiftOrigin) - confirm.
core_t::TTime m_RegressionOrigin = 0;
//! The sum of the absolute component predictions w.r.t. their means.
TFloatMeanAccumulator m_MeanSumAmplitudes;
//! A regression model for the absolute component predictions.
TRegression m_MeanSumAmplitudesTrend;
};

//! \brief Tracks prediction errors with and without components.
//!
//! DESCRIPTION:\n
Expand All @@ -456,19 +516,23 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {

//! Update the errors.
//!
//! \param[in] referenceError The reference error with no components.
//! \param[in] error The prediction error.
//! \param[in] prediction The prediction from the component.
//! \param[in] varianceIncrease The increase in predicted variance
//! due to the component.
//! \param[in] weight The weight of \p error.
void add(double error, double prediction, double weight);
void add(double referenceError,
double error,
double prediction,
double varianceIncrease,
double weight);

//! Clear the error statistics.
void clear();

//! Check if we should discard \p seasonal.
bool remove(core_t::TTime bucketLength, CSeasonalComponent& seasonal) const;

//! Check if we should discard \p calendar.
bool remove(core_t::TTime bucketLength, CCalendarComponent& calendar) const;
//! Check if we should discard the component.
bool remove(core_t::TTime bucketLength, core_t::TTime period) const;

//! Age the errors by \p factor.
void age(double factor);
Expand All @@ -477,22 +541,33 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
uint64_t checksum(uint64_t seed) const;

private:
//! Truncate large, i.e. more than 6 sigma, errors.
static double winsorise(double squareError, const TFloatMeanAccumulator& variance);
using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
using TVector = CVectorNx1<CFloatStorage, 3>;
using TVectorMeanAccumulator = CBasicStatistics::SSampleMean<TVector>::TAccumulator;

private:
//! The mean prediction error in the window.
TFloatMeanAccumulator m_MeanErrorWithComponent;
//! Truncate large, i.e. more than 6 sigma, errors.
TVector winsorise(const TVector& squareError) const;

//! The mean prediction error in the window without the component.
TFloatMeanAccumulator m_MeanErrorWithoutComponent;
private:
//! The vector mean errors:
//! <pre>
//! | excluding all components from the prediction |
//! | including the component in the prediction |
//! | excluding the component from the prediction |
//! </pre>
TVectorMeanAccumulator m_MeanErrors;

//! The maximum increase in variance due to the component.
TMaxAccumulator m_MaxVarianceIncrease;
};

using TComponentErrorsVec = std::vector<CComponentErrors>;
using TComponentErrorsPtrVec = std::vector<CComponentErrors*>;

//! \brief The seasonal components of the decomposition.
struct MATHS_EXPORT SSeasonal {
class MATHS_EXPORT CSeasonal {
public:
//! Initialize by reading state from \p traverser.
bool acceptRestoreTraverser(double decayRate,
core_t::TTime bucketLength,
Expand All @@ -508,15 +583,26 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! - \p start elapsed time.
void propagateForwards(core_t::TTime start, core_t::TTime end);

//! Clear the components' prediction errors.
void clearPredictionErrors();

//! Get the combined size of the seasonal components.
std::size_t size() const;

//! Get the components.
const maths_t::TSeasonalComponentVec& components() const;
//! Get the components.
maths_t::TSeasonalComponentVec& components();

//! Get the state to update.
void componentsErrorsAndDeltas(core_t::TTime time,
TSeasonalComponentPtrVec& components,
TComponentErrorsPtrVec& errors,
TDoubleVec& deltas);

//! Append the predictions at \p time.
void appendPredictions(core_t::TTime time, TDoubleVec& predictions) const;

//! Check if we need to interpolate any of the components.
bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const;

Expand All @@ -526,6 +612,23 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! Check if any of the components has been initialized.
bool initialized() const;

//! Add and initialize a new component.
void add(const CSeasonalTime& seasonalTime,
std::size_t size,
double decayRate,
double bucketLength,
CSplineTypes::EBoundaryCondition boundaryCondition,
core_t::TTime startTime,
core_t::TTime endTime,
const TFloatMeanAccumulatorVec& values);

//! Refresh state after adding new components.
void refreshForNewComponents();

//! Remove all components excluded by adding the component corresponding
//! to \p time.
void removeExcludedComponents(const CSeasonalTime& time);

//! Remove low value components.
bool prune(core_t::TTime time, core_t::TTime bucketLength);

Expand All @@ -544,17 +647,19 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! Get the memory used by this object.
std::size_t memoryUsage() const;

//! The seasonal components.
maths_t::TSeasonalComponentVec s_Components;
private:
//! The components.
maths_t::TSeasonalComponentVec m_Components;

//! The prediction errors relating to the component.
TComponentErrorsVec s_PredictionErrors;
//! The components' prediction errors.
TComponentErrorsVec m_PredictionErrors;
};

using TSeasonalPtr = std::shared_ptr<SSeasonal>;
using TSeasonalPtr = std::shared_ptr<CSeasonal>;

//! \brief Calendar periodic components of the decomposition.
struct MATHS_EXPORT SCalendar {
class MATHS_EXPORT CCalendar {
public:
//! Initialize by reading state from \p traverser.
bool acceptRestoreTraverser(double decayRate,
core_t::TTime bucketLength,
Expand All @@ -570,9 +675,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! - \p start elapsed time.
void propagateForwards(core_t::TTime start, core_t::TTime end);

//! Clear the components' prediction errors.
void clearPredictionErrors();

//! Get the combined size of the calendar components.
std::size_t size() const;

//! Get the components.
const maths_t::TCalendarComponentVec& components() const;

//! Check if there is already a component for \p feature.
bool haveComponent(CCalendarFeature feature) const;

Expand All @@ -581,6 +692,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
TCalendarComponentPtrVec& components,
TComponentErrorsPtrVec& errors);

//! Append the predictions at \p time.
void appendPredictions(core_t::TTime time, TDoubleVec& predictions) const;

//! Check if we need to interpolate any of the components.
bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const;

Expand All @@ -590,6 +704,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! Check if any of the components has been initialized.
bool initialized() const;

//! Add and initialize a new component.
void add(const CCalendarFeature& feature, std::size_t size, double decayRate, double bucketLength);

//! Remove low value components.
bool prune(core_t::TTime time, core_t::TTime bucketLength);

Expand All @@ -605,14 +722,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! Get the memory used by this object.
std::size_t memoryUsage() const;

private:
//! The calendar components.
maths_t::TCalendarComponentVec s_Components;
maths_t::TCalendarComponentVec m_Components;

//! The prediction errors after removing the component.
TComponentErrorsVec s_PredictionErrors;
//! The components' prediction errors.
TComponentErrorsVec m_PredictionErrors;
};

using TCalendarPtr = std::shared_ptr<SCalendar>;
using TCalendarPtr = std::shared_ptr<CCalendar>;

private:
//! Get the total size of the components.
Expand All @@ -624,16 +742,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! Add new seasonal components to \p components.
bool addSeasonalComponents(const CPeriodicityHypothesisTestsResult& result,
const CExpandingWindow& window,
const TPredictor& predictor,
CTrendComponent& trend,
maths_t::TSeasonalComponentVec& components,
TComponentErrorsVec& errors) const;
const TPredictor& predictor);

//! Add a new calendar component to \p components.
bool addCalendarComponent(const CCalendarFeature& feature,
core_t::TTime time,
maths_t::TCalendarComponentVec& components,
TComponentErrorsVec& errors) const;
bool addCalendarComponent(const CCalendarFeature& feature, core_t::TTime time);

//! Reweight the outlier values in \p values.
//!
Expand Down Expand Up @@ -682,6 +794,11 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! The raw data bucketing interval.
core_t::TTime m_BucketLength;

//! Sets the gain used when updating with a new value.
//!
//! \see CGainController for more details.
CGainController m_GainController;

//! The number of buckets to use to estimate a periodic component.
std::size_t m_SeasonalComponentSize;

Expand All @@ -700,17 +817,17 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail {
//! The mean error variance scale for the components.
TFloatMeanAccumulator m_MeanVarianceScale;

//! The moments of the values added.
TMeanVarAccumulator m_Moments;
//! The moments of the error in the predictions excluding the trend.
TMeanVarAccumulator m_PredictionErrorWithoutTrend;

//! The moments of the values added after subtracting a trend.
TMeanVarAccumulator m_MomentsMinusTrend;
//! The moments of the error in the predictions including the trend.
TMeanVarAccumulator m_PredictionErrorWithTrend;

//! Set to true if the trend model should be used for prediction.
bool m_UsingTrendForPrediction;
bool m_UsingTrendForPrediction = false;

//! Set to true if non-null when the seasonal components change.
bool* m_Watcher;
bool* m_Watcher = nullptr;
};
};

Expand Down
2 changes: 1 addition & 1 deletion lib/api/CAnomalyJob.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ const std::string LAST_RESULTS_TIME_TAG("j");

//! The minimum version required to read the state corresponding to a model snapshot.
//! This should be updated every time there is a breaking change to the model state.
const std::string MODEL_SNAPSHOT_MIN_VERSION("6.3.0");
const std::string MODEL_SNAPSHOT_MIN_VERSION("6.4.0");
}

// Statics
Expand Down
Loading