Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Implement new rules design #119

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/CHANGELOG.asciidoc
Expand Up @@ -44,6 +44,9 @@ new processes being created and macOS uses the sandbox functionality ({pull}98[#
Fix a bug causing us to underestimate the memory used by shared pointers and reduce the memory consumed
by unnecessary reference counting ({pull}108[#108])

Detectors now support rules that allow the user to improve the results by providing some domain-specific
knowledge in the form of rules. ({pull}119[#119])

=== Bug Fixes

Age seasonal components in proportion to the fraction of values with which they're updated ({pull}88[#88])
Expand Down
16 changes: 7 additions & 9 deletions include/api/CDetectionRulesJsonParser.h
Expand Up @@ -38,24 +38,22 @@ class API_EXPORT CDetectionRulesJsonParser {
bool parseRules(const std::string& json, TDetectionRuleVec& rules);

private:
bool parseRuleScope(const rapidjson::Value& ruleObject, model::CDetectionRule& rule);
bool parseRuleConditions(const rapidjson::Value& ruleObject, model::CDetectionRule& rule);
bool parseFilterId(const rapidjson::Value& conditionObject,
model::CRuleCondition& ruleCondition);

static bool hasStringMember(const rapidjson::Value& object, const std::string& name);
static bool hasArrayMember(const rapidjson::Value& object, const std::string& name);
static bool hasDoubleMember(const rapidjson::Value& object, const std::string& name);
static bool parseRuleActions(const rapidjson::Value& ruleObject,
model::CDetectionRule& rule);
static bool parseConditionsConnective(const rapidjson::Value& ruleObject,
model::CDetectionRule& rule);
static bool parseRuleConditionType(const rapidjson::Value& ruleConditionObject,
model::CRuleCondition& ruleCondition);
static bool parseCondition(const rapidjson::Value& ruleConditionObject,
model::CRuleCondition& ruleCondition);
static bool parseConditionAppliesTo(const rapidjson::Value& ruleConditionObject,
model::CRuleCondition& ruleCondition);
static bool parseConditionOperator(const rapidjson::Value& conditionObject,
model::CRuleCondition& ruleCondition);
static bool parseConditionThreshold(const rapidjson::Value& conditionObject,
model::CRuleCondition& ruleCondition);
model::CRuleCondition& condition);
static bool parseConditionValue(const rapidjson::Value& conditionObject,
model::CRuleCondition& condition);

private:
//! The filters per id used by categorical rule conditions.
Expand Down
61 changes: 18 additions & 43 deletions include/model/CDetectionRule.h
Expand Up @@ -8,6 +8,7 @@
#define INCLUDED_ml_model_CDetectionRule_h

#include <model/CRuleCondition.h>
#include <model/CRuleScope.h>
#include <model/ImportExport.h>
#include <model/ModelTypes.h>

Expand All @@ -21,48 +22,33 @@ class CAnomalyDetectorModel;
//! \brief A rule that dictates an action to be taken when certain conditions occur.
//!
//! DESCRIPTION:\n
//! A rule describes an action to be taken and the conditions under which
//! the action should be taken. A rule has an action and one or more conditions.
//! The conditions are combined according to the rule's connective which can
//! be either OR or AND. A rule can optionally have a target field specified.
//! When such target is not specified, the rule applies to the series that is
//! checked against the rule. When a target is specified, the rule applies to
//! all series that are contained within the target. For example, if the target
//! is the partition field and no targetFieldValue is specified, then if the
//! conditions trigger the rule, the rule will apply to all series within the
//! partition. However, when no target is specified, the rule will trigger only
//! for series that are described in the conditions themselves.
//! A rule describes actions to be taken when the scope and conditions are met.
//! A rule has one or more actions, a scope and zero or more conditions.
//! The scope dictates to which series the rule applies.
//! When conditions are present, they dictate to which results the rule applies
//! depending on the result's values. Multiple conditions are combined with a logical AND.
class MODEL_EXPORT CDetectionRule {

public:
using TRuleConditionVec = std::vector<CRuleCondition>;
using TDouble1Vec = core::CSmallVector<double, 1>;

//! Rule actions can apply to filtering results, skipping sampling or both.
//! Rule actions can apply to skip results, skip model updates, or both.
//! This is meant to work as a bit mask so added values should be powers of 2.
enum ERuleAction { E_FilterResults = 1, E_SkipSampling = 2 };

enum EConditionsConnective { E_Or, E_And };
enum ERuleAction { E_SkipResult = 1, E_SkipModelUpdate = 2 };

public:
//! Default constructor.
//! The rule's action defaults to FILTER_RESULTS and the connective to OR.
CDetectionRule();

//! Set the rule's action.
void action(int ruleAction);

//! Set the conditions' connective.
void conditionsConnective(EConditionsConnective connective);
//! Adds a requirement for \p field to be in \p filter for the rule to apply
void includeScope(std::string field, const core::CPatternSet& filter);

//! Adds a requirement for \p field not to be in \p filter for the rule to apply
void excludeScope(std::string field, const core::CPatternSet& filter);

//! Add a condition.
void addCondition(const CRuleCondition& condition);

//! Set the target field name.
void targetFieldName(const std::string& targetFieldName);

//! Set the target field value.
void targetFieldValue(const std::string& targetFieldValue);

//! Check whether the rule applies on a series.
//! \p action is bitwise and'ed with the m_Action member
bool apply(ERuleAction action,
Expand All @@ -77,30 +63,19 @@ class MODEL_EXPORT CDetectionRule {
std::string print() const;

private:
//! Check whether the given series is in the scope
//! of the rule's target.
bool isInScope(const CAnomalyDetectorModel& model, std::size_t pid, std::size_t cid) const;

std::string printAction() const;
std::string printConditionsConnective() const;

private:
//! The rule action. It works as a bit mask so its value
//! may not match any of the declared enum values but the
//! corresponding bit will be 1 when an action is enabled.
int m_Action;
int m_Action{E_SkipResult};

//! The rule scope.
CRuleScope m_Scope;

//! The conditions that trigger the rule.
TRuleConditionVec m_Conditions;

//! The way the rule's conditions are logically connected (i.e. OR, AND).
EConditionsConnective m_ConditionsConnective;

//! The optional target field name. Empty when not specified.
std::string m_TargetFieldName;

//! The optional target field value. Empty when not specified.
std::string m_TargetFieldValue;
};
}
}
Expand Down
78 changes: 21 additions & 57 deletions include/model/CRuleCondition.h
Expand Up @@ -21,7 +21,7 @@ class CPatternSet;
namespace model {
class CAnomalyDetectorModel;

//! \brief A condition that may trigger a rule.
//! \brief A numeric condition that may trigger a rule.
//!
//! DESCRIPTION:\n
//! A condition has a type that determines the calculation
Expand All @@ -34,52 +34,27 @@ class MODEL_EXPORT CRuleCondition {
using TPatternSetCRef = boost::reference_wrapper<const core::CPatternSet>;

public:
enum ERuleConditionType {
E_CategoricalMatch,
E_CategoricalComplement,
E_NumericalActual,
E_NumericalTypical,
E_NumericalDiffAbs,
enum ERuleConditionAppliesTo {
E_Actual,
E_Typical,
E_DiffFromTypical,
E_Time
};

enum EConditionOperator { E_LT, E_LTE, E_GT, E_GTE };

struct SCondition {
SCondition(EConditionOperator op, double threshold);

bool test(double value) const;

EConditionOperator s_Op;
double s_Threshold;
};
enum ERuleConditionOperator { E_LT, E_LTE, E_GT, E_GTE };

public:
//! Default constructor.
CRuleCondition();

//! Set the condition type.
void type(ERuleConditionType ruleType);

//! Set the field name. Empty means it is not specified.
void fieldName(const std::string& fieldName);
//! Set which value the condition applies to.
void appliesTo(ERuleConditionAppliesTo appliesTo);

//! Set the field value. Empty means it is not specified.
void fieldValue(const std::string& fieldValue);
//! Set the condition operator.
void op(ERuleConditionOperator op);

//! Get the numerical condition.
SCondition& condition();

//! Set the value filter (used for categorical only).
void valueFilter(const core::CPatternSet& valueFilter);

//! Is the condition categorical?
//! Categorical conditions are pattern match conditions i.e.
//! E_CategoricalMatch and E_CategoricalComplement
bool isCategorical() const;

//! Is the condition numerical?
bool isNumerical() const;
//! Set the condition value.
void value(double value);

//! Pretty-print the condition.
std::string print() const;
Expand All @@ -88,35 +63,24 @@ class MODEL_EXPORT CRuleCondition {
bool test(const CAnomalyDetectorModel& model,
model_t::EFeature feature,
const model_t::CResultType& resultType,
bool isScoped,
std::size_t pid,
std::size_t cid,
core_t::TTime time) const;

private:
bool checkCondition(const CAnomalyDetectorModel& model,
model_t::EFeature feature,
model_t::CResultType resultType,
std::size_t pid,
std::size_t cid,
core_t::TTime time) const;
std::string print(ERuleConditionType type) const;
std::string print(EConditionOperator op) const;
bool testValue(double value) const;
std::string print(ERuleConditionAppliesTo appliesTo) const;
std::string print(ERuleConditionOperator op) const;

private:
//! The condition type.
ERuleConditionType m_Type;

//! The numerical condition.
SCondition m_Condition;

//! The field name. Empty when not specified.
std::string m_FieldName;
//! The value the condition applies to.
ERuleConditionAppliesTo m_AppliesTo;

//! The field value. Empty when not specified.
std::string m_FieldValue;
//! The condition operator.
ERuleConditionOperator m_Operator;

TPatternSetCRef m_ValueFilter;
//! The condition value.
double m_Value;
};
}
}
Expand Down
65 changes: 65 additions & 0 deletions include/model/CRuleScope.h
@@ -0,0 +1,65 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
#ifndef INCLUDED_ml_model_CRuleScope_h
#define INCLUDED_ml_model_CRuleScope_h

#include <model/ImportExport.h>

#include <core/CPatternSet.h>
#include <core/CTriple.h>

#include <boost/ref.hpp>

#include <string>
#include <vector>

namespace ml {
namespace model {

class CAnomalyDetectorModel;

//! \brief The scope of the rule. It dictates the series where the rule applies.
//!
//! DESCRIPTION:\n
//! The rule scope specifies the series to which a rule applies, based on
//! the series split fields (partition, over, by). When the scope is empty,
//! the rule applies to all series. Fields can be specified to either be
//! included in the scope or excluded from it depending on whether they are
//! contained in a filter (i.e. a list of string patterns). Multiple fields
//! are combined with a logical AND.
//!
//! NOTE(review): filters are held through boost::reference_wrapper, which
//! does not own its referent, so the pattern sets passed to include() and
//! exclude() presumably must outlive this object -- confirm against the
//! implementation.
class MODEL_EXPORT CRuleScope {
public:
//! Whether a scope entry requires the field to be in its filter
//! (E_Include) or not to be in its filter (E_Exclude).
enum ERuleScopeFilterType { E_Include, E_Exclude };

//! A non-owning constant reference to a pattern set (i.e. a filter).
using TPatternSetCRef = boost::reference_wrapper<const core::CPatternSet>;
//! A single scope entry: the field name, its filter and the filter type.
using TStrPatternSetCRefFilterTypeTriple =
core::CTriple<std::string, TPatternSetCRef, ERuleScopeFilterType>;
//! The collection of scope entries.
using TStrPatternSetCRefFilterTypeTripleVec = std::vector<TStrPatternSetCRefFilterTypeTriple>;

public:
//! Default constructor.
CRuleScope() = default;

//! Adds a requirement for \p field to be in \p filter for the rule to apply
void include(std::string field, const core::CPatternSet& filter);

//! Adds a requirement for \p field not to be in \p filter for the rule to apply
void exclude(std::string field, const core::CPatternSet& filter);

//! Check whether the given series is in the rule scope.
//!
//! \param[in] model The model the series belongs to.
//! \param[in] pid Together with \p cid identifies the series in \p model
//! (presumably the person identifier -- TODO confirm).
//! \param[in] cid Together with \p pid identifies the series in \p model
//! (presumably the attribute identifier -- TODO confirm).
//! \return True if the series is in the scope and false otherwise.
bool check(const CAnomalyDetectorModel& model, std::size_t pid, std::size_t cid) const;

//! Pretty-print the scope.
std::string print() const;

private:
//! A vector that holds the triple of the field, filter and its type.
TStrPatternSetCRefFilterTypeTripleVec m_Scope;
};
}
}

#endif // INCLUDED_ml_model_CRuleScope_h