diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
index 161f8bc27b..9c302668b9 100644
--- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
+++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
@@ -187,6 +187,8 @@ public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog
/// It is used when score is calculated for each root cause item. The range of beta should be in [0,1].
/// For a larger beta, root cause items which have a large difference between value and expected value will get a high score.
/// For a small beta, root cause items which have a high relative change will get a low score.
+ /// A threshold to determine whether the point should be root cause. The range of this threshold should be in [0,1].
+ /// If the point's delta is equal to or larger than rootCauseThreshold multiplied by anomaly dimension point's delta, this point is treated as a root cause. Different thresholds will produce different results. Users can choose the threshold according to their data and requirements.
///
///
///
///
///
- public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5)
+ public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.3, double rootCauseThreshold = 0.95)
{
IHostEnvironment host = CatalogUtils.GetEnvironment(catalog);
//check the root cause input
CheckRootCauseInput(host, src);
- //check beta
+ //check that beta and rootCauseThreshold are each within [0,1]
host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]");
+ host.CheckUserArg(rootCauseThreshold >= 0 && rootCauseThreshold <= 1, nameof(rootCauseThreshold), "Must be in [0,1]");
//find out the root cause
- RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta);
+ RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta, rootCauseThreshold);
RootCause dst = analyzer.Analyze();
return dst;
}
diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs
index 4a7cdaa086..bc18c0b248 100644
--- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs
+++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs
@@ -13,16 +13,17 @@ namespace Microsoft.ML.TimeSeries
public class RootCauseAnalyzer
{
private static double _anomalyRatioThreshold = 0.5;
- private static double _anomalyDeltaThreshold = 0.95;
private static double _anomalyPreDeltaThreshold = 2;
private RootCauseLocalizationInput _src;
private double _beta;
+ private double _rootCauseThreshold;
- public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta)
+ public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta, double rootCauseThreshold)
{
_src = src;
_beta = beta;
+ _rootCauseThreshold = rootCauseThreshold;
}
public RootCause Analyze()
@@ -39,10 +40,10 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src)
dst.Items = new List();
DimensionInfo dimensionInfo = SeparateDimension(src.AnomalyDimension, src.AggregateSymbol);
- Tuple> pointInfo = GetPointsInfo(src, dimensionInfo);
+ Tuple, TimeSeriesPoint>> pointInfo = GetPointsInfo(src, dimensionInfo);
PointTree pointTree = pointInfo.Item1;
PointTree anomalyTree = pointInfo.Item2;
- Dictionary dimPointMapping = pointInfo.Item3;
+ Dictionary, TimeSeriesPoint> dimPointMapping = pointInfo.Item3;
//which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension
if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0)
@@ -81,11 +82,12 @@ private DimensionInfo SeparateDimension(Dictionary dimensions, O
return info;
}
- private Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
+ private Tuple, TimeSeriesPoint>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
{
PointTree pointTree = new PointTree();
PointTree anomalyTree = new PointTree();
- Dictionary dimPointMapping = new Dictionary();
+ DimensionComparer dc = new DimensionComparer();
+ Dictionary, TimeSeriesPoint> dimPointMapping = new Dictionary, TimeSeriesPoint>(dc);
List totalPoints = GetTotalPointsForAnomalyTimestamp(src);
Dictionary subDim = GetSubDim(src.AnomalyDimension, dimensionInfo.DetailDims);
@@ -94,9 +96,9 @@ private Tuple> GetPoin
{
if (ContainsAll(point.Dimension, subDim))
{
- if (!dimPointMapping.ContainsKey(GetDicCode(point.Dimension)))
+ if (!dimPointMapping.ContainsKey(point.Dimension))
{
- dimPointMapping.Add(GetDicCode(point.Dimension), point);
+ dimPointMapping.Add(point.Dimension, point);
bool isValidPoint = point.IsAnomaly == true;
if (ContainsAll(point.Dimension, subDim))
{
@@ -111,7 +113,7 @@ private Tuple> GetPoin
}
}
- return new Tuple>(pointTree, anomalyTree, dimPointMapping);
+ return new Tuple, TimeSeriesPoint>>(pointTree, anomalyTree, dimPointMapping);
}
protected Dictionary GetSubDim(Dictionary dimension, List keyList)
@@ -327,7 +329,7 @@ private AnomalyDirection GetRootCauseDirection(TimeSeriesPoint rootCausePoint)
}
}
- private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol)
+ private void GetRootCauseDirectionAndScore(Dictionary, TimeSeriesPoint> dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol)
{
TimeSeriesPoint anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol);
if (dst.Items.Count > 1)
@@ -378,11 +380,11 @@ private void GetRootCauseDirectionAndScore(Dictionary d
}
}
- private TimeSeriesPoint GetPointByDimension(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol)
+ private TimeSeriesPoint GetPointByDimension(Dictionary, TimeSeriesPoint> dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol)
{
- if (dimPointMapping.ContainsKey(GetDicCode(dimension)))
+ if (dimPointMapping.ContainsKey(dimension))
{
- return dimPointMapping[GetDicCode(dimension)];
+ return dimPointMapping[dimension];
}
int count = 0;
@@ -419,11 +421,6 @@ private TimeSeriesPoint GetPointByDimension(Dictionary
}
}
- private static string GetDicCode(Dictionary dic)
- {
- return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray());
- }
-
private void BuildTree(PointTree tree, List aggDims, TimeSeriesPoint point, Object aggSymbol)
{
int aggNum = 0;
@@ -476,22 +473,23 @@ private BestDimension FindBestDimension(SortedDictionary
bool isRatioNan = Double.IsNaN(valueRatioMap[best]);
if (dimension.Key.AnomalyDis.Count > 1)
{
- if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)))
+ if (best.AnomalyDis.Count != 1 && !isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
{
- best = dimension.Key;
+ best = GetBestDimension(best, dimension, valueRatioMap);
}
}
- else
+ else if (dimension.Key.AnomalyDis.Count == 1)
{
+
if (best.AnomalyDis.Count > 1)
{
best = dimension.Key;
}
- else
+ else if (best.AnomalyDis.Count == 1)
{
if (!isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
{
- best = dimension.Key;
+ best = GetBestDimension(best, dimension, valueRatioMap);
}
}
}
@@ -502,6 +500,22 @@ private BestDimension FindBestDimension(SortedDictionary
return best;
}
+ // Returns the candidate (dimension.Key) unless its ratio ties with the ratio stored for best and
+ // the candidate's AnomalyDis and PointDis have the same count, in which case best is kept.
+ private BestDimension GetBestDimension(BestDimension best, KeyValuePair dimension, Dictionary valueRatioMap)
+ {
+ bool ratiosTie = valueRatioMap[best].CompareTo(dimension.Value) == 0;
+ // Distinct ratios: the candidate always replaces best.
+ if (!ratiosTie)
+ {
+ return dimension.Key;
+ }
+ // Tie: prefer the candidate only when its two counts differ.
+ BestDimension candidate = dimension.Key;
+ bool countsMatch = candidate.AnomalyDis.Count == candidate.PointDis.Count;
+ return countsMatch ? best : candidate;
+ }
+
///
/// Calculate the surprise score according to root cause point and anomaly point
///
@@ -569,6 +583,10 @@ private double GetFinalScore(double surprise, double ep, double beta)
else
{
a = (1 - Math.Pow(2, -surprise));
+ if (Double.IsNaN(a))
+ {
+ a = 1;
+ }
b = (1 - Math.Pow(2, -ep));
}
@@ -593,7 +611,7 @@ private static Dictionary UpdateDimensionValue(Dictionary distribution, List bigDictionary, Dictio
private bool IsAggregationDimension(Object val, Object aggSymbol)
{
- return val.Equals(aggSymbol);
+ return aggSymbol != null && Convert.ToString(val).Equals(Convert.ToString(aggSymbol)); // string.Equals(object) is false for a non-string symbol, so compare string forms
}
}
@@ -748,4 +766,47 @@ public RootCauseScore(double surprise, double explanatoryScore)
ExplanatoryScore = explanatoryScore;
}
}
-}
+
+ internal class DimensionComparer : EqualityComparer>
+ {
+ // Two dimension maps are equal when they have the same keys and each key maps to an equal value.
+ public override bool Equals(Dictionary x, Dictionary y)
+ {
+ if (ReferenceEquals(x, y))
+ {
+ return true;
+ }
+ if (x == null || y == null)
+ {
+ return false;
+ }
+ if (x.Count != y.Count)
+ {
+ return false;
+ }
+ // Counts match, so it is enough to find every entry of x in y.
+ foreach (var pair in x)
+ {
+ object other;
+ // Object.Equals(a, b) is null-safe, unlike pair.Value.Equals(...).
+ if (!y.TryGetValue(pair.Key, out other) || !Object.Equals(pair.Value, other))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Order-independent hash built from both keys and values so that it agrees with Equals.
+ public override int GetHashCode(Dictionary obj)
+ {
+ int code = 0;
+ foreach (var pair in obj)
+ {
+ int valueHash = pair.Value == null ? 0 : pair.Value.GetHashCode();
+ code ^= unchecked(pair.Key.GetHashCode() * 397) ^ valueHash;
+ }
+ return code;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs
index a55a35060d..f3ce5ea8c1 100644
--- a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs
+++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs
@@ -11,7 +11,7 @@ namespace Microsoft.ML.TimeSeries
public sealed class RootCause
{
///
- /// A List for root cause item. Instance of the item should be
+ /// A List for root cause item. Instance of the item should be <see cref="RootCauseItem"/>.
///
public List Items { get; set; }
public RootCause()
@@ -23,12 +23,12 @@ public RootCause()
public sealed class RootCauseLocalizationInput
{
///
- /// When the anomaly incident occurs
+ /// When the anomaly incident occurs.
///
public DateTime AnomalyTimestamp { get; set; }
///
- /// Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the libary will not calculate the root cause
+ /// Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the library will not calculate the root cause.
///
public Dictionary AnomalyDimension { get; set; }
@@ -38,7 +38,7 @@ public sealed class RootCauseLocalizationInput
public List Slices { get; set; }
///
- /// The aggregated type, the type should be
+ /// The aggregated type, the type should be <see cref="AggregateType"/>.
///
public AggregateType AggregateType { get; set; }
@@ -56,7 +56,7 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, string aggregateSymbol)
+ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, Object aggregateSymbol)
{
AnomalyTimestamp = anomalyTimestamp;
AnomalyDimension = anomalyDimension;
@@ -64,6 +64,8 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary
///
public List Path;
///
- /// The dimension for the detected root cause point
+ /// The dimension for the detected root cause point.
///
public Dictionary Dimension;
///
- /// The direction for the detected root cause point, should be
+ /// The direction for the detected root cause point, should be <see cref="AnomalyDirection"/>.
///
public AnomalyDirection Direction;
@@ -156,7 +158,7 @@ public bool Equals(RootCauseItem other)
public sealed class MetricSlice
{
///
- /// Timestamp for the point list
+ /// Timestamp for the point list.
///
public DateTime TimeStamp { get; set; }
///
@@ -169,20 +171,22 @@ public MetricSlice(DateTime timeStamp, List points)
TimeStamp = timeStamp;
Points = points;
}
+
+ public MetricSlice() { }
}
public sealed class TimeSeriesPoint : IEquatable
{
///
- /// Value of a time series point
+ /// Value of a time series point.
///
public double Value { get; set; }
///
- /// Forecasted value for the time series point
+ /// Forecasted value for the time series point.
///
public double ExpectedValue { get; set; }
///
- /// Whether the point is an anomaly point
+ /// Whether the point is an anomaly point.
///
public bool IsAnomaly { get; set; }
///
@@ -190,7 +194,7 @@ public sealed class TimeSeriesPoint : IEquatable
///
public Dictionary Dimension { get; set; }
///
- /// Difference between value and expected value
+ /// Difference between value and expected value.
///
public double Delta { get; set; }
@@ -198,6 +202,8 @@ public TimeSeriesPoint(Dictionary dimension)
{
Dimension = dimension;
}
+ public TimeSeriesPoint() { }
+
public TimeSeriesPoint(double value, double expectedValue, bool isAnomaly, Dictionary dimension)
{
Value = value;
diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
index d25b614d37..406d0c879f 100644
--- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
+++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
@@ -527,10 +527,10 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile)
{
var ml = new MLContext(1);
IDataView dataView;
- if(loadDataFromFile)
+ if (loadDataFromFile)
{
var dataPath = GetDataPath(Path.Combine("Timeseries", "anomaly_detection.csv"));
-
+
// Load data from file into the dataView
dataView = ml.Data.LoadFromTextFile(dataPath, new[] {
new TextLoader.Column("Value", DataKind.Single, 0),
@@ -577,9 +577,9 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile)
[Theory, CombinatorialData]
public void TestSrCnnBatchAnomalyDetector(
- [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)]SrCnnDetectMode mode,
- [CombinatorialValues(true, false)]bool loadDataFromFile,
- [CombinatorialValues(-1, 24, 26, 512)]int batchSize)
+ [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)] SrCnnDetectMode mode,
+ [CombinatorialValues(true, false)] bool loadDataFromFile,
+ [CombinatorialValues(-1, 24, 26, 512)] int batchSize)
{
var ml = new MLContext(1);
IDataView dataView;
@@ -596,7 +596,7 @@ public void TestSrCnnBatchAnomalyDetector(
var data = new List();
for (int index = 0; index < 20; index++)
{
- data.Add(new TimeSeriesDataDouble { Value = 5 } );
+ data.Add(new TimeSeriesDataDouble { Value = 5 });
}
data.Add(new TimeSeriesDataDouble { Value = 10 });
for (int index = 0; index < 5; index++)