diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 161f8bc27b..9c302668b9 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -187,6 +187,8 @@ public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog /// It is used when score is calculated for each root cause item. The range of beta should be in [0,1]. /// For a larger beta, root cause items which have a large difference between value and expected value will get a high score. /// For a small beta, root cause items which have a high relative change will get a low score. + /// A threshold to determine whether the point should be a root cause. The range of this threshold should be in [0,1]. + /// If the point's delta is equal to or larger than rootCauseThreshold multiplied by anomaly dimension point's delta, this point is treated as a root cause. Different thresholds will produce different results. Users can choose the threshold according to their data and requirements. 
/// /// /// /// - public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5) + public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.3, double rootCauseThreshold = 0.95) { IHostEnvironment host = CatalogUtils.GetEnvironment(catalog); //check the root cause input CheckRootCauseInput(host, src); - //check beta + //check parameters host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]"); + host.CheckUserArg(rootCauseThreshold >= 0 && rootCauseThreshold <= 1, nameof(rootCauseThreshold), "Must be in [0,1]"); //find out the root cause - RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta); + RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta, rootCauseThreshold); RootCause dst = analyzer.Analyze(); return dst; } diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 4a7cdaa086..bc18c0b248 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -13,16 +13,17 @@ namespace Microsoft.ML.TimeSeries public class RootCauseAnalyzer { private static double _anomalyRatioThreshold = 0.5; - private static double _anomalyDeltaThreshold = 0.95; private static double _anomalyPreDeltaThreshold = 2; private RootCauseLocalizationInput _src; private double _beta; + private double _rootCauseThreshold; - public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta) + public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta, double rootCauseThreshold) { _src = src; _beta = beta; + _rootCauseThreshold = rootCauseThreshold; } public RootCause Analyze() @@ -39,10 +40,10 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) dst.Items = new List(); DimensionInfo dimensionInfo = SeparateDimension(src.AnomalyDimension, src.AggregateSymbol); - Tuple> pointInfo = 
GetPointsInfo(src, dimensionInfo); + Tuple, TimeSeriesPoint>> pointInfo = GetPointsInfo(src, dimensionInfo); PointTree pointTree = pointInfo.Item1; PointTree anomalyTree = pointInfo.Item2; - Dictionary dimPointMapping = pointInfo.Item3; + Dictionary, TimeSeriesPoint> dimPointMapping = pointInfo.Item3; //which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0) @@ -81,11 +82,12 @@ private DimensionInfo SeparateDimension(Dictionary dimensions, O return info; } - private Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) + private Tuple, TimeSeriesPoint>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) { PointTree pointTree = new PointTree(); PointTree anomalyTree = new PointTree(); - Dictionary dimPointMapping = new Dictionary(); + DimensionComparer dc = new DimensionComparer(); + Dictionary, TimeSeriesPoint> dimPointMapping = new Dictionary, TimeSeriesPoint>(dc); List totalPoints = GetTotalPointsForAnomalyTimestamp(src); Dictionary subDim = GetSubDim(src.AnomalyDimension, dimensionInfo.DetailDims); @@ -94,9 +96,9 @@ private Tuple> GetPoin { if (ContainsAll(point.Dimension, subDim)) { - if (!dimPointMapping.ContainsKey(GetDicCode(point.Dimension))) + if (!dimPointMapping.ContainsKey(point.Dimension)) { - dimPointMapping.Add(GetDicCode(point.Dimension), point); + dimPointMapping.Add(point.Dimension, point); bool isValidPoint = point.IsAnomaly == true; if (ContainsAll(point.Dimension, subDim)) { @@ -111,7 +113,7 @@ private Tuple> GetPoin } } - return new Tuple>(pointTree, anomalyTree, dimPointMapping); + return new Tuple, TimeSeriesPoint>>(pointTree, anomalyTree, dimPointMapping); } protected Dictionary GetSubDim(Dictionary dimension, List keyList) @@ -327,7 +329,7 @@ private AnomalyDirection GetRootCauseDirection(TimeSeriesPoint rootCausePoint) } } - private void GetRootCauseDirectionAndScore(Dictionary 
dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol) + private void GetRootCauseDirectionAndScore(Dictionary, TimeSeriesPoint> dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol) { TimeSeriesPoint anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); if (dst.Items.Count > 1) @@ -378,11 +380,11 @@ private void GetRootCauseDirectionAndScore(Dictionary d } } - private TimeSeriesPoint GetPointByDimension(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol) + private TimeSeriesPoint GetPointByDimension(Dictionary, TimeSeriesPoint> dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol) { - if (dimPointMapping.ContainsKey(GetDicCode(dimension))) + if (dimPointMapping.ContainsKey(dimension)) { - return dimPointMapping[GetDicCode(dimension)]; + return dimPointMapping[dimension]; } int count = 0; @@ -419,11 +421,6 @@ private TimeSeriesPoint GetPointByDimension(Dictionary } } - private static string GetDicCode(Dictionary dic) - { - return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray()); - } - private void BuildTree(PointTree tree, List aggDims, TimeSeriesPoint point, Object aggSymbol) { int aggNum = 0; @@ -476,22 +473,23 @@ private BestDimension FindBestDimension(SortedDictionary bool isRatioNan = Double.IsNaN(valueRatioMap[best]); if (dimension.Key.AnomalyDis.Count > 1) { - if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))) + if (best.AnomalyDis.Count != 1 && !isRatioNan && (isLeavesLevel ? 
valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) { - best = dimension.Key; + best = GetBestDimension(best, dimension, valueRatioMap); } } - else + else if (dimension.Key.AnomalyDis.Count == 1) { + if (best.AnomalyDis.Count > 1) { best = dimension.Key; } - else + else if (best.AnomalyDis.Count == 1) { if (!isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) { - best = dimension.Key; + best = GetBestDimension(best, dimension, valueRatioMap); } } } @@ -502,6 +500,22 @@ private BestDimension FindBestDimension(SortedDictionary return best; } + private BestDimension GetBestDimension(BestDimension best, KeyValuePair dimension, Dictionary valueRatioMap) + { + if (valueRatioMap[best].CompareTo(dimension.Value) == 0) + { + if (dimension.Key.AnomalyDis.Count != dimension.Key.PointDis.Count) + { + best = dimension.Key; + } + } + else + { + best = dimension.Key; + } + return best; + } + /// /// Calculate the surprise score according to root cause point and anomaly point /// @@ -569,6 +583,10 @@ private double GetFinalScore(double surprise, double ep, double beta) else { a = (1 - Math.Pow(2, -surprise)); + if (Double.IsNaN(a)) + { + a = 1; + } b = (1 - Math.Pow(2, -ep)); } @@ -593,7 +611,7 @@ private static Dictionary UpdateDimensionValue(Dictionary distribution, List bigDictionary, Dictio private bool IsAggregationDimension(Object val, Object aggSymbol) { - return val.Equals(aggSymbol); + return Convert.ToString(val).Equals(aggSymbol); } } @@ -748,4 +766,47 @@ public RootCauseScore(double surprise, double explanatoryScore) ExplanatoryScore = explanatoryScore; } } -} + + internal class DimensionComparer : EqualityComparer> + { + public override bool Equals(Dictionary x, Dictionary y) + { + if (x == null && y == null) + { + return true; + } + if ((x == null && y != null) || (x != null && y == null)) + { + return false; + } + if (x.Count 
!= y.Count) + { + return false; + } + if (x.Keys.Except(y.Keys).Any()) + { + return false; + } + if (y.Keys.Except(x.Keys).Any()) + { + return false; + } + foreach (var pair in x) + { + if (!pair.Value.Equals(y[pair.Key])) + { + return false; + } + } + return true; + } + + public override int GetHashCode(Dictionary obj) + { + int code = 0; + foreach (KeyValuePair pair in obj) + code = code ^ pair.GetHashCode(); + return code; + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs index a55a35060d..f3ce5ea8c1 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.TimeSeries public sealed class RootCause { /// - /// A List for root cause item. Instance of the item should be + /// A List for root cause item. Instance of the item should be . /// public List Items { get; set; } public RootCause() @@ -23,12 +23,12 @@ public RootCause() public sealed class RootCauseLocalizationInput { /// - /// When the anomaly incident occurs + /// When the anomaly incident occurs. /// public DateTime AnomalyTimestamp { get; set; } /// - /// Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the libary will not calculate the root cause + /// Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the libary will not calculate the root cause. /// public Dictionary AnomalyDimension { get; set; } @@ -38,7 +38,7 @@ public sealed class RootCauseLocalizationInput public List Slices { get; set; } /// - /// The aggregated type, the type should be + /// The aggregated type, the type should be . 
/// public AggregateType AggregateType { get; set; } @@ -56,7 +56,7 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, string aggregateSymbol) + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, Object aggregateSymbol) { AnomalyTimestamp = anomalyTimestamp; AnomalyDimension = anomalyDimension; @@ -64,6 +64,8 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary /// public List Path; /// - /// The dimension for the detected root cause point + /// The dimension for the detected root cause point. /// public Dictionary Dimension; /// - /// The direction for the detected root cause point, should be + /// The direction for the detected root cause point, should be . /// public AnomalyDirection Direction; @@ -156,7 +158,7 @@ public bool Equals(RootCauseItem other) public sealed class MetricSlice { /// - /// Timestamp for the point list + /// Timestamp for the point list. /// public DateTime TimeStamp { get; set; } /// @@ -169,20 +171,22 @@ public MetricSlice(DateTime timeStamp, List points) TimeStamp = timeStamp; Points = points; } + + public MetricSlice() { } } public sealed class TimeSeriesPoint : IEquatable { /// - /// Value of a time series point + /// Value of a time series point. /// public double Value { get; set; } /// - /// Forecasted value for the time series point + /// Forecasted value for the time series point. /// public double ExpectedValue { get; set; } /// - /// Whether the point is an anomaly point + /// Whether the point is an anomaly point. /// public bool IsAnomaly { get; set; } /// @@ -190,7 +194,7 @@ public sealed class TimeSeriesPoint : IEquatable /// public Dictionary Dimension { get; set; } /// - /// Difference between value and expected value + /// Difference between value and expected value. 
/// public double Delta { get; set; } @@ -198,6 +202,8 @@ public TimeSeriesPoint(Dictionary dimension) { Dimension = dimension; } + public TimeSeriesPoint() { } + public TimeSeriesPoint(double value, double expectedValue, bool isAnomaly, Dictionary dimension) { Value = value; diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index d25b614d37..406d0c879f 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -527,10 +527,10 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile) { var ml = new MLContext(1); IDataView dataView; - if(loadDataFromFile) + if (loadDataFromFile) { var dataPath = GetDataPath(Path.Combine("Timeseries", "anomaly_detection.csv")); - + // Load data from file into the dataView dataView = ml.Data.LoadFromTextFile(dataPath, new[] { new TextLoader.Column("Value", DataKind.Single, 0), @@ -577,9 +577,9 @@ public void AnomalyDetectionWithSrCnn(bool loadDataFromFile) [Theory, CombinatorialData] public void TestSrCnnBatchAnomalyDetector( - [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)]SrCnnDetectMode mode, - [CombinatorialValues(true, false)]bool loadDataFromFile, - [CombinatorialValues(-1, 24, 26, 512)]int batchSize) + [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)] SrCnnDetectMode mode, + [CombinatorialValues(true, false)] bool loadDataFromFile, + [CombinatorialValues(-1, 24, 26, 512)] int batchSize) { var ml = new MLContext(1); IDataView dataView; @@ -596,7 +596,7 @@ public void TestSrCnnBatchAnomalyDetector( var data = new List(); for (int index = 0; index < 20; index++) { - data.Add(new TimeSeriesDataDouble { Value = 5 } ); + data.Add(new TimeSeriesDataDouble { Value = 5 }); } data.Add(new TimeSeriesDataDouble { Value = 10 
}); for (int index = 0; index < 5; index++)