diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs index 179dda7444..52729c6c97 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs @@ -4,6 +4,10 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.TimeSeriesProcessing; +using Microsoft.ML.Core.Data; +using Microsoft.ML.TimeSeries; +using System.IO; +using Microsoft.ML.Data; namespace Microsoft.ML.Samples.Dynamic { @@ -34,26 +38,26 @@ public static void IidChangePointDetectorTransform() var ml = new MLContext(); // Generate sample series data with a change - const int size = 16; - var data = new List(size); - for (int i = 0; i < size / 2; i++) + const int Size = 16; + var data = new List(Size); + for (int i = 0; i < Size / 2; i++) data.Add(new IidChangePointData(5)); // This is a change point - for (int i = 0; i < size / 2; i++) + for (int i = 0; i < Size / 2; i++) data.Add(new IidChangePointData(7)); // Convert data to IDataView. var dataView = ml.CreateStreamingDataView(data); // Setup IidSpikeDetector arguments - string outputColumnName = "Prediction"; - string inputColumnName = "Value"; + string outputColumnName = nameof(ChangePointPrediction.Prediction); + string inputColumnName = nameof(IidChangePointData.Value); var args = new IidChangePointDetector.Arguments() { Source = inputColumnName, Name = outputColumnName, Confidence = 95, // The confidence for spike detection in the range [0, 100] - ChangeHistoryLength = size / 4, // The length of the sliding window on p-values for computing the martingale score. + ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score. }; // The transformed data. 
@@ -88,5 +92,116 @@ public static void IidChangePointDetectorTransform()
             // 7 0 7.00 0.50 0.00
             // 7 0 7.00 0.50 0.00
         }
+
+        // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot).
+        // IidChangePointDetector is applied then to identify points where data distribution changed using time series
+        // prediction engine. The engine is checkpointed and then loaded back from disk into memory and used for prediction.
+        public static void IidChangePointDetectorPrediction()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var ml = new MLContext();
+
+            // Generate sample series data with a change
+            const int Size = 16;
+            var data = new List<IidChangePointData>(Size);
+            for (int i = 0; i < Size / 2; i++)
+                data.Add(new IidChangePointData(5));
+            // This is a change point
+            for (int i = 0; i < Size / 2; i++)
+                data.Add(new IidChangePointData(7));
+
+            // Convert data to IDataView.
+            var dataView = ml.CreateStreamingDataView(data);
+
+            // Setup IidChangePointDetector arguments
+            string outputColumnName = nameof(ChangePointPrediction.Prediction);
+            string inputColumnName = nameof(IidChangePointData.Value);
+            var args = new IidChangePointDetector.Arguments()
+            {
+                Source = inputColumnName,
+                Name = outputColumnName,
+                Confidence = 95, // The confidence for change point detection in the range [0, 100]
+                ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score.
+            };
+
+            // Time Series model.
+            ITransformer model = new IidChangePointEstimator(ml, args).Fit(dataView);
+
+            // Create a time series prediction engine from the model.
+            var engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml);
+            for (int index = 0; index < 8; index++)
+            {
+                // Anomaly change point detection.
+                var prediction = engine.Predict(new IidChangePointData(5));
+                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 5, prediction.Prediction[0],
+                    prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
+            }
+
+            // Change point
+            var changePointPrediction = engine.Predict(new IidChangePointData(7));
+            Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, changePointPrediction.Prediction[0],
+                changePointPrediction.Prediction[1], changePointPrediction.Prediction[2], changePointPrediction.Prediction[3]);
+
+            // Checkpoint the model.
+            var modelPath = "temp.zip";
+            engine.CheckPoint(ml, modelPath);
+
+            // Reference to current time series engine because in the next step "engine" will point to the
+            // checkpointed model being loaded from disk.
+            var timeseries1 = engine;
+
+            // Load the model.
+            using (var file = File.OpenRead(modelPath))
+                model = TransformerChain.LoadFrom(ml, file);
+
+            // Create a time series prediction engine from the checkpointed model.
+            engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml);
+            for (int index = 0; index < 8; index++)
+            {
+                // Anomaly change point detection.
+                var prediction = engine.Predict(new IidChangePointData(7));
+                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, prediction.Prediction[0],
+                    prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
+            }
+
+            // Prediction from the original time series engine should match the prediction from
+            // the checkpointed model.
+            engine = timeseries1;
+            for (int index = 0; index < 8; index++)
+            {
+                // Anomaly change point detection.
+                var prediction = engine.Predict(new IidChangePointData(7));
+                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, prediction.Prediction[0],
+                    prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
+            }
+
+            // Data Alert Score P-Value Martingale value
+            // 5 0 5.00 0.50 0.00 <-- Time Series 1.
+            // 5 0 5.00 0.50 0.00
+            // 5 0 5.00 0.50 0.00
+            // 5 0 5.00 0.50 0.00
+            // 5 0 5.00 0.50 0.00
+            // 5 0 5.00 0.50 0.00
+            // 5 0 5.00 0.50 0.00
+            // 5 0 5.00 0.50 0.00
+            // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint (and model is checkpointed).
+
+            // 7 0 7.00 0.13 33950.16 <-- Time Series 2 : Model loaded back from disk and prediction is made.
+            // 7 0 7.00 0.26 60866.34
+            // 7 0 7.00 0.38 78362.04
+            // 7 0 7.00 0.50 0.01
+            // 7 0 7.00 0.50 0.00
+            // 7 0 7.00 0.50 0.00
+            // 7 0 7.00 0.50 0.00
+
+            // 7 0 7.00 0.13 33950.16 <-- Time Series 1 and prediction is made.
+            // 7 0 7.00 0.26 60866.34
+            // 7 0 7.00 0.38 78362.04
+            // 7 0 7.00 0.50 0.01
+            // 7 0 7.00 0.50 0.00
+            // 7 0 7.00 0.50 0.00
+            // 7 0 7.00 0.50 0.00
+        }
     }
 }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs
index 3c6ae7a9c9..5dcb7e1774 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs
@@ -1,9 +1,13 @@
 using System;
+using System.IO;
 using System.Linq;
 using System.Collections.Generic;
+using Microsoft.ML.Data;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Runtime.Api;
 using Microsoft.ML.Runtime.TimeSeriesProcessing;
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.TimeSeries;

 namespace Microsoft.ML.Samples.Dynamic
 {
@@ -34,27 +38,27 @@ public static void IidSpikeDetectorTransform()
             var ml = new MLContext();

             // Generate sample series data with a spike
-            const int size = 10;
-            var data = new List<IidSpikeData>(size);
-            for (int i = 0; i < size / 2; i++)
+            const int Size = 10;
+            var data = new List<IidSpikeData>(Size);
+            for (int i = 0; i < Size / 2; i++)
                 data.Add(new IidSpikeData(5));
             // This is a spike
             data.Add(new IidSpikeData(10));
-            for (int i = 0; i < size / 2; i++)
+            for (int i = 0; i < Size / 2; i++)
                 data.Add(new IidSpikeData(5));

             // Convert data to IDataView.
             var dataView = ml.CreateStreamingDataView(data);

             // Setup IidSpikeDetector arguments
-            string outputColumnName = "Prediction";
-            string inputColumnName = "Value";
+            string outputColumnName = nameof(IidSpikePrediction.Prediction);
+            string inputColumnName = nameof(IidSpikeData.Value);
             var args = new IidSpikeDetector.Arguments()
             {
                 Source = inputColumnName,
                 Name = outputColumnName,
                 Confidence = 95, // The confidence for spike detection in the range [0, 100]
-                PvalueHistoryLength = size / 4 // The size of the sliding window for computing the p-value
+                PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
             };

             // The transformed data.
@@ -83,5 +87,88 @@ public static void IidSpikeDetectorTransform()
             // 0 5.00 0.50
             // 0 5.00 0.50
         }
+
+        // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot).
+        // IidSpikeDetector is then applied to identify spiking points using a time series prediction engine.
+        // The engine is checkpointed, then loaded back from disk into memory and used for prediction.
+        public static void IidSpikeDetectorPrediction()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var ml = new MLContext();
+
+            // Generate sample series data with a spike
+            const int Size = 10;
+            var data = new List<IidSpikeData>(Size);
+            for (int i = 0; i < Size / 2; i++)
+                data.Add(new IidSpikeData(5));
+            // This is a spike
+            data.Add(new IidSpikeData(10));
+            for (int i = 0; i < Size / 2; i++)
+                data.Add(new IidSpikeData(5));
+
+            // Convert data to IDataView.
+            var dataView = ml.CreateStreamingDataView(data);
+
+            // Setup IidSpikeDetector arguments
+            string outputColumnName = nameof(IidSpikePrediction.Prediction);
+            string inputColumnName = nameof(IidSpikeData.Value);
+            var args = new IidSpikeDetector.Arguments()
+            {
+                Source = inputColumnName,
+                Name = outputColumnName,
+                Confidence = 95, // The confidence for spike detection in the range [0, 100]
+                PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
+            };
+
+            // The transformed model.
+            ITransformer model = new IidSpikeEstimator(ml, args).Fit(dataView);
+
+            // Create a time series prediction engine from the model.
+            var engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml);
+            for (int index = 0; index < 5; index++)
+            {
+                // Anomaly spike detection.
+                var prediction = engine.Predict(new IidSpikeData(5));
+                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
+                    prediction.Prediction[1], prediction.Prediction[2]);
+            }
+
+            // Spike.
+            var spikePrediction = engine.Predict(new IidSpikeData(10));
+            Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 10, spikePrediction.Prediction[0],
+                spikePrediction.Prediction[1], spikePrediction.Prediction[2]);
+
+            // Checkpoint the model.
+            var modelPath = "temp.zip";
+            engine.CheckPoint(ml, modelPath);
+
+            // Load the model.
+            using (var file = File.OpenRead(modelPath))
+                model = TransformerChain.LoadFrom(ml, file);
+
+            // Create a time series prediction engine from the checkpointed model, so that the
+            // predictions below really come from the model that was loaded back from disk.
+            engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml);
+            for (int index = 0; index < 5; index++)
+            {
+                // Anomaly spike detection.
+                var prediction = engine.Predict(new IidSpikeData(5));
+                Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
+                    prediction.Prediction[1], prediction.Prediction[2]);
+            }
+
+            // Data Alert Score P-Value
+            // 5 0 5.00 0.50
+            // 5 0 5.00 0.50
+            // 5 0 5.00 0.50
+            // 5 0 5.00 0.50
+            // 5 0 5.00 0.50
+            // 10 1 10.00 0.00 <-- alert is on, predicted spike (check-point model)
+            // 5 0 5.00 0.26 <-- load model from disk.
+            // 5 0 5.00 0.26
+            // 5 0 5.00 0.50
+            // 5 0 5.00 0.50
+            // 5 0 5.00 0.50
+        }
     }
 }
diff --git a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs
index 487a01a4d4..3a899f4339 100644
--- a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs
+++ b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs
@@ -212,7 +212,8 @@ public sealed class IidChangePointEstimator : TrivialEstimator
         /// Host Environment.
         /// Name of the input column.
-        /// Name of the output column.
+        /// Name of the output column.
Column is a vector of type double and size 4. + /// The vector contains Alert, Raw Score, P-Value and Martingale score as first four values. /// The confidence for change point detection in the range [0, 100]. /// The length of the sliding window on p-values for computing the martingale score. /// The martingale used for scoring. diff --git a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs index 7bc8c6b7a2..3fdf968025 100644 --- a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs @@ -191,7 +191,8 @@ public sealed class IidSpikeEstimator : TrivialEstimator /// /// Host Environment. /// Name of the input column. - /// Name of the output column. + /// Name of the output column. Column is a vector of type double and size 3. + /// The vector contains Alert, Raw Score, P-Value as first three values. /// The confidence for spike detection in the range [0, 100]. /// The size of the sliding window for computing the p-value. /// The argument that determines whether to detect positive or negative anomalies, or both. diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index 4c1d1681f6..792b06d6b3 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -53,6 +53,12 @@ public sealed class TimeSeriesPredictionFunction : PredictionEngineB private long _rowPosition; private ITransformer InputTransformer { get; set; } + /// + /// Checkpoints the time series model to disk with the updated + /// state. + /// + /// Usually an MLContext. + /// Path to file on disk where the updated model needs to be saved. 
public void CheckPoint(IHostEnvironment env, string modelPath) { using (var file = File.Create(modelPath)) @@ -246,6 +252,26 @@ public override void Predict(TSrc example, ref TDst prediction) public static class PredictionFunctionExtensions { + /// + /// Creates a prediction function/engine for a time series pipeline. + /// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model. + /// + /// Class describing input schema to the model. + /// Class describing the output schema of the prediction. + /// The time series pipeline in the form of an ITransformer. + /// Usually an MLContext. + /// To ignore missing columns. Default is false. + /// Input schema definition. Default is null. + /// Output schema definition. Default is null. + ///

Example code can be found by searching for TimeSeriesPredictionFunction in ML.NET.

+ /// + /// + /// + /// + /// public static TimeSeriesPredictionFunction CreateTimeSeriesPredictionFunction(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class diff --git a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs index 856ba1a0e3..9171c34662 100644 --- a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs @@ -225,7 +225,8 @@ public sealed class SsaChangePointEstimator : IEstimator /// /// Host Environment. /// Name of the input column. - /// Name of the output column. + /// Name of the output column. Column is a vector of type double and size 4. + /// The vector contains Alert, Raw Score, P-Value and Martingale score as first four values. /// The confidence for change point detection in the range [0, 100]. /// The number of points from the beginning of the sequence used for training. /// The size of the sliding window for computing the p-value. diff --git a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs index 9c3d72cd2f..f98c8bf34b 100644 --- a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs @@ -206,7 +206,8 @@ public sealed class SsaSpikeEstimator : IEstimator /// /// Host Environment. /// Name of the input column. - /// Name of the output column. + /// Name of the output column. Column is a vector of type double and size 3. + /// The vector contains Alert, Raw Score, P-Value as first three values. /// The confidence for spike detection in the range [0, 100]. /// The size of the sliding window for computing the p-value. /// The number of points from the beginning of the sequence used for training.