From 53f3dd1e4e9bc1f5a85508f9e008298cac54f533 Mon Sep 17 00:00:00 2001 From: "REDMOND\\nakazmi" Date: Fri, 4 Jan 2019 13:47:22 -0800 Subject: [PATCH 1/4] Remove Legacy dependency from Microsoft.ML.Benchmarks --- .../LegacyPredictionEngineBench.cs | 109 ------------------ ...sticDualCoordinateAscentClassifierBench.cs | 84 +++++++------- 2 files changed, 44 insertions(+), 149 deletions(-) delete mode 100644 test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs diff --git a/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs deleted file mode 100644 index 24ee741b72..0000000000 --- a/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using BenchmarkDotNet.Attributes; -using Microsoft.ML.Legacy; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Benchmarks -{ -#pragma warning disable 612, 618 - public class LegacyPredictionEngineBench - { - private IrisData _irisExample; - private PredictionModel _irisModel; - - private SentimentData _sentimentExample; - private PredictionModel _sentimentModel; - - private BreastCancerData _breastCancerExample; - private PredictionModel _breastCancerModel; - - [GlobalSetup(Target = nameof(MakeIrisPredictions))] - public void SetupIrisPipeline() - { - _irisExample = new IrisData() - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }; - - string _irisDataPath = Program.GetInvariantCultureDataPath("iris.txt"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_irisDataPath).CreateFrom(useHeader: true, separator: '\t')); - pipeline.Add(new ColumnConcatenator("Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" })); - pipeline.Add(new StochasticDualCoordinateAscentClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _irisModel = pipeline.Train(); - } - - [GlobalSetup(Target = nameof(MakeSentimentPredictions))] - public void SetupSentimentPipeline() - { - _sentimentExample = new SentimentData() - { - SentimentText = "Not a big fan of this." - }; - - string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_sentimentDataPath).CreateFrom(useHeader: true, separator: '\t')); - pipeline.Add(new TextFeaturizer("Features", "SentimentText")); - pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _sentimentModel = pipeline.Train(); - } - - [GlobalSetup(Target = nameof(MakeBreastCancerPredictions))] - public void SetupBreastCancerPipeline() - { - _breastCancerExample = new BreastCancerData() - { - Features = new[] { 5f, 1f, 1f, 1f, 2f, 1f, 3f, 1f, 1f } - }; - - string _breastCancerDataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_breastCancerDataPath).CreateFrom(useHeader: false, separator: '\t')); - pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _breastCancerModel = pipeline.Train(); - } - - [Benchmark] - public void MakeIrisPredictions() - { - for (int i = 0; i < 10000; i++) - { - _irisModel.Predict(_irisExample); - } - } - - [Benchmark] - public void MakeSentimentPredictions() - { - for (int i = 0; i < 10000; i++) - { - _sentimentModel.Predict(_sentimentExample); - } - } - - [Benchmark] - public void MakeBreastCancerPredictions() - { - for (int i = 0; i < 10000; i++) - { - _breastCancerModel.Predict(_breastCancerExample); - } - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index fe16701fe7..5ccb7a48c4 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -7,10 +7,9 @@ using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Engines; using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; +using Microsoft.ML.Learners; using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; namespace Microsoft.ML.Benchmarks @@ -22,7 +21,6 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination - private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; private readonly IrisData _example = new IrisData() { SepalLength = 3.3f, @@ -31,31 +29,42 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics PetalWidth = 5.1f, }; - private Legacy.PredictionModel _trainedModel; - private IrisData[][] _batches; - private ClassificationMetrics _metrics; + private TransformerChain> _trainedModel; + private PredictionEngine _predictionEngine; + private MultiClassClassifierMetrics _metrics; protected override IEnumerable GetMetrics() { if (_metrics != null) yield return new Metric( - nameof(ClassificationMetrics.AccuracyMacro), + nameof(MultiClassClassifierMetrics.AccuracyMacro), _metrics.AccuracyMacro.ToString("0.##", CultureInfo.InvariantCulture)); } [Benchmark] - public Legacy.PredictionModel TrainIris() => Train(_dataPath); + public TransformerChain> TrainIris() => Train(_dataPath); - private Legacy.PredictionModel Train(string dataPath) + private TransformerChain> Train(string dataPath) { - var pipeline = new Legacy.LearningPipeline(); + var env = new MLContext(seed: 1, conc: 1); + var reader = new TextLoader(env, + columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4), + }, + hasHeader: true + ); - pipeline.Add(new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: true)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + IDataView data = reader.Read(dataPath); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); + var pipeline = new ColumnConcatenatingEstimator(env, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" }) + .Append(new SdcaMultiClassTrainer(env, "Label", "Features")); - return pipeline.Train(); + return pipeline.Fit(data); } [Benchmark] @@ -123,39 +132,34 @@ public void TrainSentiment() _consumer.Consume(predicted); } - [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })] + [GlobalSetup(Target = nameof(PredictIris))] public void SetupPredictBenchmarks() { + var env = new MLContext(seed: 1, conc: 1); _trainedModel = Train(_dataPath); - _consumer.Consume(_trainedModel.Predict(_example)); + _predictionEngine = _trainedModel.CreatePredictionEngine(env); + _consumer.Consume(_predictionEngine.Predict(_example)); - var testData = new Legacy.Data.TextLoader(_dataPath).CreateFrom(useHeader: true); - var evaluator = new ClassificationEvaluator(); - _metrics = evaluator.Evaluate(_trainedModel, testData); + var reader = new TextLoader(env, + columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4), + }, + hasHeader: true + ); - _batches = new IrisData[_batchSizes.Length][]; - for (int i = 0; i < _batches.Length; i++) - { - var batch = new IrisData[_batchSizes[i]]; - _batches[i] = batch; - for (int bi = 0; bi < batch.Length; bi++) - { - batch[bi] = _example; - } - } + IDataView testData = reader.Read(_dataPath); + IDataView scoredTestData = _trainedModel.Transform(testData); + var evaluator = new MultiClassClassifierEvaluator(env, new MultiClassClassifierEvaluator.Arguments()); + _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel); } [Benchmark] - public float[] PredictIris() => _trainedModel.Predict(_example).PredictedLabels; - - [Benchmark] - public void PredictIrisBatchOf1() => Consume(_trainedModel.Predict(_batches[0])); - - [Benchmark] - public void PredictIrisBatchOf2() => Consume(_trainedModel.Predict(_batches[1])); - - [Benchmark] - public void PredictIrisBatchOf5() => Consume(_trainedModel.Predict(_batches[2])); + public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels; private void Consume(IEnumerable predictions) { From 1fb6c36d5bbcb0a977f068162d2a1d023ededaec Mon Sep 17 00:00:00 2001 From: "REDMOND\\nakazmi" Date: Fri, 4 Jan 2019 15:20:22 -0800 Subject: [PATCH 2/4] Use model.Transform for batch prediction benchmarks --- ...sticDualCoordinateAscentClassifierBench.cs | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 5ccb7a48c4..38cca2fbad 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -21,6 +21,8 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination + private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; + private readonly IrisData _example = new IrisData() { SepalLength = 3.3f, @@ -31,6 +33,7 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics private TransformerChain> _trainedModel; private PredictionEngine _predictionEngine; + private IrisData[][] _batches; private MultiClassClassifierMetrics _metrics; protected override IEnumerable GetMetrics() @@ -132,7 +135,7 @@ public void TrainSentiment() _consumer.Consume(predicted); } - [GlobalSetup(Target = nameof(PredictIris))] + [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })] public void SetupPredictBenchmarks() { var env = new MLContext(seed: 1, conc: 1); @@ -156,15 +159,41 @@ public void SetupPredictBenchmarks() IDataView scoredTestData = _trainedModel.Transform(testData); var evaluator = new MultiClassClassifierEvaluator(env, new MultiClassClassifierEvaluator.Arguments()); _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel); + + _batches = new IrisData[_batchSizes.Length][]; + for (int i = 0; i < _batches.Length; i++) + { + var batch = new IrisData[_batchSizes[i]]; + for (int bi = 0; bi < batch.Length; bi++) + { + batch[bi] = _example; + } + _batches[i] = batch; + } } [Benchmark] public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels; - private void Consume(IEnumerable predictions) + [Benchmark] + public void PredictIrisBatchOf1() + { + var env = new MLContext(seed: 1, conc: 1); + _trainedModel.Transform(env.CreateStreamingDataView(_batches[0])); + } + + [Benchmark] + public void PredictIrisBatchOf2() + { + var env = new MLContext(seed: 1, conc: 1); + _trainedModel.Transform(env.CreateStreamingDataView(_batches[1])); + } + + [Benchmark] + public void PredictIrisBatchOf5() { - foreach (var prediction in predictions) - _consumer.Consume(prediction); + var env = new MLContext(seed: 1, conc: 1); + _trainedModel.Transform(env.CreateStreamingDataView(_batches[2])); } } From b96e6883f8f0c0599d00afd37d0bcf5d4c718b30 Mon Sep 17 00:00:00 2001 From: "REDMOND\\nakazmi" Date: Fri, 4 Jan 2019 16:20:47 -0800 Subject: [PATCH 3/4] Removing #pragma warning disable --- .../StochasticDualCoordinateAscentClassifierBench.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 38cca2fbad..635296f235 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -14,7 +14,6 @@ namespace Microsoft.ML.Benchmarks { -#pragma warning disable 612, 618 public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics { private readonly string _dataPath = Program.GetInvariantCultureDataPath("iris.txt"); @@ -220,5 +219,4 @@ public class IrisPrediction [ColumnName("Score")] public float[] PredictedLabels; } -#pragma warning restore 612, 618 } From b1dc988267803b5b01980732fe9778b8efd0d0f3 Mon Sep 17 00:00:00 2001 From: "REDMOND\\nakazmi" Date: Fri, 4 Jan 2019 17:09:40 -0800 Subject: [PATCH 4/4] Using one mlContext for both training and prediction --- ...sticDualCoordinateAscentClassifierBench.cs | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 635296f235..33939af0a8 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -20,6 +20,8 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination + private readonly MLContext _env = new MLContext(seed: 1); + private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; private readonly IrisData _example = new IrisData() @@ -48,8 +50,7 @@ protected override IEnumerable GetMetrics() private TransformerChain> Train(string dataPath) { - var env = new MLContext(seed: 1, conc: 1); - var reader = new TextLoader(env, + var reader = new TextLoader(_env, columns: new[] { new TextLoader.Column("Label", DataKind.R4, 0), @@ -63,8 +64,8 @@ private TransformerChain(env); + _predictionEngine = _trainedModel.CreatePredictionEngine(_env); _consumer.Consume(_predictionEngine.Predict(_example)); - var reader = new TextLoader(env, + var reader = new TextLoader(_env, columns: new[] { new TextLoader.Column("Label", DataKind.R4, 0), @@ -156,7 +155,7 @@ public void SetupPredictBenchmarks() IDataView testData = reader.Read(_dataPath); IDataView scoredTestData = _trainedModel.Transform(testData); - var evaluator = new MultiClassClassifierEvaluator(env, new MultiClassClassifierEvaluator.Arguments()); + var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments()); _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel); _batches = new IrisData[_batchSizes.Length][]; @@ -175,25 +174,13 @@ public void SetupPredictBenchmarks() public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels; [Benchmark] - public void PredictIrisBatchOf1() - { - var env = new MLContext(seed: 1, conc: 1); - _trainedModel.Transform(env.CreateStreamingDataView(_batches[0])); - } + public void PredictIrisBatchOf1() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[0])); [Benchmark] - public void PredictIrisBatchOf2() - { - var env = new MLContext(seed: 1, conc: 1); - _trainedModel.Transform(env.CreateStreamingDataView(_batches[1])); - } + public void PredictIrisBatchOf2() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[1])); [Benchmark] - public void PredictIrisBatchOf5() - { - var env = new MLContext(seed: 1, conc: 1); - _trainedModel.Transform(env.CreateStreamingDataView(_batches[2])); - } + public void PredictIrisBatchOf5() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[2])); } public class IrisData