diff --git a/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs deleted file mode 100644 index 24ee741b72..0000000000 --- a/test/Microsoft.ML.Benchmarks/LegacyPredictionEngineBench.cs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using BenchmarkDotNet.Attributes; -using Microsoft.ML.Legacy; -using Microsoft.ML.Legacy.Data; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; - -namespace Microsoft.ML.Benchmarks -{ -#pragma warning disable 612, 618 - public class LegacyPredictionEngineBench - { - private IrisData _irisExample; - private PredictionModel _irisModel; - - private SentimentData _sentimentExample; - private PredictionModel _sentimentModel; - - private BreastCancerData _breastCancerExample; - private PredictionModel _breastCancerModel; - - [GlobalSetup(Target = nameof(MakeIrisPredictions))] - public void SetupIrisPipeline() - { - _irisExample = new IrisData() - { - SepalLength = 3.3f, - SepalWidth = 1.6f, - PetalLength = 0.2f, - PetalWidth = 5.1f, - }; - - string _irisDataPath = Program.GetInvariantCultureDataPath("iris.txt"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_irisDataPath).CreateFrom(useHeader: true, separator: '\t')); - pipeline.Add(new ColumnConcatenator("Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" })); - pipeline.Add(new StochasticDualCoordinateAscentClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _irisModel = pipeline.Train(); - } - - [GlobalSetup(Target = nameof(MakeSentimentPredictions))] - public void SetupSentimentPipeline() - { - _sentimentExample = new SentimentData() - { - SentimentText = "Not a big fan of this." - }; - - string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_sentimentDataPath).CreateFrom(useHeader: true, separator: '\t')); - pipeline.Add(new TextFeaturizer("Features", "SentimentText")); - pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _sentimentModel = pipeline.Train(); - } - - [GlobalSetup(Target = nameof(MakeBreastCancerPredictions))] - public void SetupBreastCancerPipeline() - { - _breastCancerExample = new BreastCancerData() - { - Features = new[] { 5f, 1f, 1f, 1f, 2f, 1f, 3f, 1f, 1f } - }; - - string _breastCancerDataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt"); - - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_breastCancerDataPath).CreateFrom(useHeader: false, separator: '\t')); - pipeline.Add(new StochasticDualCoordinateAscentBinaryClassifier() { NumThreads = 1, ConvergenceTolerance = 1e-2f }); - - _breastCancerModel = pipeline.Train(); - } - - [Benchmark] - public void MakeIrisPredictions() - { - for (int i = 0; i < 10000; i++) - { - _irisModel.Predict(_irisExample); - } - } - - [Benchmark] - public void MakeSentimentPredictions() - { - for (int i = 0; i < 10000; i++) - { - _sentimentModel.Predict(_sentimentExample); - } - } - - [Benchmark] - public void MakeBreastCancerPredictions() - { - for (int i = 0; i < 10000; i++) - { - _breastCancerModel.Predict(_breastCancerExample); - } - } - } -#pragma warning restore 612, 618 -} diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index fe16701fe7..33939af0a8 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -7,22 +7,23 @@ using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Engines; using Microsoft.ML.Data; -using Microsoft.ML.Legacy.Models; -using Microsoft.ML.Legacy.Trainers; -using Microsoft.ML.Legacy.Transforms; +using Microsoft.ML.Learners; using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; namespace Microsoft.ML.Benchmarks { -#pragma warning disable 612, 618 public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics { private readonly string _dataPath = Program.GetInvariantCultureDataPath("iris.txt"); private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination + private readonly MLContext _env = new MLContext(seed: 1); + private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; + private readonly IrisData _example = new IrisData() { SepalLength = 3.3f, @@ -31,37 +32,47 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics PetalWidth = 5.1f, }; - private Legacy.PredictionModel _trainedModel; + private TransformerChain> _trainedModel; + private PredictionEngine _predictionEngine; private IrisData[][] _batches; - private ClassificationMetrics _metrics; + private MultiClassClassifierMetrics _metrics; protected override IEnumerable GetMetrics() { if (_metrics != null) yield return new Metric( - nameof(ClassificationMetrics.AccuracyMacro), + nameof(MultiClassClassifierMetrics.AccuracyMacro), _metrics.AccuracyMacro.ToString("0.##", CultureInfo.InvariantCulture)); } [Benchmark] - public Legacy.PredictionModel TrainIris() => Train(_dataPath); + public TransformerChain> TrainIris() => Train(_dataPath); - private Legacy.PredictionModel Train(string dataPath) + private TransformerChain> Train(string dataPath) { - var pipeline = new Legacy.LearningPipeline(); + var reader = new TextLoader(_env, + columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4), + }, + hasHeader: true + ); - pipeline.Add(new Legacy.Data.TextLoader(dataPath).CreateFrom(useHeader: true)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + IDataView data = reader.Read(dataPath); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); + var pipeline = new ColumnConcatenatingEstimator(_env, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" }) + .Append(new SdcaMultiClassTrainer(_env, "Label", "Features")); - return pipeline.Train(); + return pipeline.Fit(data); } [Benchmark] public void TrainSentiment() { - var env = new MLContext(seed: 1); // Pipeline var arguments = new TextLoader.Arguments() { @@ -85,9 +96,9 @@ public void TrainSentiment() AllowQuoting = false, AllowSparse = false }; - var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments); + var loader = _env.Data.ReadFromTextFile(_sentimentDataPath, arguments); - var text = TextFeaturizingEstimator.Create(env, + var text = TextFeaturizingEstimator.Create(_env, new TextFeaturizingEstimator.Arguments() { Column = new TextFeaturizingEstimator.Column @@ -103,7 +114,7 @@ public void TrainSentiment() WordFeatureExtractor = null, }, loader); - var trans = WordEmbeddingsExtractingTransformer.Create(env, + var trans = WordEmbeddingsExtractingTransformer.Create(_env, new WordEmbeddingsExtractingTransformer.Arguments() { Column = new WordEmbeddingsExtractingTransformer.Column[1] @@ -118,7 +129,7 @@ public void TrainSentiment() }, text); // Train - var trainer = new SdcaMultiClassTrainer(env, "Label", "Features", maxIterations: 20); + var trainer = new SdcaMultiClassTrainer(_env, "Label", "Features", maxIterations: 20); var predicted = trainer.Fit(trans); _consumer.Consume(predicted); } @@ -127,41 +138,49 @@ public void TrainSentiment() public void SetupPredictBenchmarks() { _trainedModel = Train(_dataPath); - _consumer.Consume(_trainedModel.Predict(_example)); + _predictionEngine = _trainedModel.CreatePredictionEngine(_env); + _consumer.Consume(_predictionEngine.Predict(_example)); + + var reader = new TextLoader(_env, + columns: new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("SepalLength", DataKind.R4, 1), + new TextLoader.Column("SepalWidth", DataKind.R4, 2), + new TextLoader.Column("PetalLength", DataKind.R4, 3), + new TextLoader.Column("PetalWidth", DataKind.R4, 4), + }, + hasHeader: true + ); - var testData = new Legacy.Data.TextLoader(_dataPath).CreateFrom(useHeader: true); - var evaluator = new ClassificationEvaluator(); - _metrics = evaluator.Evaluate(_trainedModel, testData); + IDataView testData = reader.Read(_dataPath); + IDataView scoredTestData = _trainedModel.Transform(testData); + var evaluator = new MultiClassClassifierEvaluator(_env, new MultiClassClassifierEvaluator.Arguments()); + _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel); _batches = new IrisData[_batchSizes.Length][]; for (int i = 0; i < _batches.Length; i++) { var batch = new IrisData[_batchSizes[i]]; - _batches[i] = batch; for (int bi = 0; bi < batch.Length; bi++) { batch[bi] = _example; } + _batches[i] = batch; } } [Benchmark] - public float[] PredictIris() => _trainedModel.Predict(_example).PredictedLabels; + public float[] PredictIris() => _predictionEngine.Predict(_example).PredictedLabels; [Benchmark] - public void PredictIrisBatchOf1() => Consume(_trainedModel.Predict(_batches[0])); + public void PredictIrisBatchOf1() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[0])); [Benchmark] - public void PredictIrisBatchOf2() => Consume(_trainedModel.Predict(_batches[1])); + public void PredictIrisBatchOf2() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[1])); [Benchmark] - public void PredictIrisBatchOf5() => Consume(_trainedModel.Predict(_batches[2])); - - private void Consume(IEnumerable predictions) - { - foreach (var prediction in predictions) - _consumer.Consume(prediction); - } + public void PredictIrisBatchOf5() => _trainedModel.Transform(_env.CreateStreamingDataView(_batches[2])); } public class IrisData @@ -187,5 +206,4 @@ public class IrisPrediction [ColumnName("Score")] public float[] PredictedLabels; } -#pragma warning restore 612, 618 }