diff --git a/build.proj b/build.proj index de8c507de7..15fea4e309 100644 --- a/build.proj +++ b/build.proj @@ -78,7 +78,7 @@ - https://aka.ms/tlc-resources/benchmarks/%(Identity) + https://aka.ms/mlnet-resources/benchmarks/%(Identity) $(MSBuildThisFileDirectory)/test/data/external/%(Identity) diff --git a/build/ExternalBenchmarkDataFiles.props b/build/ExternalBenchmarkDataFiles.props index ad3d350d60..42df4ccd96 100644 --- a/build/ExternalBenchmarkDataFiles.props +++ b/build/ExternalBenchmarkDataFiles.props @@ -1,5 +1,6 @@ + diff --git a/test/Microsoft.ML.Benchmarks/RffTransform.cs b/test/Microsoft.ML.Benchmarks/RffTransform.cs new file mode 100644 index 0000000000..aee30ddea8 --- /dev/null +++ b/test/Microsoft.ML.Benchmarks/RffTransform.cs @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using BenchmarkDotNet.Attributes; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.RunTests; +using Microsoft.ML.Transforms.Conversions; +using System.IO; + +namespace Microsoft.ML.Benchmarks +{ + public class RffTransformTrain + { + private string _dataPath_Digits; + + [GlobalSetup] + public void SetupTrainingSpeedTests() + { + _dataPath_Digits = Path.GetFullPath(TestDatasets.Digits.trainFilename); + + if (!File.Exists(_dataPath_Digits)) + throw new FileNotFoundException(string.Format(Errors.DatasetNotFound, _dataPath_Digits)); + } + + [Benchmark] + public void CV_Multiclass_Digits_RffTransform_OVAAveragedPerceptron() + { + var mlContext = new MLContext(); + var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments + { + Column = new[] + { + new TextLoader.Column("Label", DataKind.R4, 64), + new TextLoader.Column("Features", DataKind.R4, new [] { new TextLoader.Range() { Min = 0, Max = 63 }}) + }, + HasHeader = false, + Separator = "," + }); + + var data = 
reader.Read(_dataPath_Digits); + + var pipeline = mlContext.Transforms.Projection.CreateRandomFourierFeatures("Features", "FeaturesRFF") + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Transforms.Concatenate("Features", "FeaturesRFF")) + .Append(new ValueToKeyMappingEstimator(mlContext, "Label")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10))); + + var cvResults = mlContext.MulticlassClassification.CrossValidate(data, pipeline, numFolds: 5); + } + } +} diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs index 5760df712d..3a1f5df3be 100644 --- a/test/Microsoft.ML.TestFramework/Datasets.cs +++ b/test/Microsoft.ML.TestFramework/Datasets.cs @@ -137,6 +137,15 @@ public static class TestDatasets loaderSettings = "xf=expr{col=Features expr=x:float(x>4?1:0)}" }; + // The data set contains images of hand-written digits. + // The input is given in the form of an 8x8 matrix where + // each element is an integer in the range 0..16 + public static TestDataset Digits = new TestDataset + { + name = "Digits", + trainFilename = @"external/digits.csv", + }; + public static TestDataset vw = new TestDataset { name = "vw", diff --git a/test/data/README.md b/test/data/README.md index 6a21ece35f..a5e2870da4 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -16,6 +16,12 @@ The datasets are provided under the original terms that Microsoft received such > >Original readme: https://meta.wikimedia.org/wiki/Research:Detox +### Digits +> This dataset is provided under http://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits. +> +> References: C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their Applications to Handwritten Digit Recognition, MSc Thesis, Institute of Graduate Studies in Science and Engineering, Bogazici University. +> E. Alpaydin, C. 
Kaynak (1998) Cascading Classifiers, Kybernetika. + ### UCI Adult Dataset >Dua, D. and Karra Taniskidou, E. (2017). UCI Machine Learning Repository [https://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.