dotnet · Anipik · Dec 21, 2018 · Dec 10, 2018 · Dec 10, 2018 · Dec 14, 2018
diff --git a/build.proj b/build.proj
@@ -78,7 +78,7 @@
 
   <ItemGroup Condition="'$(IncludeBenchmarkData)' == 'true'">
     <BenchmarkFile Update="@(BenchmarkFile)">
-      <Url>https://aka.ms/tlc-resources/benchmarks/%(Identity)</Url>
+      <Url>https://aka.ms/mlnet-resources/benchmarks/%(Identity)</Url>
       <DestinationFile>$(MSBuildThisFileDirectory)/test/data/external/%(Identity)</DestinationFile>
     </BenchmarkFile>
 

diff --git a/build/ExternalBenchmarkDataFiles.props b/build/ExternalBenchmarkDataFiles.props
@@ -1,5 +1,6 @@
 <Project>
   <ItemGroup>
+    <BenchmarkFile Include="digits.csv" />
     <BenchmarkFile Include="MSLRWeb10KTest240kRows.tsv" />
     <BenchmarkFile Include="MSLRWeb10KTrain720kRows.tsv" />
     <BenchmarkFile Include="MSLRWeb10KValidate240kRows.tsv" />

diff --git a/test/Microsoft.ML.Benchmarks/RffTransform.cs b/test/Microsoft.ML.Benchmarks/RffTransform.cs
@@ -0,0 +1,52 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Attributes;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.RunTests;
+using Microsoft.ML.Transforms.Conversions;
+using System.IO;
+
+namespace Microsoft.ML.Benchmarks
+{
+    /// <summary>
+    /// Benchmarks cross-validated training of a multiclass pipeline on the Digits data:
+    /// random Fourier features feeding a one-versus-all averaged perceptron.
+    /// </summary>
+    public class RffTransformTrain
+    {
+        // Absolute path of the Digits data file; assigned once in the global setup.
+        private string _dataPath_Digits;
+
+        /// <summary>
+        /// Resolves the full path of the Digits data file and verifies that the file exists,
+        /// so that a missing data file is reported before any benchmark runs.
+        /// </summary>
+        /// <exception cref="FileNotFoundException">The resolved data file does not exist.</exception>
+        [GlobalSetup]
+        public void SetupTrainingSpeedTests()
+        {
+            _dataPath_Digits = Path.GetFullPath(TestDatasets.Digits.trainFilename);
+
+            if (!File.Exists(_dataPath_Digits))
+                throw new FileNotFoundException(string.Format(Errors.DatasetNotFound, _dataPath_Digits));
+        }
+
+        /// <summary>
+        /// Reads the Digits data and runs 5-fold cross-validation of the
+        /// random-Fourier-features + one-versus-all averaged perceptron pipeline.
+        /// </summary>
+        [Benchmark]
+        public void CV_Multiclass_Digits_RffTransform_OVAAveragedPerceptron()
+        {
+            var mlContext = new MLContext();
+            var reader = mlContext.Data.CreateTextReader(new TextLoader.Arguments
+            {
+                Column = new[]
+                {
+                    // Column 64 is read as the label; columns 0..63 form the feature vector.
+                    new TextLoader.Column("Label", DataKind.R4, 64),
+                    new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range() { Min = 0, Max = 63 } })
+                },
+                HasHeader = false,
+                Separator = ","
+            });
+
+            var data = reader.Read(_dataPath_Digits);
+
+            var pipeline = mlContext.Transforms.Projection.CreateRandomFourierFeatures("Features", "FeaturesRFF")
+                .AppendCacheCheckpoint(mlContext)
+                // Expose the RFF output under the name "Features"
+                // (presumably the trainer's default feature column - confirm).
+                .Append(mlContext.Transforms.Concatenate("Features", "FeaturesRFF"))
+                .Append(new ValueToKeyMappingEstimator(mlContext, "Label"))
+                .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10)));
+
+            // The returned metrics are intentionally not stored: nothing in this method reads them.
+            mlContext.MulticlassClassification.CrossValidate(data, pipeline, numFolds: 5);
+        }
+    }
+}
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -137,6 +137,15 @@ public static class TestDatasets
             loaderSettings = "xf=expr{col=Features expr=x:float(x>4?1:0)}"
         };
 
+        // The data set contains images of hand-written digits.
+        // Each image is given as an 8x8 matrix in which
+        // every element is an integer in the range 0..16.
+        public static TestDataset Digits = new TestDataset
+        {
+            name = "Digits",
+            trainFilename = @"external/digits.csv",
+        };
+
         public static TestDataset vw = new TestDataset
         {
             name = "vw",

diff --git a/test/data/README.md b/test/data/README.md
@@ -16,6 +16,12 @@ The datasets are provided under the original terms that Microsoft received such
 >
 >Original readme: https://meta.wikimedia.org/wiki/Research:Detox
 
+### Digits
+> This dataset is provided under http://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits.
+>
+> References: C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their Applications to Handwritten Digit Recognition, MSc Thesis, Institute of Graduate Studies in Science and Engineering, Bogazici University.
+> E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika.
+
 ### UCI Adult Dataset 
 
 >Dua, D. and Karra Taniskidou, E. (2017). UCI Machine Learning Repository [https://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.