Add LdSvm and LdSvmWithOptions binary classification samples

yaeldMS · yaeldMS · commit 6266a48b53ff · 2019-12-27T16:30:56.000+02:00
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvm.cs
@@ -0,0 +1,143 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class LdSvm
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. The seed is set to a fixed number
+            // in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Create a sequence of 1000 training data points.
+            var dataPoints = GenerateRandomDataPoints(1000);
+
+            // Convert the data points to an IDataView object, which is
+            // consumable by the ML.NET API.
+            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
+
+            // Define the trainer (LdSvm, called here without an options object).
+            var pipeline = mlContext.BinaryClassification.Trainers
+                .LdSvm();
+
+            // Train the model.
+            var model = pipeline.Fit(trainingData);
+
+            // Create testing data. Use a different random seed (123 instead of the
+            // default 0) so that it differs from the training data.
+            var testData = mlContext.Data
+                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+
+            // Run the model on the test data set.
+            var transformedTestData = model.Transform(testData);
+
+            // Convert the IDataView object to a list.
+            var predictions = mlContext.Data
+                .CreateEnumerable<Prediction>(transformedTestData,
+                reuseRowObject: false).ToList();
+
+            // Print the first 5 predictions.
+            foreach (var p in predictions.Take(5))
+                Console.WriteLine($"Label: {p.Label}, " 
+                    + $"Prediction: {p.PredictedLabel}");
+
+            // Expected output:
+            // Label: True, Prediction: True
+            // Label: False, Prediction: True
+            // Label: True, Prediction: True
+            // Label: True, Prediction: True
+            // Label: False, Prediction: False
+            
+            // Evaluate the overall metrics with the non-calibrated evaluator.
+            var metrics = mlContext.BinaryClassification
+                .EvaluateNonCalibrated(transformedTestData);
+
+            PrintMetrics(metrics);
+            
+            // Expected output:
+            // Accuracy: 0.82
+            // AUC: 0.85
+            // F1 Score: 0.81
+            // Negative Precision: 0.82
+            // Negative Recall: 0.82
+            // Positive Precision: 0.81
+            // Positive Recall: 0.81
+
+            // TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
+            // Confusion table
+            //           ||======================
+            // PREDICTED || positive | negative | Recall
+            // TRUTH     ||======================
+            //  positive ||      192 |       46 | 0.8067
+            //  negative ||       46 |      216 | 0.8244
+            //           ||======================
+            // Precision ||   0.8067 |   0.8244 |
+        }
+
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed=0)
+
+        {
+            var random = new Random(seed); // Seeded, so the data is repeatable.
+            float randomFloat() => (float)random.NextDouble();
+            for (int i = 0; i < count; i++)
+            {
+                var label = randomFloat() > 0.5f;
+                yield return new DataPoint
+                {
+                    Label = label,
+                    // Create 50 random features that are correlated with the label.
+                    // For data points with a false label, every feature value is
+                    // shifted up by the constant 0.1f.
+                    Features = Enumerable.Repeat(label, 50)
+                        .Select(x => x ? randomFloat() : randomFloat() +
+                        0.1f).ToArray()
+            
+                };
+            }
+        }
+
+        // Example with a boolean label and 50 float feature values. A data set is
+        // a collection of such examples.
+        private class DataPoint
+        {
+            public bool Label { get; set; }
+            [VectorType(50)] // Matches the 50 values generated per data point.
+            public float[] Features { get; set; }
+        }
+
+        // Class used to capture predictions read back from the transformed data.
+        private class Prediction
+        {
+            // Original (ground-truth) label.
+            public bool Label { get; set; }
+            // Label predicted by the trained model.
+            public bool PredictedLabel { get; set; }
+        }
+
+        // Pretty-print the metrics (two decimals) and the confusion table.
+        private static void PrintMetrics(BinaryClassificationMetrics metrics)
+        {
+            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
+            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
+            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
+            Console.WriteLine($"Negative Precision: " + 
+                $"{metrics.NegativePrecision:F2}");
+
+            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
+            Console.WriteLine($"Positive Precision: " +
+                $"{metrics.PositivePrecision:F2}");
+
+            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
+            Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
+        }
+    }
+}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvm.tt
@@ -0,0 +1,39 @@
+﻿<#@ include file="BinaryClassification.ttinclude"#>
+<#+
+string ClassName = "LdSvm"; // Matches the class name in LdSvm.cs.
+string Trainer = "LdSvm"; // Matches the trainer method called in LdSvm.cs.
+string TrainerOptions = null; // LdSvm.cs passes no options object to LdSvm().
+bool IsCalibrated = false; // LdSvm.cs evaluates with EvaluateNonCalibrated.
+bool CacheData = false; // NOTE(review): read by the include file (not shown) - confirm.
+
+string LabelThreshold = "0.5f"; // Matches the label cut-off in LdSvm.cs.
+string DataSepValue = "0.1f"; // Matches the false-label feature offset in LdSvm.cs.
+string OptionsInclude = ""; // LdSvm.cs has no extra using directive.
+string Comments = ""; // NOTE(review): read by the include file (not shown) - confirm.
+
+string ExpectedOutputPerInstance = @"// Expected output:
+            // Label: True, Prediction: True
+            // Label: False, Prediction: True
+            // Label: True, Prediction: True
+            // Label: True, Prediction: True
+            // Label: False, Prediction: False";
+
+string ExpectedOutput = @"// Expected output:
+            // Accuracy: 0.82
+            // AUC: 0.85
+            // F1 Score: 0.81
+            // Negative Precision: 0.82
+            // Negative Recall: 0.82
+            // Positive Precision: 0.81
+            // Positive Recall: 0.81
+
+            // TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
+            // Confusion table
+            //           ||======================
+            // PREDICTED || positive | negative | Recall
+            // TRUTH     ||======================
+            //  positive ||      192 |       46 | 0.8067
+            //  negative ||       46 |      216 | 0.8244
+            //           ||======================
+            // Precision ||   0.8067 |   0.8244 |";
+#>
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvmWithOptions.cs
@@ -0,0 +1,152 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Trainers;
+
+namespace Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class LdSvmWithOptions
+    {
+        public static void Example()
+        {
+            // Create a new context for ML.NET operations. It can be used for
+            // exception tracking and logging, as a catalog of available operations
+            // and as the source of randomness. The seed is set to a fixed number
+            // in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Create a sequence of 1000 training data points.
+            var dataPoints = GenerateRandomDataPoints(1000);
+
+            // Convert the data points to an IDataView object, which is
+            // consumable by the ML.NET API.
+            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
+
+            // Define trainer options (tree depth, iteration count and sigma).
+            var options = new LdSvmTrainer.Options
+            {
+                TreeDepth = 5,
+                NumberOfIterations = 10000,
+                Sigma = 0.1f,
+            };
+
+            // Define the trainer, passing in the options defined above.
+            var pipeline = mlContext.BinaryClassification.Trainers
+                .LdSvm(options);
+
+            // Train the model.
+            var model = pipeline.Fit(trainingData);
+
+            // Create testing data. Use a different random seed (123 instead of the
+            // default 0) so that it differs from the training data.
+            var testData = mlContext.Data
+                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+
+            // Run the model on the test data set.
+            var transformedTestData = model.Transform(testData);
+
+            // Convert the IDataView object to a list.
+            var predictions = mlContext.Data
+                .CreateEnumerable<Prediction>(transformedTestData,
+                reuseRowObject: false).ToList();
+
+            // Print the first 5 predictions.
+            foreach (var p in predictions.Take(5))
+                Console.WriteLine($"Label: {p.Label}, " 
+                    + $"Prediction: {p.PredictedLabel}");
+
+            // Expected output:
+            //   Label: True, Prediction: True
+            //   Label: False, Prediction: True
+            //   Label: True, Prediction: True
+            //   Label: True, Prediction: True
+            //   Label: False, Prediction: False
+            
+            // Evaluate the overall metrics with the non-calibrated evaluator.
+            var metrics = mlContext.BinaryClassification
+                .EvaluateNonCalibrated(transformedTestData);
+
+            PrintMetrics(metrics);
+            
+            // Expected output:
+            //   Accuracy: 0.80
+            //   AUC: 0.89
+            //   F1 Score: 0.79
+            //   Negative Precision: 0.81
+            //   Negative Recall: 0.81
+            //   Positive Precision: 0.79
+            //   Positive Recall: 0.79
+
+            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
+            //   Confusion table
+            //             ||======================
+            //   PREDICTED || positive | negative | Recall
+            //   TRUTH     ||======================
+            //    positive ||      189 |       49 | 0.7941
+            //    negative ||       50 |      212 | 0.8092
+            //             ||======================
+            //   Precision ||   0.7908 |   0.8123 |
+        }
+
+        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
+            int seed=0)
+
+        {
+            var random = new Random(seed); // Seeded, so the data is repeatable.
+            float randomFloat() => (float)random.NextDouble();
+            for (int i = 0; i < count; i++)
+            {
+                var label = randomFloat() > 0.5f;
+                yield return new DataPoint
+                {
+                    Label = label,
+                    // Create 50 random features that are correlated with the label.
+                    // For data points with a false label, every feature value is
+                    // shifted up by the constant 0.1f.
+                    Features = Enumerable.Repeat(label, 50)
+                        .Select(x => x ? randomFloat() : randomFloat() +
+                        0.1f).ToArray()
+            
+                };
+            }
+        }
+
+        // Example with a boolean label and 50 float feature values. A data set is
+        // a collection of such examples.
+        private class DataPoint
+        {
+            public bool Label { get; set; }
+            [VectorType(50)] // Matches the 50 values generated per data point.
+            public float[] Features { get; set; }
+        }
+
+        // Class used to capture predictions read back from the transformed data.
+        private class Prediction
+        {
+            // Original (ground-truth) label.
+            public bool Label { get; set; }
+            // Label predicted by the trained model.
+            public bool PredictedLabel { get; set; }
+        }
+
+        // Pretty-print the metrics (two decimals) and the confusion table.
+        private static void PrintMetrics(BinaryClassificationMetrics metrics)
+        {
+            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
+            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
+            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
+            Console.WriteLine($"Negative Precision: " + 
+                $"{metrics.NegativePrecision:F2}");
+
+            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
+            Console.WriteLine($"Positive Precision: " +
+                $"{metrics.PositivePrecision:F2}");
+
+            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
+            Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
+        }
+    }
+}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvmWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LdSvmWithOptions.tt
@@ -0,0 +1,45 @@
+﻿<#@ include file="BinaryClassification.ttinclude"#>
+<#+
+string ClassName="LdSvmWithOptions"; // Matches the class name in LdSvmWithOptions.cs.
+string Trainer = "LdSvm"; // Matches the trainer method called in LdSvmWithOptions.cs.
+bool IsCalibrated = false; // LdSvmWithOptions.cs evaluates with EvaluateNonCalibrated.
+
+string LabelThreshold = "0.5f"; // Matches the label cut-off in LdSvmWithOptions.cs.
+string DataSepValue = "0.1f"; // Matches the false-label feature offset in that file.
+string OptionsInclude = "using Microsoft.ML.Trainers;"; // Present in LdSvmWithOptions.cs.
+string Comments= ""; // NOTE(review): read by the include file (not shown) - confirm.
+bool CacheData = false; // NOTE(review): read by the include file (not shown) - confirm.
+
+string TrainerOptions = @"LdSvmTrainer.Options
+            {
+                TreeDepth = 5,
+                NumberOfIterations = 10000,
+                Sigma = 0.1f,
+            }"; // Same initializer as the options object in LdSvmWithOptions.cs.
+
+string ExpectedOutputPerInstance= @"// Expected output:
+            //   Label: True, Prediction: True
+            //   Label: False, Prediction: True
+            //   Label: True, Prediction: True
+            //   Label: True, Prediction: True
+            //   Label: False, Prediction: False";
+
+string ExpectedOutput = @"// Expected output:
+            //   Accuracy: 0.80
+            //   AUC: 0.89
+            //   F1 Score: 0.79
+            //   Negative Precision: 0.81
+            //   Negative Recall: 0.81
+            //   Positive Precision: 0.79
+            //   Positive Recall: 0.79
+
+            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
+            //   Confusion table
+            //             ||======================
+            //   PREDICTED || positive | negative | Recall
+            //   TRUTH     ||======================
+            //    positive ||      189 |       49 | 0.7941
+            //    negative ||       50 |      212 | 0.8092
+            //             ||======================
+            //   Precision ||   0.7908 |   0.8123 |";
+#>
diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
diff --git a/src/Microsoft.ML.StandardTrainers/LdSvm/LdSvmTrainer.cs b/src/Microsoft.ML.StandardTrainers/LdSvm/LdSvmTrainer.cs