From 9f5a8b75608f5db555bf48c79b40032205d4b46a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20S=C4=99k?= Date: Sat, 17 Nov 2018 19:03:07 +0100 Subject: [PATCH 01/10] Move evaluation results to stand alone metrics classes --- .../Evaluators/BinaryClassifierEvaluator.cs | 137 +----------------- .../Evaluators/ClusteringEvaluator.cs | 40 ----- .../Metrics/BinaryClassificationMetrics.cs | 96 ++++++++++++ .../CalibratedBinaryClassificationMetrics.cs | 54 +++++++ .../Evaluators/Metrics/ClusteringMetrics.cs | 49 +++++++ .../Metrics/MulticlassClassifierMetrics.cs | 100 +++++++++++++ .../Evaluators/Metrics/RankerMetrics.cs | 43 ++++++ .../Evaluators/Metrics/RegressionMetrics.cs | 66 +++++++++ .../MulticlassClassifierEvaluator.cs | 93 +----------- .../Evaluators/RankerEvaluator.cs | 34 ----- .../Evaluators/RegressionEvaluator.cs | 59 +------- 11 files changed, 411 insertions(+), 360 deletions(-) create mode 100644 src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs create mode 100644 src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs create mode 100644 src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs create mode 100644 src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs create mode 100644 src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs create mode 100644 src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs diff --git a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs index 47e6dda091..28492fb922 100644 --- a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs @@ -789,141 +789,6 @@ private void ComputePrCurves() } } - /// - /// Evaluation results for binary classifiers, excluding probabilistic metrics. - /// - public class Result - { - /// - /// Gets the area under the ROC curve. - /// - /// - /// The area under the ROC curve is equal to the probability that the classifier ranks - /// a randomly chosen positive instance higher than a randomly chosen negative one - /// (assuming 'positive' ranks higher than 'negative'). - /// - public double Auc { get; } - - /// - /// Gets the accuracy of a classifier which is the proportion of correct predictions in the test set. - /// - public double Accuracy { get; } - - /// - /// Gets the positive precision of a classifier which is the proportion of correctly predicted - /// positive instances among all the positive predictions (i.e., the number of positive instances - /// predicted as positive, divided by the total number of instances predicted as positive). - /// - public double PositivePrecision { get; } - - /// - /// Gets the positive recall of a classifier which is the proportion of correctly predicted - /// positive instances among all the positive instances (i.e., the number of positive instances - /// predicted as positive, divided by the total number of positive instances). - /// - public double PositiveRecall { get; private set; } - - /// - /// Gets the negative precision of a classifier which is the proportion of correctly predicted - /// negative instances among all the negative predictions (i.e., the number of negative instances - /// predicted as negative, divided by the total number of instances predicted as negative). 
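// --- Illustrative sketch, not part of this patch: how the four confusion-matrix
// counts relate to the precision/recall properties documented above. The counts
// below are hypothetical example values, not evaluator output.
using System;

static class PrecisionRecallSketch
{
    static void Main()
    {
        double tp = 80, fp = 20, tn = 90, fn = 10; // hypothetical confusion-matrix counts

        double positivePrecision = tp / (tp + fp); // correct positives among all positive predictions
        double positiveRecall = tp / (tp + fn);    // correct positives among all actual positives
        double negativePrecision = tn / (tn + fn); // correct negatives among all negative predictions
        double negativeRecall = tn / (tn + fp);    // correct negatives among all actual negatives

        Console.WriteLine($"P+={positivePrecision:F2} R+={positiveRecall:F2} P-={negativePrecision:F2} R-={negativeRecall:F2}");
    }
}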
- /// - public double NegativePrecision { get; } - - /// - /// Gets the negative recall of a classifier which is the proportion of correctly predicted - /// negative instances among all the negative instances (i.e., the number of negative instances - /// predicted as negative, divided by the total number of negative instances). - /// - public double NegativeRecall { get; } - - /// - /// Gets the F1 score of the classifier. - /// - /// - /// F1 score is the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall). - /// - public double F1Score { get; } - - /// - /// Gets the area under the precision/recall curve of the classifier. - /// - /// - /// The area under the precision/recall curve is a single number summary of the information in the - /// precision/recall curve. It is increasingly used in the machine learning community, particularly - /// for imbalanced datasets where one class is observed more frequently than the other. On these - /// datasets, AUPRC can highlight performance differences that are lost with AUC. - /// - public double Auprc { get; } - - protected private static T Fetch(IExceptionContext ectx, IRow row, string name) - { - if (!row.Schema.TryGetColumnIndex(name, out int col)) - throw ectx.Except($"Could not find column '{name}'"); - T val = default; - row.GetGetter(col)(ref val); - return val; - } - - internal Result(IExceptionContext ectx, IRow overallResult) - { - double Fetch(string name) => Fetch(ectx, overallResult, name); - Auc = Fetch(BinaryClassifierEvaluator.Auc); - Accuracy = Fetch(BinaryClassifierEvaluator.Accuracy); - PositivePrecision = Fetch(BinaryClassifierEvaluator.PosPrecName); - PositiveRecall = Fetch(BinaryClassifierEvaluator.PosRecallName); - NegativePrecision = Fetch(BinaryClassifierEvaluator.NegPrecName); - NegativeRecall = Fetch(BinaryClassifierEvaluator.NegRecallName); - F1Score = Fetch(BinaryClassifierEvaluator.F1); - Auprc = Fetch(BinaryClassifierEvaluator.AuPrc); - } - } - - /// - /// Evaluation results for binary classifiers, including probabilistic metrics. - /// - public sealed class CalibratedResult : Result - { - /// - /// Gets the log-loss of the classifier. - /// - /// - /// The log-loss metric, is computed as follows: - /// LL = - (1/m) * sum( log(p[i])) - /// where m is the number of instances in the test set. - /// p[i] is the probability returned by the classifier if the instance belongs to class 1, - /// and 1 minus the probability returned by the classifier if the instance belongs to class 0. - /// - public double LogLoss { get; } - - /// - /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG) - /// of the classifier. - /// - /// - /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example: - /// (LL(prior) - LL(classifier)) / LL(prior) - /// This metric can be interpreted as the advantage of the classifier over a random prediction. - /// For example, if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is - /// 20% better than random guessing." - /// - public double LogLossReduction { get; } - - /// - /// Gets the test-set entropy (prior Log-Loss/instance) of the classifier. 
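// --- Illustrative sketch, not part of this patch: the log-loss and log-loss
// reduction described in the remarks above, over hypothetical labels and probabilities.
using System;
using System.Linq;

static class LogLossSketch
{
    static void Main()
    {
        bool[] labels = { true, false, true, true };
        double[] probOfPositive = { 0.9, 0.2, 0.6, 0.8 }; // classifier's P(class == 1), hypothetical

        // LL = -(1/m) * sum(log(p[i])), where p[i] is the probability assigned to the true class.
        double logLoss = -labels.Zip(probOfPositive, (y, p) => Math.Log(y ? p : 1 - p)).Average();

        // Prior log-loss: the same formula with the empirical positive rate as every prediction.
        double prior = labels.Count(y => y) / (double)labels.Length;
        double priorLogLoss = -labels.Select(y => Math.Log(y ? prior : 1 - prior)).Average();

        // Log-loss reduction: (LL(prior) - LL(classifier)) / LL(prior); multiply by 100 to
        // match the 0-100 scale quoted in the remarks above ("RIG equals 20").
        double logLossReduction = (priorLogLoss - logLoss) / priorLogLoss;
        Console.WriteLine($"LL={logLoss:F4}, RIG={logLossReduction:P0}");
    }
}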
- /// - public double Entropy { get; } - - internal CalibratedResult(IExceptionContext ectx, IRow overallResult) - : base(ectx, overallResult) - { - double Fetch(string name) => Fetch(ectx, overallResult, name); - LogLoss = Fetch(BinaryClassifierEvaluator.LogLoss); - LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction); - Entropy = Fetch(BinaryClassifierEvaluator.Entropy); - } - } - /// /// Evaluates scored binary classification data. /// @@ -933,7 +798,7 @@ internal CalibratedResult(IExceptionContext ectx, IRow overallResult) /// The name of the probability column in , the calibrated version of . /// The name of the predicted label column in . /// The evaluation results for these calibrated outputs. - public CalibratedResult Evaluate(IDataView data, string label, string score, string probability, string predictedLabel) + public CalibratedBinaryClassificationMetrics Evaluate(IDataView data, string label, string score, string probability, string predictedLabel) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); diff --git a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs index 34113e36fd..c52e60b9b5 100644 --- a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs @@ -558,46 +558,6 @@ private void AssertValid(bool assertGetters) } } } - - /// - /// The metrics generated after evaluating the clustering predictions. - /// - public sealed class Result - { - /// - /// Normalized Mutual Information - /// NMI is a measure of the mutual dependence of the variables. - /// Normalized variants work on data that already has cluster labels. - /// Its value ranged from 0 to 1, where higher numbers are better. - /// - public double Nmi { get; } - - /// - /// Average Score. For the K-Means algorithm, the 'score' is the distance from the centroid to the example. - /// The average score is, therefore, a measure of proximity of the examples to cluster centroids. - /// In other words, it's the 'cluster tightness' measure. - /// Note however, that this metric will only decrease if the number of clusters is increased, - /// and in the extreme case (where each distinct example is its own cluster) it will be equal to zero. - /// - public double AvgMinScore { get; } - - /// - /// Davies-Bouldin Index - /// DBI is a measure of the how much scatter is in the cluster and the cluster separation. - /// - public double Dbi { get; } - - internal Result(IExceptionContext ectx, IRow overallResult, bool calculateDbi) - { - double Fetch(string name) => RowCursorUtils.Fetch(ectx, overallResult, name); - - Nmi = Fetch(ClusteringEvaluator.Nmi); - AvgMinScore = Fetch(ClusteringEvaluator.AvgMinScore); - - if (calculateDbi) - Dbi = Fetch(ClusteringEvaluator.Dbi); - } - } } public sealed class ClusteringPerInstanceEvaluator : PerInstanceEvaluatorBase diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs new file mode 100644 index 0000000000..a8ceb3ce89 --- /dev/null +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs @@ -0,0 +1,96 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
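// --- Illustrative usage sketch, not part of this patch: with this change, callers
// receive the stand-alone CalibratedBinaryClassificationMetrics type instead of the
// nested CalibratedResult. `env` and `scoredData` are assumed to exist, and the
// column names below are the conventional defaults.
var evaluator = new BinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments());
CalibratedBinaryClassificationMetrics metrics = evaluator.Evaluate(
    scoredData, label: "Label", score: "Score",
    probability: "Probability", predictedLabel: "PredictedLabel");
Console.WriteLine($"AUC: {metrics.Auc:F4}, log-loss: {metrics.LogLoss:F4}");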
+ +namespace Microsoft.ML.Runtime.Data +{ + /// + /// Evaluation results for binary classifiers, excluding probabilistic metrics. + /// + public class BinaryClassificationMetrics + { + /// + /// Gets the area under the ROC curve. + /// + /// + /// The area under the ROC curve is equal to the probability that the classifier ranks + /// a randomly chosen positive instance higher than a randomly chosen negative one + /// (assuming 'positive' ranks higher than 'negative'). + /// + public double Auc { get; } + + /// + /// Gets the accuracy of a classifier which is the proportion of correct predictions in the test set. + /// + public double Accuracy { get; } + + /// + /// Gets the positive precision of a classifier which is the proportion of correctly predicted + /// positive instances among all the positive predictions (i.e., the number of positive instances + /// predicted as positive, divided by the total number of instances predicted as positive). + /// + public double PositivePrecision { get; } + + /// + /// Gets the positive recall of a classifier which is the proportion of correctly predicted + /// positive instances among all the positive instances (i.e., the number of positive instances + /// predicted as positive, divided by the total number of positive instances). + /// + public double PositiveRecall { get; private set; } + + /// + /// Gets the negative precision of a classifier which is the proportion of correctly predicted + /// negative instances among all the negative predictions (i.e., the number of negative instances + /// predicted as negative, divided by the total number of instances predicted as negative). + /// + public double NegativePrecision { get; } + + /// + /// Gets the negative recall of a classifier which is the proportion of correctly predicted + /// negative instances among all the negative instances (i.e., the number of negative instances + /// predicted as negative, divided by the total number of negative instances). + /// + public double NegativeRecall { get; } + + /// + /// Gets the F1 score of the classifier. + /// + /// + /// F1 score is the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall). + /// + public double F1Score { get; } + + /// + /// Gets the area under the precision/recall curve of the classifier. + /// + /// + /// The area under the precision/recall curve is a single number summary of the information in the + /// precision/recall curve. It is increasingly used in the machine learning community, particularly + /// for imbalanced datasets where one class is observed more frequently than the other. On these + /// datasets, AUPRC can highlight performance differences that are lost with AUC. 
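// --- Illustrative sketch, not part of this patch: F1 as the harmonic mean of
// precision and recall, matching the formula in the remarks above. The harmonic
// mean stays low unless both precision and recall are high.
double precision = 0.75, recall = 0.60;                    // hypothetical values
double f1 = 2 * precision * recall / (precision + recall); // ≈ 0.667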
+ /// + public double Auprc { get; } + + protected private static T Fetch(IExceptionContext ectx, IRow row, string name) + { + if (!row.Schema.TryGetColumnIndex(name, out int col)) + throw ectx.Except($"Could not find column '{name}'"); + T val = default; + row.GetGetter(col)(ref val); + return val; + } + + internal BinaryClassificationMetrics(IExceptionContext ectx, IRow overallResult) + { + double Fetch(string name) => Fetch(ectx, overallResult, name); + Auc = Fetch(BinaryClassifierEvaluator.Auc); + Accuracy = Fetch(BinaryClassifierEvaluator.Accuracy); + PositivePrecision = Fetch(BinaryClassifierEvaluator.PosPrecName); + PositiveRecall = Fetch(BinaryClassifierEvaluator.PosRecallName); + NegativePrecision = Fetch(BinaryClassifierEvaluator.NegPrecName); + NegativeRecall = Fetch(BinaryClassifierEvaluator.NegRecallName); + F1Score = Fetch(BinaryClassifierEvaluator.F1); + Auprc = Fetch(BinaryClassifierEvaluator.AuPrc); + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs new file mode 100644 index 0000000000..6a1b7593da --- /dev/null +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; + +namespace Microsoft.ML.Runtime.Data +{ + /// + /// Evaluation results for binary classifiers, including probabilistic metrics. + /// + public sealed class CalibratedBinaryClassificationMetrics : BinaryClassificationMetrics + { + /// + /// Gets the log-loss of the classifier. + /// + /// + /// The log-loss metric, is computed as follows: + /// LL = - (1/m) * sum( log(p[i])) + /// where m is the number of instances in the test set. + /// p[i] is the probability returned by the classifier if the instance belongs to class 1, + /// and 1 minus the probability returned by the classifier if the instance belongs to class 0. + /// + public double LogLoss { get; } + + /// + /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG) + /// of the classifier. + /// + /// + /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example: + /// (LL(prior) - LL(classifier)) / LL(prior) + /// This metric can be interpreted as the advantage of the classifier over a random prediction. + /// For example, if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is + /// 20% better than random guessing." + /// + public double LogLossReduction { get; } + + /// + /// Gets the test-set entropy (prior Log-Loss/instance) of the classifier. 
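// --- Illustrative sketch, not part of this patch: the test-set entropy is the
// log-loss of a "prior" classifier that always predicts the empirical positive rate.
// The logarithm base is a reporting convention; the natural log is used here.
double positiveRate = 0.25; // hypothetical share of positive examples in the test set
double entropy = -(positiveRate * Math.Log(positiveRate)
    + (1 - positiveRate) * Math.Log(1 - positiveRate));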
+        ///
+        public double Entropy { get; }
+
+        internal CalibratedBinaryClassificationMetrics(IExceptionContext ectx, IRow overallResult)
+            : base(ectx, overallResult)
+        {
+            double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
+            LogLoss = Fetch(BinaryClassifierEvaluator.LogLoss);
+            LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction);
+            Entropy = Fetch(BinaryClassifierEvaluator.Entropy);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs
new file mode 100644
index 0000000000..e53d16d4b5
--- /dev/null
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Runtime.Data
+{
+    ///
+    /// The metrics generated after evaluating the clustering predictions.
+    ///
+    public sealed class ClusteringMetrics
+    {
+        ///
+        /// Normalized Mutual Information
+        /// NMI is a measure of the mutual dependence of the variables.
+        /// Normalized variants work on data that already has cluster labels.
+        /// Its value ranges from 0 to 1, where higher numbers are better.
+        ///
+        public double Nmi { get; }
+
+        ///
+        /// Average Score. For the K-Means algorithm, the 'score' is the distance from the centroid to the example.
+        /// The average score is, therefore, a measure of proximity of the examples to cluster centroids.
+        /// In other words, it's the 'cluster tightness' measure.
+        /// Note, however, that this metric will only decrease if the number of clusters is increased,
+        /// and in the extreme case (where each distinct example is its own cluster) it will be equal to zero.
+        ///
+        public double AvgMinScore { get; }
+
+        ///
+        /// Davies-Bouldin Index
+        /// DBI is a measure of how much scatter is in the cluster and of the cluster separation.
+        ///
+        public double Dbi { get; }
+
+        internal ClusteringMetrics(IExceptionContext ectx, IRow overallResult, bool calculateDbi)
+        {
+            double Fetch(string name) => RowCursorUtils.Fetch<double>(ectx, overallResult, name);
+
+            Nmi = Fetch(ClusteringEvaluator.Nmi);
+            AvgMinScore = Fetch(ClusteringEvaluator.AvgMinScore);
+
+            if (calculateDbi)
+                Dbi = Fetch(ClusteringEvaluator.Dbi);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs
new file mode 100644
index 0000000000..5b402856bd
--- /dev/null
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs
@@ -0,0 +1,100 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Runtime.Data
+{
+    public sealed class MulticlassClassifierMetrics
+    {
+        ///
+        /// Gets the micro-average accuracy of the model.
+        ///
+        ///
+        /// The micro-average is the fraction of instances predicted correctly.
+        ///
+        /// The micro-average metric weighs each class according to the number of instances that belong
+        /// to it in the dataset.
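// --- Illustrative sketch, not part of this patch: micro- vs. macro-average accuracy
// over hypothetical per-class counts, matching the definitions in the remarks here.
using System;
using System.Linq;

static class AccuracySketch
{
    static void Main()
    {
        // (correct, total) per class; class 0 dominates the dataset.
        var perClass = new (double Correct, double Total)[] { (90, 100), (5, 10), (2, 10) };

        // Micro-average: pool all instances, then take the fraction predicted correctly.
        double micro = perClass.Sum(c => c.Correct) / perClass.Sum(c => c.Total); // 97/120 ≈ 0.808

        // Macro-average: per-class accuracy first, then an unweighted mean over classes.
        double macro = perClass.Average(c => c.Correct / c.Total);                // ≈ 0.533

        Console.WriteLine($"micro={micro:F3}, macro={macro:F3}");
    }
}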
+ /// + public double AccuracyMicro { get; } + + /// + /// Gets the macro-average accuracy of the model. + /// + /// + /// The macro-average is computed by taking the average over all the classes of the fraction + /// of correct predictions in this class (the number of correctly predicted instances in the class, + /// divided by the total number of instances in the class). + /// + /// The macro-average metric gives the same weight to each class, no matter how many instances from + /// that class the dataset contains. + /// + public double AccuracyMacro { get; } + + /// + /// Gets the average log-loss of the classifier. + /// + /// + /// The log-loss metric, is computed as follows: + /// LL = - (1/m) * sum( log(p[i])) + /// where m is the number of instances in the test set. + /// p[i] is the probability returned by the classifier if the instance belongs to class 1, + /// and 1 minus the probability returned by the classifier if the instance belongs to class 0. + /// + public double LogLoss { get; } + + /// + /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG) + /// of the classifier. + /// + /// + /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example: + /// (LL(prior) - LL(classifier)) / LL(prior) + /// This metric can be interpreted as the advantage of the classifier over a random prediction. + /// For example, if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is + /// 20% better than random guessing". + /// + public double LogLossReduction { get; private set; } + + /// + /// If positive, this is the top-K for which the is calculated. + /// + public int TopK { get; } + + /// + /// If is positive, this is the relative number of examples where + /// the true label is one of the top k predicted labels by the predictor. + /// + public double TopKAccuracy { get; } + + /// + /// Gets the log-loss of the classifier for each class. + /// + /// + /// The log-loss metric, is computed as follows: + /// LL = - (1/m) * sum( log(p[i])) + /// where m is the number of instances in the test set. + /// p[i] is the probability returned by the classifier if the instance belongs to the class, + /// and 1 minus the probability returned by the classifier if the instance does not belong to the class. 
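// --- Illustrative sketch, not part of this patch: per-class log-loss as described
// above; each class averages -log(p of the true class) over its own instances only.
using System;
using System.Collections.Generic;
using System.Linq;

static class PerClassLogLossSketch
{
    static double[] PerClassLogLoss(IReadOnlyList<int> trueClass, IReadOnlyList<double[]> probs, int numClasses)
    {
        var sums = new double[numClasses];
        var counts = new int[numClasses];
        for (int i = 0; i < trueClass.Count; i++)
        {
            sums[trueClass[i]] += -Math.Log(probs[i][trueClass[i]]); // probability of the true class
            counts[trueClass[i]]++;
        }
        return sums.Zip(counts, (s, n) => n > 0 ? s / n : 0).ToArray();
    }
}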
+        ///
+        public double[] PerClassLogLoss { get; }
+
+        internal MulticlassClassifierMetrics(IExceptionContext ectx, IRow overallResult, int topK)
+        {
+            double FetchDouble(string name) => RowCursorUtils.Fetch<double>(ectx, overallResult, name);
+            AccuracyMicro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMicro);
+            AccuracyMacro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMacro);
+            LogLoss = FetchDouble(MultiClassClassifierEvaluator.LogLoss);
+            LogLossReduction = FetchDouble(MultiClassClassifierEvaluator.LogLossReduction);
+            TopK = topK;
+            if (topK > 0)
+                TopKAccuracy = FetchDouble(MultiClassClassifierEvaluator.TopKAccuracy);
+
+            var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss);
+            PerClassLogLoss = new double[perClassLogLoss.Length];
+            perClassLogLoss.CopyTo(PerClassLogLoss);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs
new file mode 100644
index 0000000000..8d8a93f24d
--- /dev/null
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Runtime.Data
+{
+    public sealed class RankerMetrics
+    {
+        ///
+        /// Normalized Discounted Cumulative Gain
+        ///
+        ///
+        public double[] Ndcg { get; }
+
+        ///
+        /// Discounted Cumulative Gain
+        /// is the sum of the gains, for all the instances i, normalized by the natural logarithm of the instance + 1.
+        /// Note that unlike the Wikipedia article, ML.Net uses the natural logarithm.
+        ///
+        ///
+        public double[] Dcg { get; }
+
+        private static T Fetch<T>(IExceptionContext ectx, IRow row, string name)
+        {
+            if (!row.Schema.TryGetColumnIndex(name, out int col))
+                throw ectx.Except($"Could not find column '{name}'");
+            T val = default;
+            row.GetGetter<T>(col)(ref val);
+            return val;
+        }
+
+        internal RankerMetrics(IExceptionContext ectx, IRow overallResult)
+        {
+            VBuffer<double> Fetch(string name) => Fetch<VBuffer<double>>(ectx, overallResult, name);
+
+            Dcg = Fetch(RankerEvaluator.Dcg).GetValues().ToArray();
+            Ndcg = Fetch(RankerEvaluator.Ndcg).GetValues().ToArray();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs
new file mode 100644
index 0000000000..356e936403
--- /dev/null
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs
@@ -0,0 +1,66 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Runtime.Data
+{
+    public sealed class RegressionMetrics
+    {
+        ///
+        /// Gets the absolute loss of the model.
+        ///
+        ///
+        /// The absolute loss is defined as
+        /// L1 = (1/m) * sum( abs( yi - y'i))
+        /// where m is the number of instances in the test set.
+        /// y'i are the predicted labels for each instance.
+        /// yi are the correct labels of each instance.
+        ///
+        public double L1 { get; }
+
+        ///
+        /// Gets the squared loss of the model.
+ /// + /// + /// The squared loss is defined as + /// L2 = (1/m) * sum(( yi - y'i)^2) + /// where m is the number of instances in the test set. + /// y'i are the predicted labels for each instance. + /// yi are the correct labels of each instance. + /// + public double L2 { get; } + + /// + /// Gets the root mean square loss (or RMS) which is the square root of the L2 loss. + /// + public double Rms { get; } + + /// + /// Gets the user defined loss function. + /// + /// + /// This is the average of a loss function defined by the user, + /// computed over all the instances in the test set. + /// + public double LossFn { get; } + + /// + /// Gets the R squared value of the model, which is also known as + /// the coefficient of determination​. + /// + public double RSquared { get; } + + internal RegressionMetrics(IExceptionContext ectx, IRow overallResult) + { + double Fetch(string name) => RowCursorUtils.Fetch(ectx, overallResult, name); + L1 = Fetch(RegressionEvaluator.L1); + L2 = Fetch(RegressionEvaluator.L2); + Rms = Fetch(RegressionEvaluator.Rms); + LossFn = Fetch(RegressionEvaluator.Loss); + RSquared = Fetch(RegressionEvaluator.RSquared); + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs index 49bbea535a..f2b41c1677 100644 --- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs @@ -497,97 +497,6 @@ public void GetSlotNames(ref VBuffer> slotNames) } } - public sealed class Result - { - /// - /// Gets the micro-average accuracy of the model. - /// - /// - /// The micro-average is the fraction of instances predicted correctly. - /// - /// The micro-average metric weighs each class according to the number of instances that belong - /// to it in the dataset. - /// - public double AccuracyMicro { get; } - - /// - /// Gets the macro-average accuracy of the model. - /// - /// - /// The macro-average is computed by taking the average over all the classes of the fraction - /// of correct predictions in this class (the number of correctly predicted instances in the class, - /// divided by the total number of instances in the class). - /// - /// The macro-average metric gives the same weight to each class, no matter how many instances from - /// that class the dataset contains. - /// - public double AccuracyMacro { get; } - - /// - /// Gets the average log-loss of the classifier. - /// - /// - /// The log-loss metric, is computed as follows: - /// LL = - (1/m) * sum( log(p[i])) - /// where m is the number of instances in the test set. - /// p[i] is the probability returned by the classifier if the instance belongs to class 1, - /// and 1 minus the probability returned by the classifier if the instance belongs to class 0. - /// - public double LogLoss { get; } - - /// - /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG) - /// of the classifier. - /// - /// - /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example: - /// (LL(prior) - LL(classifier)) / LL(prior) - /// This metric can be interpreted as the advantage of the classifier over a random prediction. - /// For example, if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is - /// 20% better than random guessing". 
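// --- Illustrative sketch, not part of this patch: the L1, L2, and RMS losses from
// the RegressionMetrics remarks above, over hypothetical labels and predictions.
using System;
using System.Linq;

static class RegressionLossSketch
{
    static void Main()
    {
        double[] y = { 1.0, 2.0, 3.0 };    // correct labels
        double[] yHat = { 1.1, 1.8, 3.4 }; // predicted labels

        double l1 = y.Zip(yHat, (a, b) => Math.Abs(a - b)).Average();
        double l2 = y.Zip(yHat, (a, b) => (a - b) * (a - b)).Average();
        double rms = Math.Sqrt(l2); // RMS is the square root of the L2 loss

        Console.WriteLine($"L1={l1:F3}, L2={l2:F3}, RMS={rms:F3}");
    }
}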
- /// - public double LogLossReduction { get; private set; } - - /// - /// If positive, this is the top-K for which the is calculated. - /// - public int TopK { get; } - - /// - /// If is positive, this is the relative number of examples where - /// the true label is one of the top k predicted labels by the predictor. - /// - public double TopKAccuracy { get; } - - /// - /// Gets the log-loss of the classifier for each class. - /// - /// - /// The log-loss metric, is computed as follows: - /// LL = - (1/m) * sum( log(p[i])) - /// where m is the number of instances in the test set. - /// p[i] is the probability returned by the classifier if the instance belongs to the class, - /// and 1 minus the probability returned by the classifier if the instance does not belong to the class. - /// - public double[] PerClassLogLoss { get; } - - internal Result(IExceptionContext ectx, IRow overallResult, int topK) - { - double FetchDouble(string name) => RowCursorUtils.Fetch(ectx, overallResult, name); - AccuracyMicro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMicro); - AccuracyMacro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMacro); - LogLoss = FetchDouble(MultiClassClassifierEvaluator.LogLoss); - LogLossReduction = FetchDouble(MultiClassClassifierEvaluator.LogLossReduction); - TopK = topK; - if (topK > 0) - TopKAccuracy = FetchDouble(MultiClassClassifierEvaluator.TopKAccuracy); - - var perClassLogLoss = RowCursorUtils.Fetch>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss); - PerClassLogLoss = new double[perClassLogLoss.Length]; - perClassLogLoss.CopyTo(PerClassLogLoss); - } - } - /// /// Evaluates scored multiclass classification data. /// @@ -596,7 +505,7 @@ internal Result(IExceptionContext ectx, IRow overallResult, int topK) /// The name of the score column in . /// The name of the predicted label column in . /// The evaluation results for these outputs. - public Result Evaluate(IDataView data, string label, string score, string predictedLabel) + public MulticlassClassifierMetrics Evaluate(IDataView data, string label, string score, string predictedLabel) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); diff --git a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs index 5755b285be..10543ca0dd 100644 --- a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs @@ -543,40 +543,6 @@ public void GetSlotNames(ref VBuffer> slotNames) slotNames = new VBuffer>(UnweightedCounters.TruncationLevel, values); } } - - public sealed class Result - { - /// - /// Normalized Discounted Cumulative Gain - /// - /// - public double[] Ndcg { get; } - - /// - /// Discounted Cumulative gain - /// is the sum of the gains, for all the instances i, normalized by the natural logarithm of the instance + 1. - /// Note that unline the Wikipedia article, ML.Net uses the natural logarithm. 
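// --- Illustrative sketch, not part of this patch: the DCG@K computation described
// above, discounting each gain by the natural logarithm of its 1-based position + 1.
// The gains array is hypothetical.
using System;

static class DcgSketch
{
    static double DcgAt(double[] gains, int k)
    {
        double dcg = 0;
        for (int i = 0; i < Math.Min(k, gains.Length); i++)
            dcg += gains[i] / Math.Log(i + 2); // position i+1, discount ln((i + 1) + 1)
        return dcg;
    }
}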
- /// - /// - public double[] Dcg { get; } - - private static T Fetch(IExceptionContext ectx, IRow row, string name) - { - if (!row.Schema.TryGetColumnIndex(name, out int col)) - throw ectx.Except($"Could not find column '{name}'"); - T val = default; - row.GetGetter(col)(ref val); - return val; - } - - internal Result(IExceptionContext ectx, IRow overallResult) - { - VBuffer Fetch(string name) => Fetch>(ectx, overallResult, name); - - Dcg = Fetch(RankerEvaluator.Dcg).GetValues().ToArray(); - Ndcg = Fetch(RankerEvaluator.Ndcg).GetValues().ToArray(); - } - } } public sealed class RankerPerInstanceTransform : IDataTransform diff --git a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs index 4c8c36496e..4d96eba578 100644 --- a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs @@ -157,63 +157,6 @@ public override void AddColumn(ArrayDataViewBuilder dvBldr, string metricName, p } } - public sealed class Result - { - /// - /// Gets the absolute loss of the model. - /// - /// - /// The absolute loss is defined as - /// L1 = (1/m) * sum( abs( yi - y'i)) - /// where m is the number of instances in the test set. - /// y'i are the predicted labels for each instance. - /// yi are the correct labels of each instance. - /// - public double L1 { get; } - - /// - /// Gets the squared loss of the model. - /// - /// - /// The squared loss is defined as - /// L2 = (1/m) * sum(( yi - y'i)^2) - /// where m is the number of instances in the test set. - /// y'i are the predicted labels for each instance. - /// yi are the correct labels of each instance. - /// - public double L2 { get; } - - /// - /// Gets the root mean square loss (or RMS) which is the square root of the L2 loss. - /// - public double Rms { get; } - - /// - /// Gets the user defined loss function. - /// - /// - /// This is the average of a loss function defined by the user, - /// computed over all the instances in the test set. - /// - public double LossFn { get; } - - /// - /// Gets the R squared value of the model, which is also known as - /// the coefficient of determination​. - /// - public double RSquared { get; } - - internal Result(IExceptionContext ectx, IRow overallResult) - { - double Fetch(string name) => RowCursorUtils.Fetch(ectx, overallResult, name); - L1 = Fetch(RegressionEvaluator.L1); - L2 = Fetch(RegressionEvaluator.L2); - Rms = Fetch(RegressionEvaluator.Rms); - LossFn = Fetch(RegressionEvaluator.Loss); - RSquared = Fetch(RegressionEvaluator.RSquared); - } - } - /// /// Evaluates scored regression data. /// @@ -221,7 +164,7 @@ internal Result(IExceptionContext ectx, IRow overallResult) /// The name of the label column. /// The name of the predicted score column. /// The evaluation metrics for these outputs. 
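// --- Illustrative sketch, not part of this patch: the coefficient of determination
// (RSquared) expressed as 1 - SS_res / SS_tot; the data is hypothetical.
using System;
using System.Linq;

static class RSquaredSketch
{
    static double RSquared(double[] y, double[] yHat)
    {
        double mean = y.Average();
        double ssRes = y.Zip(yHat, (a, b) => (a - b) * (a - b)).Sum(); // residual sum of squares
        double ssTot = y.Sum(a => (a - mean) * (a - mean));            // total sum of squares
        return 1 - ssRes / ssTot;
    }
}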
- public Result Evaluate(IDataView data, string label, string score) + public RegressionMetrics Evaluate(IDataView data, string label, string score) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); From af8b8e1744ab292fc7a1db0f44d72466b9d85236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20S=C4=99k?= Date: Sat, 17 Nov 2018 19:04:53 +0100 Subject: [PATCH 02/10] Update metrics types --- .../Evaluators/BinaryClassifierEvaluator.cs | 10 +++---- .../Evaluators/ClusteringEvaluator.cs | 6 ++-- .../Evaluators/EvaluatorStaticExtensions.cs | 14 +++++----- .../MulticlassClassifierEvaluator.cs | 4 +-- .../Evaluators/RankerEvaluator.cs | 6 ++-- .../Evaluators/RegressionEvaluator.cs | 4 +-- src/Microsoft.ML.Data/TrainContext.cs | 28 +++++++++---------- .../Training/TrainingStaticExtensions.cs | 8 +++--- 8 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs index 28492fb922..58c9dd07bb 100644 --- a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs @@ -816,12 +816,12 @@ public CalibratedBinaryClassificationMetrics Evaluate(IDataView data, string lab Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - CalibratedResult result; + CalibratedBinaryClassificationMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new CalibratedResult(Host, cursor); + result = new CalibratedBinaryClassificationMetrics(Host, cursor); moved = cursor.MoveNext(); Host.Assert(!moved); } @@ -837,7 +837,7 @@ public CalibratedBinaryClassificationMetrics Evaluate(IDataView data, string lab /// The name of the predicted label column in . /// The evaluation results for these uncalibrated outputs. /// - public Result Evaluate(IDataView data, string label, string score, string predictedLabel) + public BinaryClassificationMetrics Evaluate(IDataView data, string label, string score, string predictedLabel) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); @@ -853,12 +853,12 @@ public Result Evaluate(IDataView data, string label, string score, string predic Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - Result result; + BinaryClassificationMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new Result(Host, cursor); + result = new BinaryClassificationMetrics(Host, cursor); moved = cursor.MoveNext(); Host.Assert(!moved); } diff --git a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs index c52e60b9b5..f35484d4ea 100644 --- a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs @@ -61,7 +61,7 @@ public ClusteringEvaluator(IHostEnvironment env, Arguments args) /// The name of the optional label column in . /// The name of the optional feature column in . /// The evaluation results. 
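// --- Illustrative usage sketch, not part of this patch: the clustering evaluator now
// returns the stand-alone ClusteringMetrics type. `env` and `scoredData` are assumed
// to exist; passing the optional label and features columns enables NMI and DBI.
var evaluator = new ClusteringEvaluator(env, new ClusteringEvaluator.Arguments());
ClusteringMetrics metrics = evaluator.Evaluate(scoredData, score: "Score",
    label: "Label", features: "Features");
Console.WriteLine($"NMI: {metrics.Nmi:F4}, AvgMinScore: {metrics.AvgMinScore:F4}");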
- public Result Evaluate(IDataView data, string score, string label = null, string features = null) + public ClusteringMetrics Evaluate(IDataView data, string score, string label = null, string features = null) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(score, nameof(score)); @@ -81,12 +81,12 @@ public Result Evaluate(IDataView data, string score, string label = null, string Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - Result result; + ClusteringMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new Result(Host, cursor, _calculateDbi); + result = new ClusteringMetrics(Host, cursor, _calculateDbi); moved = cursor.MoveNext(); Host.Assert(!moved); } diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs index df58e4d46c..ea8b78b99f 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs @@ -24,7 +24,7 @@ public static class EvaluatorStaticExtensions /// The index delegate for columns from calibrated prediction of a binary classifier. /// Under typical scenarios, this will just be the same tuple of results returned from the trainer. /// The evaluation results for these calibrated outputs. - public static BinaryClassifierEvaluator.CalibratedResult Evaluate( + public static CalibratedBinaryClassificationMetrics Evaluate( this BinaryClassificationContext ctx, DataView data, Func> label, @@ -60,7 +60,7 @@ public static BinaryClassifierEvaluator.CalibratedResult Evaluate( /// The index delegate for columns from uncalibrated prediction of a binary classifier. /// Under typical scenarios, this will just be the same tuple of results returned from the trainer. /// The evaluation results for these uncalibrated outputs. - public static BinaryClassifierEvaluator.Result Evaluate( + public static BinaryClassificationMetrics Evaluate( this BinaryClassificationContext ctx, DataView data, Func> label, @@ -94,7 +94,7 @@ public static BinaryClassifierEvaluator.Result Evaluate( /// The optional index delegate for the label column. /// The optional index delegate for the features column. /// The evaluation metrics. - public static ClusteringEvaluator.Result Evaluate( + public static ClusteringMetrics Evaluate( this ClusteringContext ctx, DataView data, Func> score, @@ -127,11 +127,11 @@ public static ClusteringEvaluator.Result Evaluate( /// The index delegate for the label column. /// The index delegate for columns from the prediction of a multiclass classifier. /// Under typical scenarios, this will just be the same tuple of results returned from the trainer. - /// If given a positive value, the will be filled with + /// If given a positive value, the will be filled with /// the top-K accuracy, that is, the accuracy assuming we consider an example with the correct class within /// the top-K values as being stored "correctly." /// The evaluation metrics. - public static MultiClassClassifierEvaluator.Result Evaluate( + public static MulticlassClassifierMetrics Evaluate( this MulticlassClassificationContext ctx, DataView data, Func> label, @@ -178,7 +178,7 @@ private sealed class TrivialRegressionLossFactory : ISupportRegressionLossFactor /// The index delegate for predicted score column. /// Potentially custom loss function. If left unspecified defaults to . 
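// --- Illustrative sketch, not part of this patch: the "LossFn" metric is simply the
// chosen loss function averaged over the test set. The Huber-style loss below is a
// hypothetical stand-in for a user-supplied loss.
using System;
using System.Linq;

static class CustomLossSketch
{
    static double AverageLoss(double[] labels, double[] predictions, Func<double, double, double> loss)
        => labels.Zip(predictions, loss).Average();

    static double HuberLoss(double label, double prediction)
    {
        const double Delta = 1.0;
        double d = Math.Abs(label - prediction);
        return d <= Delta ? 0.5 * d * d : Delta * (d - 0.5 * Delta);
    }
}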
/// The evaluation metrics. - public static RegressionEvaluator.Result Evaluate( + public static RegressionMetrics Evaluate( this RegressionContext ctx, DataView data, Func> label, @@ -212,7 +212,7 @@ public static RegressionEvaluator.Result Evaluate( /// The index delegate for the groupId column. /// The index delegate for predicted score column. /// The evaluation metrics. - public static RankerEvaluator.Result Evaluate( + public static RankerMetrics Evaluate( this RankingContext ctx, DataView data, Func> label, diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs index f2b41c1677..f0deff610d 100644 --- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs @@ -521,12 +521,12 @@ public MulticlassClassifierMetrics Evaluate(IDataView data, string label, string Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - Result result; + MulticlassClassifierMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new Result(Host, cursor, _outputTopKAcc ?? 0); + result = new MulticlassClassifierMetrics(Host, cursor, _outputTopKAcc ?? 0); moved = cursor.MoveNext(); Host.Assert(!moved); } diff --git a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs index 10543ca0dd..c8eea4f381 100644 --- a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs @@ -242,7 +242,7 @@ protected override void GetAggregatorConsolidationFuncs(Aggregator aggregator, A /// The name of the groupId column. /// The name of the predicted score column. /// The evaluation metrics for these outputs. 
- public Result Evaluate(IDataView data, string label, string groupId, string score) + public RankerMetrics Evaluate(IDataView data, string label, string groupId, string score) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); @@ -256,12 +256,12 @@ public Result Evaluate(IDataView data, string label, string groupId, string scor Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - Result result; + RankerMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new Result(Host, cursor); + result = new RankerMetrics(Host, cursor); moved = cursor.MoveNext(); Host.Assert(!moved); } diff --git a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs index 4d96eba578..ff862f2250 100644 --- a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs @@ -177,12 +177,12 @@ public RegressionMetrics Evaluate(IDataView data, string label, string score) Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - Result result; + RegressionMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new Result(Host, cursor); + result = new RegressionMetrics(Host, cursor); moved = cursor.MoveNext(); Host.Assert(!moved); } diff --git a/src/Microsoft.ML.Data/TrainContext.cs b/src/Microsoft.ML.Data/TrainContext.cs index ff4648156c..a0708b0059 100644 --- a/src/Microsoft.ML.Data/TrainContext.cs +++ b/src/Microsoft.ML.Data/TrainContext.cs @@ -204,7 +204,7 @@ internal BinaryClassificationTrainers(BinaryClassificationContext ctx) /// The name of the probability column in , the calibrated version of . /// The name of the predicted label column in . /// The evaluation results for these calibrated outputs. - public BinaryClassifierEvaluator.CalibratedResult Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score, + public CalibratedBinaryClassificationMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score, string probability = DefaultColumnNames.Probability, string predictedLabel = DefaultColumnNames.PredictedLabel) { Host.CheckValue(data, nameof(data)); @@ -225,7 +225,7 @@ public BinaryClassifierEvaluator.CalibratedResult Evaluate(IDataView data, strin /// The name of the score column in . /// The name of the predicted label column in . /// The evaluation results for these uncalibrated outputs. - public BinaryClassifierEvaluator.Result EvaluateNonCalibrated(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score, + public BinaryClassificationMetrics EvaluateNonCalibrated(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score, string predictedLabel = DefaultColumnNames.PredictedLabel) { Host.CheckValue(data, nameof(data)); @@ -251,7 +251,7 @@ public BinaryClassifierEvaluator.Result EvaluateNonCalibrated(IDataView data, st /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. 
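// --- Illustrative usage sketch, not part of this patch: consuming the per-fold
// tuples returned by CrossValidate. `ctx` (a BinaryClassificationContext), `data`,
// and `estimator` are assumed to exist; System.Linq provides Average.
var folds = ctx.CrossValidate(data, estimator, numFolds: 5, labelColumn: "Label");
double meanAuc = folds.Average(fold => fold.metrics.Auc);
Console.WriteLine($"Mean AUC over {folds.Length} folds: {meanAuc:F4}");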
- public (BinaryClassifierEvaluator.Result metrics, ITransformer model, IDataView scoredTestData)[] CrossValidateNonCalibrated( + public (BinaryClassificationMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidateNonCalibrated( IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = DefaultColumnNames.Label, string stratificationColumn = null) { Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); @@ -273,7 +273,7 @@ public BinaryClassifierEvaluator.Result EvaluateNonCalibrated(IDataView data, st /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. - public (BinaryClassifierEvaluator.CalibratedResult metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( + public (CalibratedBinaryClassificationMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = DefaultColumnNames.Label, string stratificationColumn = null) { Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); @@ -315,11 +315,11 @@ internal ClusteringTrainers(ClusteringContext ctx) /// The scored data. /// The name of the score column in . /// The name of the optional label column in . - /// If present, the metric will be computed. + /// If present, the metric will be computed. /// The name of the optional features column in . - /// If present, the metric will be computed. + /// If present, the metric will be computed. /// The evaluation result. - public ClusteringEvaluator.Result Evaluate(IDataView data, + public ClusteringMetrics Evaluate(IDataView data, string label = null, string score = DefaultColumnNames.Score, string features = null ) @@ -352,7 +352,7 @@ public ClusteringEvaluator.Result Evaluate(IDataView data, /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. - public (ClusteringEvaluator.Result metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( + public (ClusteringMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = null, string featuresColumn = null, string stratificationColumn = null) { var result = CrossValidateTrain(data, estimator, numFolds, stratificationColumn); @@ -391,11 +391,11 @@ internal MulticlassClassificationTrainers(MulticlassClassificationContext ctx) /// The name of the label column in . /// The name of the score column in . /// The name of the predicted label column in . - /// If given a positive value, the will be filled with + /// If given a positive value, the will be filled with /// the top-K accuracy, that is, the accuracy assuming we consider an example with the correct class within /// the top-K values as being stored "correctly." /// The evaluation results for these calibrated outputs. 
- public MultiClassClassifierEvaluator.Result Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score, + public MulticlassClassifierMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score, string predictedLabel = DefaultColumnNames.PredictedLabel, int topK = 0) { Host.CheckValue(data, nameof(data)); @@ -424,7 +424,7 @@ public MultiClassClassifierEvaluator.Result Evaluate(IDataView data, string labe /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. - public (MultiClassClassifierEvaluator.Result metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( + public (MulticlassClassifierMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = DefaultColumnNames.Label, string stratificationColumn = null) { Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); @@ -464,7 +464,7 @@ internal RegressionTrainers(RegressionContext ctx) /// The name of the label column in . /// The name of the score column in . /// The evaluation results for these calibrated outputs. - public RegressionEvaluator.Result Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score) + public RegressionMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); @@ -488,7 +488,7 @@ public RegressionEvaluator.Result Evaluate(IDataView data, string label = Defaul /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. - public (RegressionEvaluator.Result metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( + public (RegressionMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = DefaultColumnNames.Label, string stratificationColumn = null) { Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); @@ -529,7 +529,7 @@ internal RankingTrainers(RankingContext ctx) /// The name of the groupId column in . /// The name of the score column in . /// The evaluation results for these calibrated outputs. - public RankerEvaluator.Result Evaluate(IDataView data, string label, string groupId, string score = DefaultColumnNames.Score) + public RankerMetrics Evaluate(IDataView data, string label, string groupId, string score = DefaultColumnNames.Score) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); diff --git a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs b/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs index 8cfda0485e..f007ec6b95 100644 --- a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs +++ b/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs @@ -73,7 +73,7 @@ public static (DataView trainSet, DataView testSet) TrainTestSplit(this /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. 
- public static (RegressionEvaluator.Result metrics, Transformer model, DataView scoredTestData)[] CrossValidate( + public static (RegressionMetrics metrics, Transformer model, DataView scoredTestData)[] CrossValidate( this RegressionContext context, DataView data, Estimator estimator, @@ -129,7 +129,7 @@ public static (RegressionEvaluator.Result metrics, Transformer /// Per-fold results: metrics, models, scored datasets. - public static (MultiClassClassifierEvaluator.Result metrics, Transformer model, DataView scoredTestData)[] CrossValidate( + public static (MulticlassClassifierMetrics metrics, Transformer model, DataView scoredTestData)[] CrossValidate( this MulticlassClassificationContext context, DataView data, Estimator estimator, @@ -185,7 +185,7 @@ public static (MultiClassClassifierEvaluator.Result metrics, Transformer /// Per-fold results: metrics, models, scored datasets. - public static (BinaryClassifierEvaluator.Result metrics, Transformer model, DataView scoredTestData)[] CrossValidateNonCalibrated( + public static (BinaryClassificationMetrics metrics, Transformer model, DataView scoredTestData)[] CrossValidateNonCalibrated( this BinaryClassificationContext context, DataView data, Estimator estimator, @@ -241,7 +241,7 @@ public static (BinaryClassifierEvaluator.Result metrics, Transformer /// Per-fold results: metrics, models, scored datasets. - public static (BinaryClassifierEvaluator.CalibratedResult metrics, Transformer model, DataView scoredTestData)[] CrossValidate( + public static (CalibratedBinaryClassificationMetrics metrics, Transformer model, DataView scoredTestData)[] CrossValidate( this BinaryClassificationContext context, DataView data, Estimator estimator, From c8eb28765c939c463fae4663852b7d6b4e6ee072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20S=C4=99k?= Date: Sat, 17 Nov 2018 20:24:45 +0100 Subject: [PATCH 03/10] Uppercase 'C' in MultiClassClassifierEvaluator and Metrics to keep consistency --- .../Evaluators/EvaluatorStaticExtensions.cs | 4 ++-- ...sClassifierMetrics.cs => MultiClassClassifierMetrics.cs} | 4 ++-- ...ssifierEvaluator.cs => MultiClassClassifierEvaluator.cs} | 6 +++--- src/Microsoft.ML.Data/TrainContext.cs | 6 +++--- src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) rename src/Microsoft.ML.Data/Evaluators/Metrics/{MulticlassClassifierMetrics.cs => MultiClassClassifierMetrics.cs} (97%) rename src/Microsoft.ML.Data/Evaluators/{MulticlassClassifierEvaluator.cs => MultiClassClassifierEvaluator.cs} (99%) diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs index ea8b78b99f..de6eec79de 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs @@ -127,11 +127,11 @@ public static ClusteringMetrics Evaluate( /// The index delegate for the label column. /// The index delegate for columns from the prediction of a multiclass classifier. /// Under typical scenarios, this will just be the same tuple of results returned from the trainer. - /// If given a positive value, the will be filled with + /// If given a positive value, the will be filled with /// the top-K accuracy, that is, the accuracy assuming we consider an example with the correct class within /// the top-K values as being stored "correctly." /// The evaluation metrics. 
- public static MulticlassClassifierMetrics Evaluate( + public static MultiClassClassifierMetrics Evaluate( this MulticlassClassificationContext ctx, DataView data, Func> label, diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs similarity index 97% rename from src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs rename to src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs index 5b402856bd..0c90c1db84 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassifierMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs @@ -7,7 +7,7 @@ namespace Microsoft.ML.Runtime.Data { - public sealed class MulticlassClassifierMetrics + public sealed class MultiClassClassifierMetrics { /// /// Gets the micro-average accuracy of the model. @@ -81,7 +81,7 @@ public sealed class MulticlassClassifierMetrics /// public double[] PerClassLogLoss { get; } - internal MulticlassClassifierMetrics(IExceptionContext ectx, IRow overallResult, int topK) + internal MultiClassClassifierMetrics(IExceptionContext ectx, IRow overallResult, int topK) { double FetchDouble(string name) => RowCursorUtils.Fetch(ectx, overallResult, name); AccuracyMicro = FetchDouble(MultiClassClassifierEvaluator.AccuracyMicro); diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs similarity index 99% rename from src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs rename to src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs index f0deff610d..474fdc05aa 100644 --- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs @@ -505,7 +505,7 @@ public void GetSlotNames(ref VBuffer> slotNames) /// The name of the score column in . /// The name of the predicted label column in . /// The evaluation results for these outputs. - public MulticlassClassifierMetrics Evaluate(IDataView data, string label, string score, string predictedLabel) + public MultiClassClassifierMetrics Evaluate(IDataView data, string label, string score, string predictedLabel) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); @@ -521,12 +521,12 @@ public MulticlassClassifierMetrics Evaluate(IDataView data, string label, string Host.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics)); var overall = resultDict[MetricKinds.OverallMetrics]; - MulticlassClassifierMetrics result; + MultiClassClassifierMetrics result; using (var cursor = overall.GetRowCursor(i => true)) { var moved = cursor.MoveNext(); Host.Assert(moved); - result = new MulticlassClassifierMetrics(Host, cursor, _outputTopKAcc ?? 0); + result = new MultiClassClassifierMetrics(Host, cursor, _outputTopKAcc ?? 0); moved = cursor.MoveNext(); Host.Assert(!moved); } diff --git a/src/Microsoft.ML.Data/TrainContext.cs b/src/Microsoft.ML.Data/TrainContext.cs index a0708b0059..862615770f 100644 --- a/src/Microsoft.ML.Data/TrainContext.cs +++ b/src/Microsoft.ML.Data/TrainContext.cs @@ -391,11 +391,11 @@ internal MulticlassClassificationTrainers(MulticlassClassificationContext ctx) /// The name of the label column in . /// The name of the score column in . /// The name of the predicted label column in . 
-        /// If given a positive value, the will be filled with
+        /// If given a positive value, the will be filled with
         /// the top-K accuracy, that is, the accuracy assuming we consider an example with the correct class within
         /// the top-K values as being scored "correctly."
         /// The evaluation results for these calibrated outputs.
-        public MulticlassClassifierMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score,
+        public MultiClassClassifierMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score,
             string predictedLabel = DefaultColumnNames.PredictedLabel, int topK = 0)
         {
             Host.CheckValue(data, nameof(data));
@@ -424,7 +424,7 @@ public MulticlassClassifierMetrics Evaluate(IDataView data, string label = Defau
         /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from
         /// train to the test set.
         /// Per-fold results: metrics, models, scored datasets.
-        public (MulticlassClassifierMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate(
+        public (MultiClassClassifierMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate(
             IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = DefaultColumnNames.Label, string stratificationColumn = null)
         {
             Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
diff --git a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs b/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs
index f007ec6b95..d5f871de08 100644
--- a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs
+++ b/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs
@@ -129,7 +129,7 @@ public static (RegressionMetrics metrics, Transformer
         /// Per-fold results: metrics, models, scored datasets.
- public static (MulticlassClassifierMetrics metrics, Transformer model, DataView scoredTestData)[] CrossValidate( + public static (MultiClassClassifierMetrics metrics, Transformer model, DataView scoredTestData)[] CrossValidate( this MulticlassClassificationContext context, DataView data, Estimator estimator, From ab3ddeeb672076c148af288e4f091047f4223803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20S=C4=99k?= Date: Sat, 17 Nov 2018 21:47:17 +0100 Subject: [PATCH 04/10] Fix metrics types --- .../Scenarios/SentimentPredictionTests.cs | 6 +++--- .../IrisPlantClassificationTests.cs | 2 +- .../SentimentPredictionTests.cs | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index a67834cda0..9f0c2acc55 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -188,7 +188,7 @@ public void CrossValidateSentimentModelTest() Assert.True(predictions.ElementAt(1).Sentiment); } - private void ValidateBinaryMetricsSymSGD(BinaryClassificationMetrics metrics) + private void ValidateBinaryMetricsSymSGD(Microsoft.ML.Legacy.Models.BinaryClassificationMetrics metrics) { Assert.Equal(.8889, metrics.Accuracy, 4); @@ -221,7 +221,7 @@ private void ValidateBinaryMetricsSymSGD(BinaryClassificationMetrics metrics) } - private void ValidateBinaryMetricsLightGBM(BinaryClassificationMetrics metrics) + private void ValidateBinaryMetricsLightGBM(Microsoft.ML.Legacy.Models.BinaryClassificationMetrics metrics) { Assert.Equal(0.61111111111111116, metrics.Accuracy, 4); @@ -254,7 +254,7 @@ private void ValidateBinaryMetricsLightGBM(BinaryClassificationMetrics metrics) } - private void ValidateBinaryMetrics(BinaryClassificationMetrics metrics) + private void ValidateBinaryMetrics(Microsoft.ML.Legacy.Models.BinaryClassificationMetrics metrics) { Assert.Equal(0.6111, metrics.Accuracy, 4); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index f4f95243d5..ee23e45242 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -98,7 +98,7 @@ private void ComparePredictions(PredictionFunction mod Assert.Equal(0, prediction.PredictedLabels[2], 2); } - private void CompareMatrics(MultiClassClassifierEvaluator.Result metrics) + private void CompareMatrics(MultiClassClassifierMetrics metrics) { Assert.Equal(.98, metrics.AccuracyMacro); Assert.Equal(.98, metrics.AccuracyMicro, 2); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs index 820d146cc1..97aa83f509 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs @@ -151,7 +151,7 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordE Assert.Equal(1.0, (double)summary[0].Value, 1); } - private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData) + private Microsoft.ML.Legacy.Models.BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, 
IDataView scoredData) { var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true); @@ -162,7 +162,8 @@ private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataVi var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments()); var metricsDic = evaluator.Evaluate(dataEval); - return BinaryClassificationMetrics.FromMetrics(env, metricsDic["OverallMetrics"], metricsDic["ConfusionMatrix"])[0]; + return Microsoft.ML.Legacy.Models.BinaryClassificationMetrics + .FromMetrics(env, metricsDic["OverallMetrics"], metricsDic["ConfusionMatrix"])[0]; } } } From 3a94abd6594c106b7bb2c177467d5520730ad71c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20S=C4=99k?= Date: Mon, 19 Nov 2018 20:02:10 +0100 Subject: [PATCH 05/10] Change namespace to Microsoft.ML.Data --- .../Evaluators/Metrics/BinaryClassificationMetrics.cs | 5 ++++- .../Metrics/CalibratedBinaryClassificationMetrics.cs | 2 +- .../Evaluators/Metrics/ClusteringMetrics.cs | 2 +- .../Evaluators/Metrics/MultiClassClassifierMetrics.cs | 2 +- src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs | 2 +- .../Evaluators/Metrics/RegressionMetrics.cs | 2 +- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs index a8ceb3ce89..da6c6db497 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/BinaryClassificationMetrics.cs @@ -2,7 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -namespace Microsoft.ML.Runtime.Data +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; + +namespace Microsoft.ML.Data { /// /// Evaluation results for binary classifiers, excluding probabilistic metrics. diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs index 6a1b7593da..c4f0861224 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/CalibratedBinaryClassificationMetrics.cs @@ -5,7 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Data { /// /// Evaluation results for binary classifiers, including probabilistic metrics. diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs index e53d16d4b5..d59f8a07cf 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs @@ -5,7 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Data { /// /// The metrics generated after evaluating the clustering predictions. 
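For orientation, a minimal sketch of consuming the relocated ClusteringMetrics type. This is hypothetical usage, not part of the patch: the Clustering context entry point and the column names are assumptions, while the ClusteringMetrics name and its Nmi member come from the diff above.

    // Hypothetical usage sketch (assumed setup, not part of this patch).
    ClusteringMetrics metrics = mlContext.Clustering.Evaluate(
        scoredData, label: "Label", features: "Features");

    // NMI needs a ground-truth label column; it lies in [0, 1], higher is better.
    Console.WriteLine($"NMI: {metrics.Nmi:F4}");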
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs index 0c90c1db84..4eff184abc 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs @@ -5,7 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Data { public sealed class MultiClassClassifierMetrics { diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs index 8d8a93f24d..e2cdf6213f 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs @@ -5,7 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Data { public sealed class RankerMetrics { diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs index 356e936403..5f946a2239 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/RegressionMetrics.cs @@ -5,7 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Data { public sealed class RegressionMetrics { From 61b2941c5632e2030ed7c15de258f6894a53cc99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20S=C4=99k?= Date: Mon, 19 Nov 2018 20:03:22 +0100 Subject: [PATCH 06/10] Adjust metrics namespaces --- src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs | 1 + src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs | 1 + src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs | 1 + .../Evaluators/MultiClassClassifierEvaluator.cs | 1 + src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs | 1 + src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs | 1 + src/Microsoft.ML.Data/TrainContext.cs | 1 + src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs | 1 + 8 files changed, 8 insertions(+) diff --git a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs index 58c9dd07bb..9afc4bb67d 100644 --- a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; diff --git a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs index cba76448a9..6b8458dd1c 100644 --- a/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/ClusteringEvaluator.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs index de6eec79de..46972dc75d 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorStaticExtensions.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using Microsoft.ML.Data; using Microsoft.ML.Runtime.Training; using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; diff --git a/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs index 327c7f4995..d586391b39 100644 --- a/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MultiClassClassifierEvaluator.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; diff --git a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs index e859030002..a7aa5bc434 100644 --- a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs @@ -9,6 +9,7 @@ using System.Text; using System.Text.RegularExpressions; using System.Threading; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; diff --git a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs index ff862f2250..cd388cdff8 100644 --- a/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RegressionEvaluator.cs @@ -11,6 +11,7 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Data; [assembly: LoadableClass(typeof(RegressionEvaluator), typeof(RegressionEvaluator), typeof(RegressionEvaluator.Arguments), typeof(SignatureEvaluator), "Regression Evaluator", RegressionEvaluator.LoadName, "Regression")] diff --git a/src/Microsoft.ML.Data/TrainContext.cs b/src/Microsoft.ML.Data/TrainContext.cs index 862615770f..ec93921cf8 100644 --- a/src/Microsoft.ML.Data/TrainContext.cs +++ b/src/Microsoft.ML.Data/TrainContext.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. 
 using Microsoft.ML.Core.Data;
+using Microsoft.ML.Data;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Transforms;
diff --git a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs b/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs
index d5f871de08..f52a759b74 100644
--- a/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs
+++ b/src/Microsoft.ML.Data/Training/TrainingStaticExtensions.cs
@@ -11,6 +11,7 @@
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
+using Microsoft.ML.Data;

 namespace Microsoft.ML
 {

From bd0495022c23ff4fcee4fd4a87b91ab5b682064d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20S=C4=99k?=
Date: Tue, 20 Nov 2018 18:48:25 +0100
Subject: [PATCH 07/10] XML-encode apostrophes
---
 src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs
index d59f8a07cf..70500ed747 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/ClusteringMetrics.cs
@@ -21,9 +21,9 @@ public sealed class ClusteringMetrics
         public double Nmi { get; }

         ///
-        /// Average Score. For the K-Means algorithm, the 'score' is the distance from the centroid to the example.
+        /// Average Score. For the K-Means algorithm, the &apos;score&apos; is the distance from the centroid to the example.
         /// The average score is, therefore, a measure of proximity of the examples to cluster centroids.
-        /// In other words, it's the 'cluster tightness' measure.
+        /// In other words, it&apos;s the &apos;cluster tightness&apos; measure.
         /// Note however, that this metric will only decrease if the number of clusters is increased,
         /// and in the extreme case (where each distinct example is its own cluster) it will be equal to zero.
         ///

From ece2b6e054bb6bbc95bad5b7b4e3b2bd88fbabf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20S=C4=99k?=
Date: Wed, 28 Nov 2018 19:37:04 +0100
Subject: [PATCH 08/10] Use metric types
---
 .../RecommenderCatalog.cs                     |  9 ++++----
 .../PermutationFeatureImportanceExtensions.cs | 21 ++++++++++---------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/Microsoft.ML.Recommender/RecommenderCatalog.cs b/src/Microsoft.ML.Recommender/RecommenderCatalog.cs
index 1100b66013..2be553736f 100644
--- a/src/Microsoft.ML.Recommender/RecommenderCatalog.cs
+++ b/src/Microsoft.ML.Recommender/RecommenderCatalog.cs
@@ -3,11 +3,12 @@
 using Microsoft.ML.Core.Data;
-using Microsoft.ML.Runtime;
+using Microsoft.ML.Data;
 using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime;
 using Microsoft.ML.Trainers;
-using System;
 using System.Linq;
+using System;

 namespace Microsoft.ML
 {
@@ -71,7 +72,7 @@ public MatrixFactorizationTrainer MatrixFactorization(
         /// The name of the label column in .
         /// The name of the score column in .
         /// The evaluation results for these outputs.
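A usage sketch for the recommender evaluation whose signature changes directly below; this is hypothetical code, not part of the patch. The context instance and column names are assumptions, while the L1 and Rms member names are taken from the RegressionDelta helper later in this commit.

    // Hypothetical usage sketch (assumed setup, not part of this patch):
    // matrix factorization is scored with regression metrics, where
    // L1 is the mean absolute error and Rms the root mean squared error.
    RegressionMetrics metrics = recommendationContext.Evaluate(
        scoredData, label: "Label", score: "Score");

    Console.WriteLine($"MAE  (L1):  {metrics.L1:F4}");
    Console.WriteLine($"RMSE (Rms): {metrics.Rms:F4}");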
- public RegressionEvaluator.Result Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score) + public RegressionMetrics Evaluate(IDataView data, string label = DefaultColumnNames.Label, string score = DefaultColumnNames.Score) { Host.CheckValue(data, nameof(data)); Host.CheckNonEmpty(label, nameof(label)); @@ -95,7 +96,7 @@ public RegressionEvaluator.Result Evaluate(IDataView data, string label = Defaul /// they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from /// train to the test set. /// Per-fold results: metrics, models, scored datasets. - public (RegressionEvaluator.Result metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( + public (RegressionMetrics metrics, ITransformer model, IDataView scoredTestData)[] CrossValidate( IDataView data, IEstimator estimator, int numFolds = 5, string labelColumn = DefaultColumnNames.Label, string stratificationColumn = null) { Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 5b726f2d41..07bfa4ebfc 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Transforms; @@ -51,7 +52,7 @@ public static class PermutationFeatureImportanceExtensions /// Use features weight to pre-filter features. /// Limit the number of examples to evaluate on. null means examples (up to ~ 2 bln) from input will be used. /// Array of per-feature 'contributions' to the score. - public static ImmutableArray + public static ImmutableArray PermutationFeatureImportance( this RegressionContext ctx, IPredictionTransformer model, @@ -61,7 +62,7 @@ public static ImmutableArray bool useFeatureWeightFilter = false, int? topExamples = null) { - return PermutationFeatureImportance.GetImportanceMetricsMatrix( + return PermutationFeatureImportance.GetImportanceMetricsMatrix( CatalogUtils.GetEnvironment(ctx), model, data, @@ -72,10 +73,10 @@ public static ImmutableArray topExamples); } - private static RegressionEvaluator.Result RegressionDelta( - RegressionEvaluator.Result a, RegressionEvaluator.Result b) + private static RegressionMetrics RegressionDelta( + RegressionMetrics a, RegressionMetrics b) { - return new RegressionEvaluator.Result( + return new RegressionMetrics( l1: a.L1 - b.L1, l2: a.L2 - b.L2, rms: a.Rms - b.Rms, @@ -96,7 +97,7 @@ private static RegressionEvaluator.Result RegressionDelta( /// Use features weight to pre-filter features. /// Limit the number of examples to evaluate on. null means examples (up to ~ 2 bln) from input will be used. /// Array of per-feature 'contributions' to the score. - public static ImmutableArray + public static ImmutableArray PermutationFeatureImportance( this BinaryClassificationContext ctx, IPredictionTransformer model, @@ -106,7 +107,7 @@ public static ImmutableArray bool useFeatureWeightFilter = false, int? 
topExamples = null)
         {
-            return PermutationFeatureImportance.GetImportanceMetricsMatrix(
+            return PermutationFeatureImportance.GetImportanceMetricsMatrix(
                 CatalogUtils.GetEnvironment(ctx),
                 model,
                 data,
@@ -117,10 +118,10 @@ public static ImmutableArray
                 topExamples);
         }

-        private static BinaryClassifierEvaluator.Result BinaryClassifierDelta(
-            BinaryClassifierEvaluator.Result a, BinaryClassifierEvaluator.Result b)
+        private static BinaryClassificationMetrics BinaryClassifierDelta(
+            BinaryClassificationMetrics a, BinaryClassificationMetrics b)
         {
-            return new BinaryClassifierEvaluator.Result(
+            return new BinaryClassificationMetrics(
                 auc: a.Auc - b.Auc,
                 accuracy: a.Accuracy - b.Accuracy,
                 positivePrecision: a.PositivePrecision - b.PositivePrecision,

From ce5b4161aaa6e07182d65f36f8f1801493eedaca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20S=C4=99k?=
Date: Wed, 28 Nov 2018 20:48:28 +0100
Subject: [PATCH 09/10] merge submodules
---
 src/Native/MatrixFactorizationNative/libmf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Native/MatrixFactorizationNative/libmf b/src/Native/MatrixFactorizationNative/libmf
index 1ecc365249..53a91e7e8c 160000
--- a/src/Native/MatrixFactorizationNative/libmf
+++ b/src/Native/MatrixFactorizationNative/libmf
@@ -1 +1 @@
-Subproject commit 1ecc365249e5cac5e72c66317a141298dc52f6e3
+Subproject commit 53a91e7e8c88463e97acfbbafb7134a6030860b3

From 33d35f46272486d597b396b4569d067ec4791ed4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20S=C4=99k?=
Date: Wed, 28 Nov 2018 21:44:37 +0100
Subject: [PATCH 10/10] Fix descriptions
---
 src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs
index cb19a6b630..975d4a494c 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/RankerMetrics.cs
@@ -10,13 +10,13 @@ namespace Microsoft.ML.Data
     public sealed class RankerMetrics
     {
         ///
-        /// Array of normalized discounted cumulative gains where i-th element represent NDCG@i (you can also phrase it as NDCG at i).
+        /// Array of normalized discounted cumulative gains where i-th element represents NDCG@i.
         ///
         ///
         public double[] Ndcg { get; }

         ///
-        ///Array of discounted cumulative gains where i-th element represent DCG@i (you can also phrase it as DCG at i).
+        /// Array of discounted cumulative gains where i-th element represents DCG@i.
         /// Discounted Cumulative gain
         /// is the sum of the gains, for all the instances i, normalized by the natural logarithm of the instance + 1.
         /// Note that unlike the Wikipedia article, ML.NET uses the natural logarithm.
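To make the discounting convention above concrete, here is a small self-contained sketch (illustrative code, not ML.NET source) computing DCG@k and NDCG@k with the natural-logarithm discount the comment describes. The gain values are assumed inputs; ML.NET derives them from relevance labels.

    using System;
    using System.Linq;

    static class DcgSketch
    {
        // DCG@k with the natural-log discount described above: the gain at
        // 1-based position i contributes gain / Math.Log(i + 1), so the top
        // result is discounted by ln(2). Math.Log is the natural logarithm.
        public static double Dcg(double[] gains, int k)
        {
            double dcg = 0;
            for (int i = 1; i <= Math.Min(k, gains.Length); i++)
                dcg += gains[i - 1] / Math.Log(i + 1);
            return dcg;
        }

        // NDCG@k divides by the DCG of the ideal (descending-gain) ordering,
        // which normalizes the value into [0, 1].
        public static double Ndcg(double[] gains, int k)
        {
            double ideal = Dcg(gains.OrderByDescending(g => g).ToArray(), k);
            return ideal == 0 ? 0 : Dcg(gains, k) / ideal;
        }
    }

For example, with gains {3, 2, 1} the ranking already matches the ideal ordering, so Ndcg returns 1.0 at every k.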