diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e92aa27..0860c7ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 15.3.0 +- RSS metric added + ## 15.2.4 - Documentation for classification metrics improved diff --git a/lib/src/helpers/validate_matrix_columns.dart b/lib/src/helpers/validate_matrix_columns.dart new file mode 100644 index 00000000..e4c2b4e4 --- /dev/null +++ b/lib/src/helpers/validate_matrix_columns.dart @@ -0,0 +1,13 @@ +import 'package:ml_algo/src/common/exception/matrix_column_exception.dart'; +import 'package:ml_linalg/matrix.dart'; + +void validateMatrixColumns(Iterable matrices) { + final firstInvalidMatrix = matrices + .firstWhere((matrix) => matrix.columnsNum != 1, orElse: () => null); + + if (firstInvalidMatrix == null) { + return; + } + + throw MatrixColumnException(firstInvalidMatrix); +} diff --git a/lib/src/metric/metric_type.dart b/lib/src/metric/metric_type.dart index 669e23ef..fa977d36 100644 --- a/lib/src/metric/metric_type.dart +++ b/lib/src/metric/metric_type.dart @@ -1,7 +1,5 @@ /// Metrics for measuring the quality of the prediction. enum MetricType { - /// - /// /// Mean percentage absolute error (MAPE), a regression metric. The formula /// is: /// @@ -9,7 +7,7 @@ enum MetricType { /// ![{\mbox{Score}}={\frac{1}{n}}\sum_{{t=1}}^{n}\left|{\frac{y_{t}-\widehat{y}_{t}}{y_{t}}}\right|](https://latex.codecogs.com/gif.latex?%7B%5Cmbox%7BScore%7D%7D%3D%7B%5Cfrac%7B1%7D%7Bn%7D%7D%5Csum_%7B%7Bt%3D1%7D%7D%5E%7Bn%7D%5Cleft%7C%7B%5Cfrac%7By_%7Bt%7D-%5Cwidehat%7By%7D_%7Bt%7D%7D%7By_%7Bt%7D%7D%7D%5Cright%7C) /// /// - /// where y - original value, y with hat - predicted one + /// where `y` - original value, `y` with hat - predicted one /// /// /// The less the score produced by the metric, the better the prediction's @@ -19,9 +17,7 @@ enum MetricType { /// can produce scores which are greater than 1. mape, - /// - /// - /// Root mean squared error (RMSE), a regression metric. 
The formula is: + /// Root mean squared error (RMSE), a regression metric. The formula is /// /// /// ![{\mbox{Score}}=\sqrt{\frac{1}{n}\sum_{{t=1}}^{n}({\widehat{y}_{t} - y_{t}})^2}](https://latex.codecogs.com/gif.latex?%7B%5Cmbox%7BScore%7D%7D%3D%5Csqrt%7B%5Cfrac%7B1%7D%7Bn%7D%5Csum_%7B%7Bt%3D1%7D%7D%5E%7Bn%7D%28%7B%5Cwidehat%7By%7D_%7Bt%7D%20-%20y_%7Bt%7D%7D%29%5E2%7D) @@ -35,8 +31,15 @@ enum MetricType { /// scores within the range [0, +Infinity] rmse, + /// Residual sum of squares (RSS), a regression metric. The formula is + /// + /// ![{\mbox{Score}}=\sum_{t=1}^{n}(y_{t} - \widehat{y}_{t})^{2}](https://latex.codecogs.com/gif.latex?%7B%5Cmbox%7BScore%7D%7D%3D%5Csum_%7Bt%3D1%7D%5E%7Bn%7D%28y_%7Bt%7D%20-%20%5Cwidehat%7By%7D_%7Bt%7D%29%5E%7B2%7D) /// + /// where `n` is a total amount of labels, `y` is an original value, `y` with + /// hat - predicted one /// + rss, + /// A classification metric. The formula is /// /// @@ -51,17 +54,15 @@ enum MetricType { /// quality is. The metric produces scores within the range [0, 1] accuracy, - /// - /// /// A classification metric. The formula for a single-class problem is /// /// /// ![{\mbox{Score}}=\frac{TP}{TP + FP}](https://latex.codecogs.com/gif.latex?%7B%5Cmbox%7BScore%7D%7D%3D%5Cfrac%7BTP%7D%7BTP%20+%20FP%7D) /// /// - /// where TP is a number of correctly predicted positive labels (true positive), - /// FP - a number of incorrectly predicted positive labels (false positive). In - /// other words, TP + FP is a number of all the labels predicted to be positive + /// where `TP` is a number of correctly predicted positive labels (true positive), + /// `FP` - a number of incorrectly predicted positive labels (false positive). In + /// other words, `TP + FP` is a number of all the labels predicted to be positive /// /// The formula for a multi-class problem is /// @@ -76,17 +77,15 @@ enum MetricType { /// range [0, 1] precision, - /// - /// /// A classification metric. 
The formula for a single-class problem is /// /// /// ![{\mbox{Score}}=\frac{TP}{TP + FN}](https://latex.codecogs.com/gif.latex?%7B%5Cmbox%7BScore%7D%7D%3D%5Cfrac%7BTP%7D%7BTP%20+%20FN%7D) /// /// - /// where TP is a number of correctly predicted positive labels (true positive), - /// FN - a number of incorrectly predicted negative labels (false negative). In - /// other words, TP + FN is a total amount of positive labels for a class in + /// where `TP` is a number of correctly predicted positive labels (true positive), + /// `FN` - a number of incorrectly predicted negative labels (false negative). In + /// other words, `TP + FN` is a total amount of positive labels for a class in /// the given data /// /// The formula for a multi-class problem is diff --git a/lib/src/metric/regression/mape.dart b/lib/src/metric/regression/mape.dart index 150df299..bc5ce15d 100644 --- a/lib/src/metric/regression/mape.dart +++ b/lib/src/metric/regression/mape.dart @@ -1,4 +1,4 @@ -import 'package:ml_algo/src/common/exception/matrix_column_exception.dart'; +import 'package:ml_algo/src/helpers/validate_matrix_columns.dart'; import 'package:ml_algo/src/metric/metric.dart'; import 'package:ml_linalg/linalg.dart'; @@ -7,16 +7,12 @@ class MapeMetric implements Metric { @override double getScore(Matrix predictedLabels, Matrix originalLabels) { - if (predictedLabels.columnsNum != 1) { - throw MatrixColumnException(predictedLabels); - } + validateMatrixColumns([predictedLabels, originalLabels]); - if (originalLabels.columnsNum != 1) { - throw MatrixColumnException(originalLabels); - } - - final predicted = predictedLabels.getColumn(0); - final original = originalLabels.getColumn(0); + final predicted = predictedLabels + .toVector(); + final original = originalLabels + .toVector(); return ((original - predicted) / original) .abs() diff --git a/lib/src/metric/regression/rmse.dart b/lib/src/metric/regression/rmse.dart index fd9746de..7878ecc0 100644 --- a/lib/src/metric/regression/rmse.dart 
+++ b/lib/src/metric/regression/rmse.dart @@ -1,5 +1,6 @@ import 'dart:math' as math; +import 'package:ml_algo/src/helpers/validate_matrix_columns.dart'; import 'package:ml_algo/src/metric/metric.dart'; import 'package:ml_linalg/linalg.dart'; @@ -8,16 +9,14 @@ class RmseMetric implements Metric { @override double getScore(Matrix predictedLabels, Matrix origLabels) { - if (predictedLabels.columnsNum != 1 || origLabels.columnsNum != 1) { - throw Exception('Both predicted labels and original labels have to be ' - 'a matrix-column'); - } + validateMatrixColumns([predictedLabels, origLabels]); final predicted = predictedLabels .getColumn(0); final original = origLabels .getColumn(0); - return math.sqrt(((predicted - original).pow(2)).mean()); + return math + .sqrt(((predicted - original).pow(2)).mean()); } } diff --git a/lib/src/metric/regression/rss.dart b/lib/src/metric/regression/rss.dart new file mode 100644 index 00000000..7c3a7d5c --- /dev/null +++ b/lib/src/metric/regression/rss.dart @@ -0,0 +1,21 @@ +import 'package:ml_algo/src/helpers/validate_matrix_columns.dart'; +import 'package:ml_algo/src/metric/metric.dart'; +import 'package:ml_linalg/matrix.dart'; + +class RssMetric implements Metric { + const RssMetric(); + + @override + double getScore(Matrix predictedLabels, Matrix origLabels) { + validateMatrixColumns([predictedLabels, origLabels]); + + final predicted = predictedLabels + .toVector(); + final original = origLabels + .toVector(); + + return (predicted - original) + .pow(2) + .sum(); + } +} diff --git a/pubspec.yaml b/pubspec.yaml index c7a07b47..85122acd 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,6 +1,6 @@ name: ml_algo description: Machine learning algorithms, Machine learning models performance evaluation functionality -version: 15.2.4 +version: 15.3.0 homepage: https://github.com/gyrdym/ml_algo environment: diff --git a/test/metric/regression/rss_test.dart b/test/metric/regression/rss_test.dart new file mode 100644 index 00000000..30b35e41 
--- /dev/null +++ b/test/metric/regression/rss_test.dart @@ -0,0 +1,46 @@ +import 'package:ml_algo/src/common/exception/matrix_column_exception.dart'; +import 'package:ml_algo/src/metric/regression/rss.dart'; +import 'package:ml_linalg/matrix.dart'; +import 'package:test/test.dart'; + +void main() { + group('RssMetric', () { + const metric = RssMetric(); + final predictedLabels = Matrix.column([12, 18, 12, 90, 78]); + final originalLabels = Matrix.column([10, 20, 30, 60, 70]); + + test('should throw an error if predicted labels matrix\'s columns count ' + 'is empty', () { + final actual = () => metric.getScore(Matrix.empty(), originalLabels); + + expect(actual, throwsA(isA<MatrixColumnException>())); + }); + + test('should throw an error if predicted labels matrix\'s columns count ' + 'is greater than 1', () { + final actual = () => metric.getScore(Matrix.row([1, 2]), originalLabels); + + expect(actual, throwsA(isA<MatrixColumnException>())); + }); + + test('should throw an error if original labels matrix\'s columns count ' + 'is empty', () { + final actual = () => metric.getScore(predictedLabels, Matrix.empty()); + + expect(actual, throwsA(isA<MatrixColumnException>())); + }); + + test('should throw an error if original labels matrix\'s columns count ' + 'is greater than 1', () { + final actual = () => metric.getScore(predictedLabels, Matrix.row([1, 2])); + + expect(actual, throwsA(isA<MatrixColumnException>())); + }); + + test('should count score', () { + final actual = metric.getScore(predictedLabels, originalLabels); + + expect(actual, 1296); + }); + }); +}