From ce629ba6dbd3633d175d47858287c7ec1b7c66d2 Mon Sep 17 00:00:00 2001 From: Ilya Gyrdymov Date: Sat, 26 Oct 2019 12:12:44 +0300 Subject: [PATCH 1/3] KnnClassifierImpl.predictProbability: base unit tests added --- .../knn_classifier/knn_classifier_impl.dart | 24 +- .../knn_classifier_impl_test.dart | 205 ++++++++++++++++++ 2 files changed, 221 insertions(+), 8 deletions(-) diff --git a/lib/src/classifier/knn_classifier/knn_classifier_impl.dart b/lib/src/classifier/knn_classifier/knn_classifier_impl.dart index 706f8b58..6b9afcfc 100644 --- a/lib/src/classifier/knn_classifier/knn_classifier_impl.dart +++ b/lib/src/classifier/knn_classifier/knn_classifier_impl.dart @@ -34,7 +34,7 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier { DataFrame predict(DataFrame features) { validateTestFeatures(features, _dtype); - final labelsToProbabilities = _getLabelsToProbabilitiesMapping(features); + final labelsToProbabilities = _getLabelToProbabilityMapping(features); final labels = labelsToProbabilities.keys.toList(); final predictedOutcomes = _getProbabilityMatrix(labelsToProbabilities) .rows @@ -51,7 +51,7 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier { @override DataFrame predictProbabilities(DataFrame features) { - final labelsToProbabilities = _getLabelsToProbabilitiesMapping(features); + final labelsToProbabilities = _getLabelToProbabilityMapping(features); final probabilityMatrix = _getProbabilityMatrix(labelsToProbabilities); final header = labelsToProbabilities @@ -83,13 +83,21 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier { /// /// where each row is a classes probability distribution for the appropriate /// feature record from test feature matrix - Map> _getLabelsToProbabilitiesMapping(DataFrame features) { - final neighbours = _solver.findKNeighbours(features.toMatrix(_dtype)); + Map> _getLabelToProbabilityMapping(DataFrame features) { + final kNeighbourGroups = _solver.findKNeighbours(features.toMatrix(_dtype)); + final classLabelsAsSet = Set.from(_classLabels); - return neighbours.fold>>( + return kNeighbourGroups.fold>>( {}, (allLabelsToProbabilities, kNeighbours) { - final labelsToWeights = kNeighbours - .fold>({}, _getLabelToWeightMapping); + + final labelsToWeights = kNeighbours.fold>( + {}, (mapping, neighbour) { + if (!classLabelsAsSet.contains(neighbour.label.first)) { + throw Exception('Wrong KNN solver provided: unexpected neighbour ' + 'class label - ${neighbour.label.first}'); + } + return _updateLabelToWeightMapping(mapping, neighbour); + }); final sumOfAllWeights = labelsToWeights .values @@ -132,7 +140,7 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier { .fromColumns(probabilityVectors, dtype: _dtype); } - Map _getLabelToWeightMapping( + Map _updateLabelToWeightMapping( Map labelToWeightMapping, Neighbour neighbour, ) { diff --git a/test/classifier/knn_classifier/knn_classifier_impl_test.dart b/test/classifier/knn_classifier/knn_classifier_impl_test.dart index 5510bca5..312f6c42 100644 --- a/test/classifier/knn_classifier/knn_classifier_impl_test.dart +++ b/test/classifier/knn_classifier/knn_classifier_impl_test.dart @@ -2,6 +2,7 @@ import 'package:ml_algo/src/classifier/knn_classifier/knn_classifier_impl.dart'; import 'package:ml_algo/src/knn_solver/neigbour.dart'; import 'package:ml_dataframe/ml_dataframe.dart'; import 'package:ml_linalg/linalg.dart'; +import 'package:ml_tech/unit_testing/matchers/iterable_2d_almost_equal_to.dart'; import 'package:mockito/mockito.dart'; import 'package:test/test.dart'; @@ -241,5 +242,209 @@ void main() { expect(actual.rows, equals(expectedOutcomes)); }); }); + + group('predictProbability', () { + final solverMock = KnnSolverMock(); + final kernelMock = KernelMock(); + + setUp(() => when(kernelMock.getWeightByDistance(any, any)).thenReturn(1)); + + tearDown(() { + reset(solverMock); + reset(kernelMock); + }); + + test('should return probability distribution of classes for each feature ' + 'row, probability of absent class label should be 0.0', () { + final classLabels = [1, 2, 3]; + final classifier = KnnClassifierImpl( + 'target', + classLabels, + kernelMock, + solverMock, + DType.float32, + ); + + final testFeatureMatrix = Matrix.fromList( + [ + [10, 10, 10, 10], + [20, 20, 20, 20], + [30, 30, 30, 30], + ], + ); + + final testFeatures = DataFrame.fromMatrix(testFeatureMatrix); + + final mockedNeighbours = [ + [ + Neighbour(1, Vector.fromList([1])), + Neighbour(20, Vector.fromList([2])), + Neighbour(21, Vector.fromList([1])), + ], + [ + Neighbour(33, Vector.fromList([1])), + Neighbour(44, Vector.fromList([3])), + Neighbour(93, Vector.fromList([3])), + ], + [ + Neighbour(-1, Vector.fromList([2])), + Neighbour(-30, Vector.fromList([2])), + Neighbour(-40, Vector.fromList([1])), + ], + ]; + + when(kernelMock.getWeightByDistance(1)).thenReturn(10); + when(kernelMock.getWeightByDistance(20)).thenReturn(15); + when(kernelMock.getWeightByDistance(21)).thenReturn(10); + + when(kernelMock.getWeightByDistance(33)).thenReturn(11); + when(kernelMock.getWeightByDistance(44)).thenReturn(15); + when(kernelMock.getWeightByDistance(93)).thenReturn(15); + + when(kernelMock.getWeightByDistance(-1)).thenReturn(5); + when(kernelMock.getWeightByDistance(-30)).thenReturn(5); + when(kernelMock.getWeightByDistance(-40)).thenReturn(1); + + when(solverMock.findKNeighbours(testFeatureMatrix)) + .thenReturn(mockedNeighbours); + + final actual = classifier.predictProbabilities(testFeatures); + + final expectedOutcomes = [ + [ 20 / 35, 15 / 35, 0.0 ], + [ 11 / 41, 0.0, 30 / 41 ], + [ 1 / 11, 10 / 11, 0.0 ], + ]; + + expect(actual.rows, iterable2dAlmostEqualTo(expectedOutcomes)); + }); + + test('should return a dataframe with a header, containing proper column ' + 'names', () { + final classLabels = [1, 2, 3]; + + final classifier = KnnClassifierImpl( + 'target', + classLabels, + kernelMock, + solverMock, + DType.float32, + ); + + final testFeatureMatrix = Matrix.fromList( + [ + [10, 10, 10, 10], + ], + ); + + final testFeatures = DataFrame.fromMatrix(testFeatureMatrix); + + final mockedNeighbours = [ + [ + Neighbour(1, Vector.fromList([1])), + ], + ]; + + when(solverMock.findKNeighbours(testFeatureMatrix)) + .thenReturn(mockedNeighbours); + + final actual = classifier.predictProbabilities(testFeatures); + + expect(actual.header, + equals(['Class label 1', 'Class label 2', 'Class label 3'])); + }); + + test('should consider initial order of column labels', () { + final firstClassLabel = 1; + final secondClassLabel = 2; + final thirdClassLabel = 3; + + final classLabels = [thirdClassLabel, firstClassLabel, secondClassLabel]; + + final classifier = KnnClassifierImpl( + 'target', + classLabels, + kernelMock, + solverMock, + DType.float32, + ); + + final testFeatureMatrix = Matrix.fromList( + [ + [10, 10, 10, 10], + ], + ); + + final testFeatures = DataFrame.fromMatrix(testFeatureMatrix); + + final mockedNeighbours = [ + [ + Neighbour(1, Vector.fromList([firstClassLabel])), + Neighbour(10, Vector.fromList([secondClassLabel])), + Neighbour(20, Vector.fromList([thirdClassLabel])), + ], + ]; + + when(solverMock.findKNeighbours(testFeatureMatrix)) + .thenReturn(mockedNeighbours); + + final firstClassWeight = 100; + final secondClassWeight = 90; + final thirdClassWeight = 70; + + when(kernelMock.getWeightByDistance(1)).thenReturn(firstClassWeight); + when(kernelMock.getWeightByDistance(10)).thenReturn(secondClassWeight); + when(kernelMock.getWeightByDistance(20)).thenReturn(thirdClassWeight); + + final actual = classifier.predictProbabilities(testFeatures); + final predictedProbabilities = actual.rows; + + expect(actual.header, + equals(['Class label 3', 'Class label 1', 'Class label 2'])); + expect(predictedProbabilities, iterable2dAlmostEqualTo([ + [thirdClassWeight / 260, firstClassWeight / 260, secondClassWeight / 260], + ])); + }); + + test('should throw an exception if provided knn solver learned on wrong ' + 'class labels', () { + final firstClassLabel = 1; + final secondClassLabel = 2; + final thirdClassLabel = 3; + + final unexpectedClassLabel = 100; + + final classLabels = [thirdClassLabel, firstClassLabel, secondClassLabel]; + + final classifier = KnnClassifierImpl( + 'target', + classLabels, + kernelMock, + solverMock, + DType.float32, + ); + + final testFeatureMatrix = Matrix.fromList( + [ + [10, 10, 10, 10], + ], + ); + + final testFeatures = DataFrame.fromMatrix(testFeatureMatrix); + + final mockedNeighbours = [ + [ + Neighbour(20, Vector.fromList([unexpectedClassLabel])), + ], + ]; + + when(solverMock.findKNeighbours(testFeatureMatrix)) + .thenReturn(mockedNeighbours); + + final actual = () => classifier.predictProbabilities(testFeatures); + + expect(actual, throwsException); + }); + }); }); } From 88b846ad8d1d8b9bf082ef9510f6302d972e85e7 Mon Sep 17 00:00:00 2001 From: Ilya Gyrdymov Date: Sat, 26 Oct 2019 12:25:35 +0300 Subject: [PATCH 2/3] KnnClassifierImpl.predictProbabilities: rest of unit tests added, version updated and changelog record added --- CHANGELOG.md | 3 + pubspec.yaml | 2 +- .../knn_classifier_impl_test.dart | 67 ++++++++++++++++--- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 449bc0b1..99680fa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 13.3.3 +- `KnnClassifierImpl`: unit tests for `predictProbability` method added + ## 13.3.2 - `KnnClassifier`: classifier instantiating refactored diff --git a/pubspec.yaml b/pubspec.yaml index 166c2e83..b136d8de 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,6 +1,6 @@ name: ml_algo description: Machine learning algorithms written in native dart -version: 13.3.2 +version: 13.3.3 author: Ilia Gyrdymov homepage: https://github.com/gyrdym/ml_algo diff --git a/test/classifier/knn_classifier/knn_classifier_impl_test.dart b/test/classifier/knn_classifier/knn_classifier_impl_test.dart index 312f6c42..210090fa 100644 --- a/test/classifier/knn_classifier/knn_classifier_impl_test.dart +++ b/test/classifier/knn_classifier/knn_classifier_impl_test.dart @@ -255,7 +255,7 @@ void main() { }); test('should return probability distribution of classes for each feature ' - 'row, probability of absent class label should be 0.0', () { + 'row', () { final classLabels = [1, 2, 3]; final classifier = KnnClassifierImpl( 'target', @@ -279,17 +279,17 @@ void main() { [ Neighbour(1, Vector.fromList([1])), Neighbour(20, Vector.fromList([2])), - Neighbour(21, Vector.fromList([1])), + Neighbour(21, Vector.fromList([3])), ], [ Neighbour(33, Vector.fromList([1])), Neighbour(44, Vector.fromList([3])), - Neighbour(93, Vector.fromList([3])), + Neighbour(93, Vector.fromList([2])), ], [ Neighbour(-1, Vector.fromList([2])), - Neighbour(-30, Vector.fromList([2])), - Neighbour(-40, Vector.fromList([1])), + Neighbour(-30, Vector.fromList([1])), + Neighbour(-40, Vector.fromList([3])), ], ]; @@ -311,14 +311,65 @@ void main() { final actual = classifier.predictProbabilities(testFeatures); final expectedOutcomes = [ - [ 20 / 35, 15 / 35, 0.0 ], - [ 11 / 41, 0.0, 30 / 41 ], - [ 1 / 11, 10 / 11, 0.0 ], + [ 10 / 35, 15 / 35, 10 / 35 ], + [ 11 / 41, 15 / 41, 15 / 41 ], + [ 5 / 11, 5 / 11, 1 / 11 ], ]; expect(actual.rows, iterable2dAlmostEqualTo(expectedOutcomes)); }); + test('should return probability distribution of classes where ' + 'probabilities of absent class labels are 0.0', () { + final classLabels = [1, 2, 3]; + final classifier = KnnClassifierImpl( + 'target', + classLabels, + kernelMock, + solverMock, + DType.float32, + ); + + final testFeatureMatrix = Matrix.fromList( + [ + [10, 10, 10, 10], + [20, 20, 20, 20], + [30, 30, 30, 30], + ], + ); + + final testFeatures = DataFrame.fromMatrix(testFeatureMatrix); + + final mockedNeighbours = [ + [ + Neighbour(1, Vector.fromList([2])), + Neighbour(20, Vector.fromList([2])), + Neighbour(21, Vector.fromList([1])), + ], + [ + Neighbour(1, Vector.fromList([3])), + Neighbour(20, Vector.fromList([3])), + Neighbour(21, Vector.fromList([3])), + ], + ]; + + when(kernelMock.getWeightByDistance(1)).thenReturn(10); + when(kernelMock.getWeightByDistance(20)).thenReturn(15); + when(kernelMock.getWeightByDistance(21)).thenReturn(10); + + when(solverMock.findKNeighbours(testFeatureMatrix)) + .thenReturn(mockedNeighbours); + + final actual = classifier.predictProbabilities(testFeatures); + + final expectedProbabilities = [ + [ 10 / 35, 25 / 35, 0.0 ], + [ 0.0, 0.0, 1.0 ], + ]; + + expect(actual.rows, iterable2dAlmostEqualTo(expectedProbabilities)); + }); + test('should return a dataframe with a header, containing proper column ' 'names', () { final classLabels = [1, 2, 3]; From 4bfe222b2e4cfb0f42b81cc8d0b74bf23a8ebf58 Mon Sep 17 00:00:00 2001 From: Ilya Gyrdymov Date: Sat, 26 Oct 2019 12:34:06 +0300 Subject: [PATCH 3/3] KnnClassifierImpl unit tests: expectedOutcomes renamed to expectedProbabilities --- .../classifier/knn_classifier/knn_classifier_impl_test.dart | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/classifier/knn_classifier/knn_classifier_impl_test.dart b/test/classifier/knn_classifier/knn_classifier_impl_test.dart index 210090fa..126db5c1 100644 --- a/test/classifier/knn_classifier/knn_classifier_impl_test.dart +++ b/test/classifier/knn_classifier/knn_classifier_impl_test.dart @@ -310,13 +310,13 @@ void main() { final actual = classifier.predictProbabilities(testFeatures); - final expectedOutcomes = [ + final expectedProbabilities = [ [ 10 / 35, 15 / 35, 10 / 35 ], [ 11 / 41, 15 / 41, 15 / 41 ], - [ 5 / 11, 5 / 11, 1 / 11 ], + [ 5 / 11, 5 / 11, 1 / 11 ], ]; - expect(actual.rows, iterable2dAlmostEqualTo(expectedOutcomes)); + expect(actual.rows, iterable2dAlmostEqualTo(expectedProbabilities)); }); test('should return probability distribution of classes where '