Skip to content

Commit

Permalink
Merge 4bfe222 into 3624f04
Browse files Browse the repository at this point in the history
  • Loading branch information
gyrdym committed Oct 26, 2019
2 parents 3624f04 + 4bfe222 commit bbdd2d9
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 9 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 13.3.3
- `KnnClassifierImpl`: unit tests for `predictProbabilities` method added

## 13.3.2
- `KnnClassifier`: classifier instantiating refactored

Expand Down
24 changes: 16 additions & 8 deletions lib/src/classifier/knn_classifier/knn_classifier_impl.dart
Expand Up @@ -34,7 +34,7 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier {
DataFrame predict(DataFrame features) {
validateTestFeatures(features, _dtype);

final labelsToProbabilities = _getLabelsToProbabilitiesMapping(features);
final labelsToProbabilities = _getLabelToProbabilityMapping(features);
final labels = labelsToProbabilities.keys.toList();
final predictedOutcomes = _getProbabilityMatrix(labelsToProbabilities)
.rows
Expand All @@ -51,7 +51,7 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier {

@override
DataFrame predictProbabilities(DataFrame features) {
final labelsToProbabilities = _getLabelsToProbabilitiesMapping(features);
final labelsToProbabilities = _getLabelToProbabilityMapping(features);
final probabilityMatrix = _getProbabilityMatrix(labelsToProbabilities);

final header = labelsToProbabilities
Expand Down Expand Up @@ -83,13 +83,21 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier {
///
/// where each row is a classes probability distribution for the appropriate
/// feature record from test feature matrix
Map<num, List<num>> _getLabelsToProbabilitiesMapping(DataFrame features) {
final neighbours = _solver.findKNeighbours(features.toMatrix(_dtype));
Map<num, List<num>> _getLabelToProbabilityMapping(DataFrame features) {
final kNeighbourGroups = _solver.findKNeighbours(features.toMatrix(_dtype));
final classLabelsAsSet = Set<num>.from(_classLabels);

return neighbours.fold<Map<num, List<num>>>(
return kNeighbourGroups.fold<Map<num, List<num>>>(
{}, (allLabelsToProbabilities, kNeighbours) {
final labelsToWeights = kNeighbours
.fold<Map<num, num>>({}, _getLabelToWeightMapping);

final labelsToWeights = kNeighbours.fold<Map<num, num>>(
{}, (mapping, neighbour) {
if (!classLabelsAsSet.contains(neighbour.label.first)) {
throw Exception('Wrong KNN solver provided: unexpected neighbour '
'class label - ${neighbour.label.first}');
}
return _updateLabelToWeightMapping(mapping, neighbour);
});

final sumOfAllWeights = labelsToWeights
.values
Expand Down Expand Up @@ -132,7 +140,7 @@ class KnnClassifierImpl with AssessablePredictorMixin implements KnnClassifier {
.fromColumns(probabilityVectors, dtype: _dtype);
}

Map<num, num> _getLabelToWeightMapping(
Map<num, num> _updateLabelToWeightMapping(
Map<num, num> labelToWeightMapping,
Neighbour<Vector> neighbour,
) {
Expand Down
2 changes: 1 addition & 1 deletion pubspec.yaml
@@ -1,6 +1,6 @@
name: ml_algo
description: Machine learning algorithms written in native dart
version: 13.3.2
version: 13.3.3
author: Ilia Gyrdymov <ilgyrd@gmail.com>
homepage: https://github.com/gyrdym/ml_algo

Expand Down
256 changes: 256 additions & 0 deletions test/classifier/knn_classifier/knn_classifier_impl_test.dart
Expand Up @@ -2,6 +2,7 @@ import 'package:ml_algo/src/classifier/knn_classifier/knn_classifier_impl.dart';
import 'package:ml_algo/src/knn_solver/neigbour.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/linalg.dart';
import 'package:ml_tech/unit_testing/matchers/iterable_2d_almost_equal_to.dart';
import 'package:mockito/mockito.dart';
import 'package:test/test.dart';

Expand Down Expand Up @@ -241,5 +242,260 @@ void main() {
expect(actual.rows, equals(expectedOutcomes));
});
});

// Unit tests for [KnnClassifierImpl.predictProbabilities].
//
// The KNN solver and the kernel are mocked, so each test fully controls the
// neighbours found for every feature row and the weight assigned to each
// neighbour's distance. A neighbour's vector holds its class label; the
// classifier is expected to turn per-label weight sums into a probability
// distribution (weight / sum of all weights) per feature row.
//
// NOTE(review): the group name says 'predictProbability' while the method
// under test is `predictProbabilities` — presumably shorthand; confirm.
group('predictProbability', () {
final solverMock = KnnSolverMock();
final kernelMock = KernelMock();

// Default stub: any distance maps to weight 1 (uniform kernel). Tests that
// need specific weights override this with single-argument stubs below.
// NOTE(review): this stub passes two `any` arguments while later stubs pass
// one — presumably the second kernel parameter is optional; confirm against
// the kernel's signature.
setUp(() => when(kernelMock.getWeightByDistance(any, any)).thenReturn(1));

// Clear recorded interactions and stubs so tests stay independent.
tearDown(() {
reset(solverMock);
reset(kernelMock);
});

test('should return probability distribution of classes for each feature '
'row', () {
final classLabels = [1, 2, 3];
final classifier = KnnClassifierImpl(
'target',
classLabels,
kernelMock,
solverMock,
DType.float32,
);

// Three test feature rows; the row values themselves are irrelevant since
// the solver is mocked per matrix below.
final testFeatureMatrix = Matrix.fromList(
[
[10, 10, 10, 10],
[20, 20, 20, 20],
[30, 30, 30, 30],
],
);

final testFeatures = DataFrame.fromMatrix(testFeatureMatrix);

// One neighbour group per feature row: Neighbour(distance, [label]).
// Each group deliberately lists labels in a different order to check that
// probabilities are still reported in `classLabels` order (1, 2, 3).
final mockedNeighbours = [
[
Neighbour(1, Vector.fromList([1])),
Neighbour(20, Vector.fromList([2])),
Neighbour(21, Vector.fromList([3])),
],
[
Neighbour(33, Vector.fromList([1])),
Neighbour(44, Vector.fromList([3])),
Neighbour(93, Vector.fromList([2])),
],
[
Neighbour(-1, Vector.fromList([2])),
Neighbour(-30, Vector.fromList([1])),
Neighbour(-40, Vector.fromList([3])),
],
];

// Row 1 weights: label 1 -> 10, label 2 -> 15, label 3 -> 10 (sum 35).
when(kernelMock.getWeightByDistance(1)).thenReturn(10);
when(kernelMock.getWeightByDistance(20)).thenReturn(15);
when(kernelMock.getWeightByDistance(21)).thenReturn(10);

// Row 2 weights: label 1 -> 11, label 3 -> 15, label 2 -> 15 (sum 41).
when(kernelMock.getWeightByDistance(33)).thenReturn(11);
when(kernelMock.getWeightByDistance(44)).thenReturn(15);
when(kernelMock.getWeightByDistance(93)).thenReturn(15);

// Row 3 weights: label 2 -> 5, label 1 -> 5, label 3 -> 1 (sum 11).
when(kernelMock.getWeightByDistance(-1)).thenReturn(5);
when(kernelMock.getWeightByDistance(-30)).thenReturn(5);
when(kernelMock.getWeightByDistance(-40)).thenReturn(1);

when(solverMock.findKNeighbours(testFeatureMatrix))
.thenReturn(mockedNeighbours);

final actual = classifier.predictProbabilities(testFeatures);

// Each row: per-label weight divided by that row's total weight, columns
// ordered as `classLabels` = [1, 2, 3].
final expectedProbabilities = [
[ 10 / 35, 15 / 35, 10 / 35 ],
[ 11 / 41, 15 / 41, 15 / 41 ],
[ 5 / 11, 5 / 11, 1 / 11 ],
];

expect(actual.rows, iterable2dAlmostEqualTo(expectedProbabilities));
});

test('should return probability distribution of classes where '
'probabilities of absent class labels are 0.0', () {
final classLabels = [1, 2, 3];
final classifier = KnnClassifierImpl(
'target',
classLabels,
kernelMock,
solverMock,
DType.float32,
);

final testFeatureMatrix = Matrix.fromList(
[
[10, 10, 10, 10],
[20, 20, 20, 20],
[30, 30, 30, 30],
],
);

final testFeatures = DataFrame.fromMatrix(testFeatureMatrix);

// NOTE(review): only 2 neighbour groups are mocked for 3 feature rows;
// the expectation below accordingly has 2 rows — the output presumably
// follows the solver's groups, not the feature matrix. Confirm intent.
// Row 1 has no label-3 neighbour; row 2 has only label-3 neighbours.
final mockedNeighbours = [
[
Neighbour(1, Vector.fromList([2])),
Neighbour(20, Vector.fromList([2])),
Neighbour(21, Vector.fromList([1])),
],
[
Neighbour(1, Vector.fromList([3])),
Neighbour(20, Vector.fromList([3])),
Neighbour(21, Vector.fromList([3])),
],
];

// Weights by distance: 1 -> 10, 20 -> 15, 21 -> 10 (row sums: 35 and 35).
when(kernelMock.getWeightByDistance(1)).thenReturn(10);
when(kernelMock.getWeightByDistance(20)).thenReturn(15);
when(kernelMock.getWeightByDistance(21)).thenReturn(10);

when(solverMock.findKNeighbours(testFeatureMatrix))
.thenReturn(mockedNeighbours);

final actual = classifier.predictProbabilities(testFeatures);

// A class label with no neighbours in a row must get probability 0.0
// rather than being dropped from the distribution.
final expectedProbabilities = [
[ 10 / 35, 25 / 35, 0.0 ],
[ 0.0, 0.0, 1.0 ],
];

expect(actual.rows, iterable2dAlmostEqualTo(expectedProbabilities));
});

test('should return a dataframe with a header, containing proper column '
'names', () {
final classLabels = [1, 2, 3];

final classifier = KnnClassifierImpl(
'target',
classLabels,
kernelMock,
solverMock,
DType.float32,
);

final testFeatureMatrix = Matrix.fromList(
[
[10, 10, 10, 10],
],
);

final testFeatures = DataFrame.fromMatrix(testFeatureMatrix);

final mockedNeighbours = [
[
Neighbour(1, Vector.fromList([1])),
],
];

when(solverMock.findKNeighbours(testFeatureMatrix))
.thenReturn(mockedNeighbours);

// Weights are irrelevant here (the setUp stub returns 1 for any
// distance); only the generated header is asserted.
final actual = classifier.predictProbabilities(testFeatures);

expect(actual.header,
equals(['Class label 1', 'Class label 2', 'Class label 3']));
});

test('should consider initial order of column labels', () {
final firstClassLabel = 1;
final secondClassLabel = 2;
final thirdClassLabel = 3;

// Class labels deliberately passed out of natural order: the output
// columns must follow this order, not the sorted one.
final classLabels = [thirdClassLabel, firstClassLabel, secondClassLabel];

final classifier = KnnClassifierImpl(
'target',
classLabels,
kernelMock,
solverMock,
DType.float32,
);

final testFeatureMatrix = Matrix.fromList(
[
[10, 10, 10, 10],
],
);

final testFeatures = DataFrame.fromMatrix(testFeatureMatrix);

final mockedNeighbours = [
[
Neighbour(1, Vector.fromList([firstClassLabel])),
Neighbour(10, Vector.fromList([secondClassLabel])),
Neighbour(20, Vector.fromList([thirdClassLabel])),
],
];

when(solverMock.findKNeighbours(testFeatureMatrix))
.thenReturn(mockedNeighbours);

// Distinct weights per class so column misordering would be detected.
// Total weight: 100 + 90 + 70 = 260.
final firstClassWeight = 100;
final secondClassWeight = 90;
final thirdClassWeight = 70;

when(kernelMock.getWeightByDistance(1)).thenReturn(firstClassWeight);
when(kernelMock.getWeightByDistance(10)).thenReturn(secondClassWeight);
when(kernelMock.getWeightByDistance(20)).thenReturn(thirdClassWeight);

final actual = classifier.predictProbabilities(testFeatures);
final predictedProbabilities = actual.rows;

// Header and probability columns must both follow the constructor's
// classLabels order: [3, 1, 2].
expect(actual.header,
equals(['Class label 3', 'Class label 1', 'Class label 2']));
expect(predictedProbabilities, iterable2dAlmostEqualTo([
[thirdClassWeight / 260, firstClassWeight / 260, secondClassWeight / 260],
]));
});

test('should throw an exception if provided knn solver learned on wrong '
'class labels', () {
final firstClassLabel = 1;
final secondClassLabel = 2;
final thirdClassLabel = 3;

// A label the classifier was never told about (not in classLabels).
final unexpectedClassLabel = 100;

final classLabels = [thirdClassLabel, firstClassLabel, secondClassLabel];

final classifier = KnnClassifierImpl(
'target',
classLabels,
kernelMock,
solverMock,
DType.float32,
);

final testFeatureMatrix = Matrix.fromList(
[
[10, 10, 10, 10],
],
);

final testFeatures = DataFrame.fromMatrix(testFeatureMatrix);

// The mocked solver yields a neighbour whose label is outside the known
// class-label set, which the classifier must reject.
final mockedNeighbours = [
[
Neighbour(20, Vector.fromList([unexpectedClassLabel])),
],
];

when(solverMock.findKNeighbours(testFeatureMatrix))
.thenReturn(mockedNeighbours);

final actual = () => classifier.predictProbabilities(testFeatures);

expect(actual, throwsException);
});
});
});
}

0 comments on commit bbdd2d9

Please sign in to comment.