Skip to content

Commit

Permalink
Merge pull request #98 from gyrdym/weighted-knn
Browse files Browse the repository at this point in the history
Weighted knn regression
  • Loading branch information
gyrdym committed Apr 20, 2019
2 parents 29fa449 + 34e9da4 commit 2d92f4c
Show file tree
Hide file tree
Showing 16 changed files with 371 additions and 27 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# Changelog

## 10.3.0
- kernels added:
- uniform
- epanechnikov
- cosine
- gaussian
- `NoNParametricRegressor.nearestNeighbor`: added possibility to specify the kernel function

## 10.2.1
- test coverage restored

Expand Down
2 changes: 0 additions & 2 deletions README.md
Expand Up @@ -32,8 +32,6 @@ the lib, please, do not use it in a browser.

## The library's structure

To serve the main purposes of machine learning, the library exposes the following classes:

- [CrossValidator](https://github.com/gyrdym/ml_algo/blob/master/lib/src/model_selection/cross_validator/cross_validator.dart). Factory, that creates
instances of a cross validator. In a few words, this entity allows researchers to fit different [hyperparameters](https://en.wikipedia.org/wiki/Hyperparameter_(machine_learning)) of machine learning
algorithms, assessing prediction quality on different parts of a dataset.
Expand Down
51 changes: 51 additions & 0 deletions benchmark/knn_regression.dart
@@ -0,0 +1,51 @@
// 5.7 sec
import 'dart:async';

import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/ml_algo.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

const observationsNum = 500;
const featuresNum = 20;

/// Benchmarks a full `predict` pass of a KNN regressor fitted on
/// randomly generated data ([observationsNum] * 2 training rows,
/// [observationsNum] test rows, [featuresNum] features each).
class KnnRegressorBenchmark extends BenchmarkBase {
  KnnRegressorBenchmark() : super('KNN regression benchmark');

  Matrix features;
  Matrix testFeatures;
  Matrix labels;
  Matrix testLabels;
  NoNParametricRegressor regressor;

  static void main() {
    KnnRegressorBenchmark().report();
  }

  @override
  void run() {
    // The measured operation: predicting outcomes for the whole test set.
    regressor.predict(testFeatures);
  }

  @override
  void setup() {
    regressor = NoNParametricRegressor.nearestNeighbor(k: 7);

    // Training set is twice the size of the test set.
    features = Matrix.fromRows(List.generate(observationsNum * 2,
        (i) => Vector.randomFilled(featuresNum)));
    labels = Matrix.fromColumns([Vector.randomFilled(observationsNum * 2)]);

    testFeatures = Matrix.fromRows(List.generate(observationsNum,
        (i) => Vector.randomFilled(featuresNum)));
    testLabels = Matrix.fromColumns([Vector.randomFilled(observationsNum)]);

    regressor.fit(features, labels);
  }

  // BUG FIX: the benchmark_harness hook is named `teardown()`, not
  // `tearDown()` — the previous camel-cased method never overrode anything
  // and was dead code.
  @override
  void teardown() {}
}

/// Entry point: runs the KNN regression benchmark and prints its report.
///
/// Typed as `Future<void>` (backward compatible with the bare `Future`)
/// so the analyzer can flag dropped futures at call sites.
Future<void> main() async {
  KnnRegressorBenchmark.main();
}
1 change: 1 addition & 0 deletions lib/ml_algo.dart
@@ -1,5 +1,6 @@
library ml_algo;

export 'package:ml_algo/src/algorithms/knn/kernel_type.dart';
export 'package:ml_algo/src/classifier/classifier.dart';
export 'package:ml_algo/src/classifier/linear_classifier.dart';
export 'package:ml_algo/src/metric/classification/type.dart';
Expand Down
12 changes: 12 additions & 0 deletions lib/src/algorithms/knn/kernel.dart
@@ -0,0 +1,12 @@
import 'dart:math' as math;

/// Signature of a kernel function: maps a normalized distance [u] to a weight.
typedef KernelFn = double Function(double u);

/// Uniform kernel: every neighbour gets the same weight, regardless of
/// its distance.
///
/// NOTE(review): the conventional uniform kernel is 0.5 on |u| <= 1 and 0
/// elsewhere; this variant returns 1 everywhere — confirm that callers
/// only pass distances inside the kernel's intended support.
double uniformKernel(double u) {
  return 1.0;
}

/// Epanechnikov kernel: `0.75 * (1 - u^2)`.
///
/// NOTE(review): there is no compact-support clamp, so for |u| > 1 the
/// weight goes negative (e.g. -74.25 at u = 10) instead of the conventional
/// 0 — confirm whether callers guarantee |u| <= 1.
double epanechnikovKernel(double u) {
  final squaredDistance = u * u;
  return 0.75 * (1 - squaredDistance);
}

/// Cosine kernel: `(pi / 4) * cos((pi / 2) * u)`.
///
/// NOTE(review): cos is periodic, so the value does not vanish for
/// |u| > 1 as the conventional cosine kernel does — confirm intended.
double cosineKernel(double u) {
  final cosine = math.cos(math.pi / 2 * u);
  return math.pi / 4 * cosine;
}

/// Gaussian kernel: the standard normal density evaluated at [u],
/// `exp(-u^2 / 2) / sqrt(2 * pi)`.
double gaussianKernel(double u) {
  // Same operation order as the arrow-body original, so the floating-point
  // result is bit-identical.
  final coefficient = 1 / math.sqrt(2 * math.pi);
  return coefficient * math.exp(-0.5 * u * u);
}
6 changes: 6 additions & 0 deletions lib/src/algorithms/knn/kernel_function_factory.dart
@@ -0,0 +1,6 @@
import 'package:ml_algo/src/algorithms/knn/kernel.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_type.dart';

/// A factory producing kernel functions by their [Kernel] type.
abstract class KernelFunctionFactory {
  /// Returns the kernel function corresponding to [type].
  KernelFn createByType(Kernel type);
}
23 changes: 23 additions & 0 deletions lib/src/algorithms/knn/kernel_function_factory_impl.dart
@@ -0,0 +1,23 @@
import 'package:ml_algo/src/algorithms/knn/kernel.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_function_factory.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_type.dart';

/// Default [KernelFunctionFactory] implementation, mapping each [Kernel]
/// type to the corresponding top-level kernel function.
class KernelFunctionFactoryImpl implements KernelFunctionFactory {
  const KernelFunctionFactoryImpl();

  @override
  KernelFn createByType(Kernel type) {
    if (type == Kernel.uniform) {
      return uniformKernel;
    }
    if (type == Kernel.epanechnikov) {
      return epanechnikovKernel;
    }
    if (type == Kernel.cosine) {
      return cosineKernel;
    }
    if (type == Kernel.gaussian) {
      return gaussianKernel;
    }
    // Unreachable for the current enum values; kept as a guard in case the
    // enum grows without this factory being updated.
    throw UnsupportedError('Unsupported kernel type - $type');
  }
}
1 change: 1 addition & 0 deletions lib/src/algorithms/knn/kernel_type.dart
@@ -0,0 +1 @@
/// Types of kernel function available for weighted KNN regression.
enum Kernel {
  uniform,
  epanechnikov,
  cosine,
  gaussian,
}
57 changes: 41 additions & 16 deletions lib/src/regressor/knn_regressor.dart
@@ -1,45 +1,70 @@
import 'package:ml_algo/src/algorithms/knn/kernel.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_function_factory.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_function_factory_impl.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_type.dart';
import 'package:ml_algo/src/algorithms/knn/knn.dart';
import 'package:ml_algo/src/metric/factory.dart';
import 'package:ml_algo/src/metric/metric_type.dart';
import 'package:ml_algo/src/regressor/non_parametric_regressor.dart';
import 'package:ml_algo/src/utils/default_parameter_values.dart';
import 'package:ml_linalg/distance.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

class KNNRegressor implements NoNParametricRegressor {
KNNRegressor({
this.k,
this.distanceType = Distance.euclidean,
this.solverFn = findKNeighbours,
});
int k,
Distance distance = Distance.euclidean,
FindKnnFn solverFn = findKNeighbours,
Kernel kernel = Kernel.uniform,
Type dtype = DefaultParameterValues.dtype,

final Distance distanceType;
final int k;
final FindKnnFn solverFn;
KernelFunctionFactory kernelFnFactory = const KernelFunctionFactoryImpl(),
}) :
_k = k,
_distanceType = distance,
_solverFn = solverFn,
_dtype = dtype,
_kernelFn = kernelFnFactory.createByType(kernel);

Matrix _observations;
Matrix _outcomes;
final Distance _distanceType;
final int _k;
final FindKnnFn _solverFn;
final KernelFn _kernelFn;
final Type _dtype;

Matrix _trainingObservations;
Matrix _trainingOutcomes;

Vector get _zeroVector => _cachedZeroVector ??= Vector.zero(
_outcomes.columnsNum);
_trainingOutcomes.columnsNum, dtype: _dtype);
Vector _cachedZeroVector;

@override
void fit(Matrix observations, Matrix outcomes, {Matrix initialWeights,
bool isDataNormalized}) {
_observations = observations;
_outcomes = outcomes;
if (observations.rowsNum != outcomes.rowsNum) {
throw Exception('Number of observations and number of outcomes have to be'
'equal');
}
if (_k > observations.rowsNum) {
throw Exception('Parameter k should be less than or equal to the number '
'of training observations');
}
_trainingObservations = observations;
_trainingOutcomes = outcomes;
}

@override
Matrix predict(Matrix observations) => Matrix.fromRows(
_generateOutcomes(observations).toList(growable: false));
_generateOutcomes(observations).toList(growable: false), dtype: _dtype);

Iterable<Vector> _generateOutcomes(Matrix observations) sync* {
for (final kNeighbours in solverFn(k, _observations, _outcomes,
observations, distance: distanceType)) {
for (final kNeighbours in _solverFn(_k, _trainingObservations, _trainingOutcomes,
observations, distance: _distanceType)) {
yield kNeighbours
.fold<Vector>(_zeroVector, (sum, pair) => sum + pair.label) / k;
.fold<Vector>(_zeroVector,
(sum, pair) => sum + pair.label * _kernelFn(pair.distance)) / _k;
}
}

Expand Down
14 changes: 12 additions & 2 deletions lib/src/regressor/non_parametric_regressor.dart
@@ -1,14 +1,24 @@
import 'package:ml_algo/src/algorithms/knn/kernel_type.dart';
import 'package:ml_algo/src/regressor/knn_regressor.dart';
import 'package:ml_algo/src/regressor/regressor.dart';
import 'package:ml_linalg/distance.dart';

/// A factory for the non-parametric family of Machine Learning algorithms
abstract class NoNParametricRegressor implements Regressor {
/// Creates an instance of KNN regressor
///
/// KNN here means "K nearest neighbor"
/// [k] a number of neighbors
///
/// [k] a number of nearest neighbours
///
/// [kernel] a type of kernel function, that will be used to find an outcome
/// for a new observation
///
/// [distance] a distance type, that will be used to measure a distance
/// between two observation vectors
factory NoNParametricRegressor.nearestNeighbor({
int k,
Distance distanceType,
Kernel kernel,
Distance distance,
}) = KNNRegressor;
}
4 changes: 2 additions & 2 deletions pubspec.yaml
@@ -1,6 +1,6 @@
name: ml_algo
description: Machine learning algorithms written in native dart (without bindings to any popular ML libraries, just pure Dart implementation)
version: 10.2.1
description: Machine learning algorithms written in native dart
version: 10.3.0
author: Ilia Gyrdymov <ilgyrd@gmail.com>
homepage: https://github.com/gyrdym/ml_algo

Expand Down
19 changes: 19 additions & 0 deletions test/algorithms/kernel_function_factory_test.dart
@@ -0,0 +1,19 @@
import 'package:ml_algo/src/algorithms/knn/kernel.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_function_factory_impl.dart';
import 'package:ml_algo/src/algorithms/knn/kernel_type.dart';
import 'package:test/test.dart';

void main() {
  group('KernelFunctionFactoryImpl', () {
    final factory = const KernelFunctionFactoryImpl();

    test('should create proper instance for kernels', () {
      // Every declared kernel type must map to a callable KernelFn.
      final checks = [
        Kernel.uniform,
        Kernel.epanechnikov,
        Kernel.cosine,
        Kernel.gaussian,
      ]
          .map((type) => factory.createByType(type) is KernelFn)
          .toList(growable: false);

      expect(checks, equals(List<bool>.filled(4, true)));
    });
  });
}
30 changes: 30 additions & 0 deletions test/algorithms/kernel_test.dart
@@ -0,0 +1,30 @@
import 'package:ml_algo/src/algorithms/knn/kernel.dart';
import 'package:test/test.dart';

void main() {
  group('Kernel', () {
    // The uniform kernel ignores the distance entirely.
    test('uniform should always return 1', () {
      expect(uniformKernel(0), 1);
      expect(uniformKernel(100000), 1);
    });

    test('epanechnikov should return proper value', () {
      expect(epanechnikovKernel(0), 0.75);
      expect(epanechnikovKernel(1), 0);
      // NOTE(review): this pins a negative weight at u = 10 — the
      // implementation lacks the conventional clamp to 0 for |u| > 1;
      // confirm this behaviour is intended rather than a missing guard.
      expect(epanechnikovKernel(10), -74.25);
    });

    test('cosine should return proper value', () {
      expect(cosineKernel(0), closeTo(0.7853, 1e-4));
      expect(cosineKernel(1), closeTo(0.0000, 1e-4));
      // NOTE(review): cos is periodic, so u = 20 yields pi / 4 again;
      // the conventional cosine kernel is 0 outside |u| <= 1.
      expect(cosineKernel(20), closeTo(0.7853, 1e-4));
    });

    test('gaussian should return proper value', () {
      expect(gaussianKernel(0), closeTo(0.3989, 1e-4));
      expect(gaussianKernel(1), closeTo(0.2419, 1e-4));
      expect(gaussianKernel(3), closeTo(0.0044, 1e-4));
      expect(gaussianKernel(10), closeTo(0.0000, 1e-4));
    });
  });
}
35 changes: 31 additions & 4 deletions test/regressor/knn_regressor_integration_test.dart
Expand Up @@ -3,8 +3,8 @@ import 'package:ml_linalg/matrix.dart';
import 'package:test/test.dart';

void main() {
group('KNNRegressor', () {
test('should consider', () {
group('KNNRegressor (integration)', () {
test('should predict values with help of uniform kernel', () {
final k = 2;
final features = Matrix.from([
[20, 20, 20, 20, 20],
Expand All @@ -23,11 +23,38 @@ void main() {
final testFeatures = Matrix.from([
[9.0, 9.0, 9.0, 9.0, 9.0],
]);
final regressor = NoNParametricRegressor.nearestNeighbor(k: k);
regressor.fit(features, outcomes);
final regressor = NoNParametricRegressor.nearestNeighbor(k: k)
..fit(features, outcomes);

final actual = regressor.predict(testFeatures);
expect(actual, equals([[4.0]]));
});

test('should predict values with help of epanechnikov kernel', () {
final k = 2;
final features = Matrix.from([
[20, 20, 20, 20, 20],
[30, 30, 30, 30, 30],
[15, 15, 15, 15, 15],
[25, 25, 25, 25, 25],
[10, 10, 10, 10, 10],
]);
final outcomes = Matrix.from([
[1.0],
[2.0],
[3.0],
[4.0],
[5.0],
]);
final testFeatures = Matrix.from([
[9.0, 9.0, 9.0, 9.0, 9.0],
]);
final regressor = NoNParametricRegressor.nearestNeighbor(k: k,
kernel: Kernel.epanechnikov)
..fit(features, outcomes);

final actual = regressor.predict(testFeatures);
expect(actual, equals([[-208.875]]));
});
});
}

0 comments on commit 2d92f4c

Please sign in to comment.