Skip to content

Commit

Permalink
Merge b3edad7 into 50ab81e
Browse files Browse the repository at this point in the history
  • Loading branch information
gyrdym committed Oct 23, 2019
2 parents 50ab81e + b3edad7 commit 480fa89
Show file tree
Hide file tree
Showing 74 changed files with 2,001 additions and 733 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 13.3.0
- `KnnClassifier` class added

## 13.2.0
- `KNN algorithm`: standardization for distance added
- `KnnRegressor`:
Expand Down
10 changes: 7 additions & 3 deletions README.md
Expand Up @@ -20,16 +20,20 @@ the lib, please, do not use it in a browser.
assessing prediction quality on different parts of a dataset.

- #### Classification algorithms
- [LogisticRegressor](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/logistic_regressor.dart).
- [LogisticRegressor](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/logistic_regressor/logistic_regressor.dart).
A class, that performs linear binary classification of data. To use this kind of classifier your data have to be
[linearly separable](https://en.wikipedia.org/wiki/Linear_separability).

- [SoftmaxRegressor](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/softmax_regressor.dart).
- [SoftmaxRegressor](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/softmax_regressor/softmax_regressor.dart).
A class, that performs linear multiclass classification of data. To use this kind of classifier your data have to be
[linearly separable](https://en.wikipedia.org/wiki/Linear_separability).

- [DecisionTreeClassifier](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/decision_tree_classifier.dart)
- [DecisionTreeClassifier](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/decision_tree_classifier/decision_tree_classifier.dart)
A class, that performs classification, using decision trees. May work with data with non-linear patterns.

- [KnnClassifier](https://github.com/gyrdym/ml_algo/blob/master/lib/src/classifier/knn_classifier/knn_classifier.dart)
A class that performs classification using the `k nearest neighbours` algorithm - it makes a prediction based on
the `k` closest observations to the given one.

- #### Regression algorithms
- [LinearRegressor](https://github.com/gyrdym/ml_algo/blob/master/lib/src/regressor/linear_regressor.dart). A
Expand Down
39 changes: 0 additions & 39 deletions benchmark/algorithms/knn.dart

This file was deleted.

18 changes: 10 additions & 8 deletions benchmark/knn_regressor.dart
@@ -1,7 +1,6 @@
// 10.0 sec (MacBook Air mid 2017)
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/ml_algo.dart';
import 'package:ml_algo/src/regressor/knn_regressor_impl.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';
Expand All @@ -12,10 +11,7 @@ const featuresNum = 20;
class KnnRegressorBenchmark extends BenchmarkBase {
KnnRegressorBenchmark() : super('Knn regression benchmark');

Matrix features;
DataFrame testFeatures;
Matrix labels;
Matrix testLabels;
KnnRegressor regressor;


Expand All @@ -30,9 +26,16 @@ class KnnRegressorBenchmark extends BenchmarkBase {

@override
void setup() {
features = Matrix.fromRows(List.generate(observationsNum * 2,
final featureMatrix = Matrix.fromRows(List.generate(observationsNum * 2,
(i) => Vector.randomFilled(featuresNum)));
labels = Matrix.fromColumns([Vector.randomFilled(observationsNum * 2)]);

final labelMatrix = Matrix
.fromColumns([Vector.randomFilled(observationsNum * 2)]);

final observations = DataFrame.fromMatrix(Matrix.fromColumns([
...featureMatrix.columns,
...labelMatrix.columns,
]));

testFeatures = DataFrame.fromMatrix(
Matrix.fromRows(
Expand All @@ -42,9 +45,8 @@ class KnnRegressorBenchmark extends BenchmarkBase {
),
),
);
testLabels = Matrix.fromColumns([Vector.randomFilled(observationsNum)]);

regressor = KnnRegressorImpl(features, labels, 'target', k: 7);
regressor = KnnRegressor(observations, observations.header.last, 7);
}

void tearDown() {}
Expand Down
45 changes: 45 additions & 0 deletions benchmark/knn_solver.dart
@@ -0,0 +1,45 @@
// MacBook Air 13.3 mid 2017: ~ 5 sec
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/src/knn_solver/knn_solver.dart';
import 'package:ml_algo/src/knn_solver/knn_solver_impl.dart';
import 'package:ml_linalg/distance.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

/// Number of neighbours handed to the solver when it is constructed in
/// `KnnSolverBenchmark.setup`.
const k = 10;
/// Number of randomly filled rows (and labels) the solver is built from.
const trainObservationsNum = 2000;
/// Number of randomly filled rows that are queried on every benchmark run.
const observationsNum = 100;
/// Length of every randomly filled feature vector, both for the training rows
/// and for the queried rows.
const featuresNum = 100;

/// Benchmark that times `findKNeighbours` of a [KnnSolver] on randomly filled
/// data.
///
/// [setup] builds the solver from [trainObservationsNum] random rows and
/// prepares [observationsNum] random query rows; every [run] asks the solver
/// for the neighbours of those query rows.
class KnnSolverBenchmark extends BenchmarkBase {
  KnnSolverBenchmark() : super('KnnSolver benchmark');

  /// Solver under measurement; assigned in [setup].
  KnnSolver solver;

  /// Query rows passed to [solver] on every [run]; assigned in [setup].
  Matrix features;

  /// Instantiates the benchmark and calls `report` on it.
  static void main() => KnnSolverBenchmark().report();

  /// Builds a matrix of [rowsNum] rows, each one a randomly filled vector of
  /// [featuresNum] elements.
  static Matrix _randomRows(int rowsNum) {
    final rows = List.generate(
      rowsNum,
      (_) => Vector.randomFilled(featuresNum),
    );

    return Matrix.fromRows(rows);
  }

  @override
  void run() {
    final neighbours = solver.findKNeighbours(features);

    // The list itself is discarded; presumably `toList` is here to force the
    // whole result to be computed inside the measured call - TODO confirm.
    neighbours.toList(growable: false);
  }

  @override
  void setup() {
    final fittingFeatures = _randomRows(trainObservationsNum);
    final fittingLabels = Matrix.fromColumns([
      Vector.randomFilled(trainObservationsNum),
    ]);

    // NOTE(review): the trailing positional `false` is a flag whose meaning is
    // defined by `KnnSolverImpl` - confirm against its constructor.
    solver = KnnSolverImpl(
      fittingFeatures,
      fittingLabels,
      k,
      Distance.euclidean,
      false,
    );

    features = _randomRows(observationsNum);
  }
}

/// Entry point of the script: delegates to [KnnSolverBenchmark.main].
void main() => KnnSolverBenchmark.main();
2 changes: 1 addition & 1 deletion benchmark/main.dart
@@ -1,6 +1,6 @@
import 'linear_regressor.dart' as gradient_descent_regression_benchmark;
import 'logistic_regressor.dart' as logistic_regression_benchmark;
import 'algorithms/knn.dart' as knn_regressor_benchmark;
import 'knn_solver.dart' as knn_regressor_benchmark;

Future main() async {
// (MacBook Air mid 2017)
Expand Down
13 changes: 7 additions & 6 deletions lib/ml_algo.dart
@@ -1,13 +1,14 @@
export 'package:ml_algo/src/algorithms/knn/kernel_type.dart';
export 'package:ml_algo/src/classifier/decision_tree_classifier.dart';
export 'package:ml_algo/src/classifier/logistic_regressor.dart';
export 'package:ml_algo/src/classifier/softmax_regressor.dart';
export 'package:ml_algo/src/classifier/decision_tree_classifier/decision_tree_classifier.dart';
export 'package:ml_algo/src/classifier/knn_classifier/knn_classifier.dart';
export 'package:ml_algo/src/classifier/logistic_regressor/logistic_regressor.dart';
export 'package:ml_algo/src/classifier/softmax_regressor/softmax_regressor.dart';
export 'package:ml_algo/src/knn_kernel/kernel_type.dart';
export 'package:ml_algo/src/linear_optimizer/gradient_optimizer/learning_rate_generator/learning_rate_type.dart';
export 'package:ml_algo/src/linear_optimizer/linear_optimizer_type.dart';
export 'package:ml_algo/src/linear_optimizer/regularization_type.dart';
export 'package:ml_algo/src/metric/classification/type.dart';
export 'package:ml_algo/src/metric/metric_type.dart';
export 'package:ml_algo/src/metric/regression/type.dart';
export 'package:ml_algo/src/model_selection/cross_validator/cross_validator.dart';
export 'package:ml_algo/src/regressor/knn_regressor.dart';
export 'package:ml_algo/src/regressor/linear_regressor.dart';
export 'package:ml_algo/src/regressor/knn_regressor/knn_regressor.dart';
export 'package:ml_algo/src/regressor/linear_regressor/linear_regressor.dart';
18 changes: 0 additions & 18 deletions lib/src/algorithms/knn/kernel.dart

This file was deleted.

6 changes: 0 additions & 6 deletions lib/src/algorithms/knn/kernel_function_factory.dart

This file was deleted.

27 changes: 0 additions & 27 deletions lib/src/algorithms/knn/kernel_function_factory_impl.dart

This file was deleted.

62 changes: 0 additions & 62 deletions lib/src/algorithms/knn/knn.dart

This file was deleted.

@@ -1,6 +1,6 @@
import 'package:ml_algo/src/cost_function/cost_function_factory.dart';
import 'package:ml_algo/src/cost_function/cost_function_type.dart';
import 'package:ml_algo/src/di/injector.dart';
import 'package:ml_algo/src/di/dependencies.dart';
import 'package:ml_algo/src/helpers/add_intercept_if.dart';
import 'package:ml_algo/src/helpers/features_target_split.dart';
import 'package:ml_algo/src/linear_optimizer/gradient_optimizer/learning_rate_generator/learning_rate_type.dart';
Expand Down Expand Up @@ -43,8 +43,6 @@ LinearOptimizer createLogLikelihoodOptimizer(
final points = splits[0].toMatrix();
final labels = splits[1].toMatrix();

final dependencies = getDependencies();

final optimizerFactory = dependencies
.getDependency<LinearOptimizerFactory>();

Expand Down
@@ -1,5 +1,5 @@
import 'package:ml_algo/src/classifier/classifier.dart';
import 'package:ml_algo/src/classifier/decision_tree_classifier_impl.dart';
import 'package:ml_algo/src/classifier/decision_tree_classifier/decision_tree_classifier_impl.dart';
import 'package:ml_algo/src/decision_tree_solver/solver_factory/greedy_solver.dart';
import 'package:ml_algo/src/model_selection/assessable.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
Expand Down
@@ -1,4 +1,4 @@
import 'package:ml_algo/src/classifier/decision_tree_classifier.dart';
import 'package:ml_algo/src/classifier/decision_tree_classifier/decision_tree_classifier.dart';
import 'package:ml_algo/src/predictor/assessable_predictor_mixin.dart';
import 'package:ml_algo/src/decision_tree_solver/decision_tree_solver.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
Expand Down
55 changes: 55 additions & 0 deletions lib/src/classifier/knn_classifier/knn_classifier.dart
@@ -0,0 +1,55 @@
import 'package:ml_algo/src/classifier/classifier.dart';
import 'package:ml_algo/src/classifier/knn_classifier/knn_classifier_factory.dart';
import 'package:ml_algo/src/di/dependencies.dart';
import 'package:ml_algo/src/knn_kernel/kernel_type.dart';
import 'package:ml_algo/src/model_selection/assessable.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/distance.dart';
import 'package:ml_linalg/dtype.dart';

/// A classifier based on the `k nearest neighbours` (KNN) algorithm.
///
/// The KNN algorithm searches for the `k` labelled observations that are the
/// most similar to a given unlabelled one.
///
/// The majority class among the `k` found observations could serve as the
/// prediction for the unlabelled observation, but that may lead to an
/// imprecise result. For this reason the classifier uses the weighted version
/// of the KNN algorithm, where the weight of a particular observation may be
/// obtained through a kernel function.
abstract class KnnClassifier implements Assessable, Classifier {
  /// Creates a classifier via the [KnnClassifierFactory] obtained from
  /// [dependencies].
  ///
  /// Parameters:
  ///
  /// [fittingData] Labelled observations among which the [k] nearest
  /// neighbours of the given unlabelled observations will be searched. Must
  /// contain a [targetName] column.
  ///
  /// [targetName] The name of the column that contains labels (or outcomes).
  ///
  /// [k] The number of nearest neighbours to be found among [fittingData].
  ///
  /// [kernel] The type of the kernel function that will be used to predict an
  /// outcome for a new observation. Default value is [KernelType.gaussian].
  ///
  /// [distance] The distance type that will be used to measure the distance
  /// between two observation vectors. Default value is [Distance.euclidean].
  ///
  /// [dtype] The data type for all the numeric values used by the algorithm.
  /// Can affect performance or accuracy of the computations. Default value is
  /// [DType.float32].
  factory KnnClassifier(
    DataFrame fittingData,
    String targetName,
    int k, {
    KernelType kernel = KernelType.gaussian,
    Distance distance = Distance.euclidean,
    DType dtype = DType.float32,
  }) {
    final classifierFactory =
        dependencies.getDependency<KnnClassifierFactory>();

    return classifierFactory.create(
      fittingData,
      targetName,
      k,
      kernel,
      distance,
      dtype,
    );
  }
}

0 comments on commit 480fa89

Please sign in to comment.