Skip to content

Commit

Permalink
Merge pull request #111 from gyrdym/dataframe-driven-predictor-constr…
Browse files Browse the repository at this point in the history
…uctor-api

Dataframe driven API for predictors
  • Loading branch information
gyrdym committed Oct 6, 2019
2 parents e443d63 + 28ce099 commit 4cd3f46
Show file tree
Hide file tree
Showing 154 changed files with 4,173 additions and 3,486 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -7,3 +7,5 @@ build/
# Directory created by dartdoc
doc/api/
/.dart_tool/

pubspec.lock
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# Changelog

## 13.0.0
- Predictor's API: `DataFrame` used instead of `Matrix`
- `DecisionTreeSolver`: data splitting logic fixed

## 12.1.2
- `xrange` package version locked

Expand Down
305 changes: 80 additions & 225 deletions README.md

Large diffs are not rendered by default.

23 changes: 11 additions & 12 deletions benchmark/cross_validator.dart
@@ -1,19 +1,16 @@
// 8.5 sec
import 'dart:async';

import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/ml_algo.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

const observationsNum = 1000;
const featuresNum = 20;
const columnsNum = 21;

class CrossValidatorBenchmark extends BenchmarkBase {
CrossValidatorBenchmark() : super('Cross validator benchmark');

Matrix features;
Matrix labels;
CrossValidator crossValidator;

static void main() {
Expand All @@ -22,18 +19,20 @@ class CrossValidatorBenchmark extends BenchmarkBase {

@override
void run() {
crossValidator.evaluate((trainFeatures, trainLabels) =>
ParameterlessRegressor.knn(trainFeatures, trainLabels, k: 7),
features, labels, MetricType.mape);
crossValidator.evaluate((trainSamples, targetFeatureNames) =>
KnnRegressor(trainSamples, targetFeatureNames.first, k: 7),
MetricType.mape);
}

@override
void setup() {
features = Matrix.fromRows(List.generate(observationsNum,
(i) => Vector.randomFilled(featuresNum)));
labels = Matrix.fromColumns([Vector.randomFilled(observationsNum)]);
final samples = Matrix.fromRows(List.generate(observationsNum,
(i) => Vector.randomFilled(columnsNum)));

final dataFrame = DataFrame.fromMatrix(samples);

crossValidator = CrossValidator.kFold(numberOfFolds: 5);
crossValidator = CrossValidator.kFold(dataFrame, ['col_20'],
numberOfFolds: 5);
}

void tearDown() {}
Expand Down
34 changes: 0 additions & 34 deletions benchmark/gradient_descent_regression.dart

This file was deleted.

24 changes: 15 additions & 9 deletions benchmark/knn_regression.dart → benchmark/knn_regressor.dart
@@ -1,22 +1,22 @@
// 5.7 sec
import 'dart:async';

// 10.0 sec (MacBook Air mid 2017)
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/ml_algo.dart';
import 'package:ml_algo/src/regressor/knn_regressor_impl.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

const observationsNum = 500;
const featuresNum = 20;

class KnnRegressorBenchmark extends BenchmarkBase {
KnnRegressorBenchmark() : super('KNN regression benchmark');
KnnRegressorBenchmark() : super('Knn regression benchmark');

Matrix features;
Matrix testFeatures;
DataFrame testFeatures;
Matrix labels;
Matrix testLabels;
ParameterlessRegressor regressor;
KnnRegressor regressor;


static void main() {
Expand All @@ -34,11 +34,17 @@ class KnnRegressorBenchmark extends BenchmarkBase {
(i) => Vector.randomFilled(featuresNum)));
labels = Matrix.fromColumns([Vector.randomFilled(observationsNum * 2)]);

testFeatures = Matrix.fromRows(List.generate(observationsNum,
(i) => Vector.randomFilled(featuresNum)));
testFeatures = DataFrame.fromMatrix(
Matrix.fromRows(
List.generate(
observationsNum,
(i) => Vector.randomFilled(featuresNum),
),
),
);
testLabels = Matrix.fromColumns([Vector.randomFilled(observationsNum)]);

regressor = ParameterlessRegressor.knn(features, labels, k: 7);
regressor = KnnRegressorImpl(features, labels, 'target', k: 7);
}

void tearDown() {}
Expand Down
44 changes: 44 additions & 0 deletions benchmark/linear_regressor.dart
@@ -0,0 +1,44 @@
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/ml_algo.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

const observationsNum = 200;
const featuresNum = 20;

/// Benchmark that constructs a [LinearRegressor] from randomly generated
/// fitting data on every [run].
class LinearRegressorBenchmark extends BenchmarkBase {
  LinearRegressorBenchmark() : super('Linear regressor');

  /// Fitting data assembled in [setup]: the feature columns followed by a
  /// single label column.
  DataFrame fittingData;

  /// Instantiates the benchmark and reports its result.
  static void main() => LinearRegressorBenchmark().report();

  @override
  void run() {
    // 'col_20' is expected to be the label column appended after the
    // `featuresNum` feature columns - presumably the dataframe auto-generates
    // `col_<index>` headers; confirm against `DataFrame.fromMatrix`.
    LinearRegressor(fittingData, 'col_20');
  }

  @override
  void setup() {
    // One random vector of `featuresNum` values per observation.
    final featureRows = List<Vector>.generate(
      observationsNum,
      (_) => Vector.randomFilled(featuresNum),
    );
    final features = Matrix.fromRows(featureRows);

    // A single random column holding one label per observation.
    final labelColumn = Vector.randomFilled(observationsNum);
    final labels = Matrix.fromColumns([labelColumn]);

    // Glue the label column to the right of the feature columns.
    final samples = Matrix.fromColumns([
      ...features.columns,
      ...labels.columns,
    ]);

    fittingData = DataFrame.fromMatrix(samples);
  }

  // NOTE(review): this method carries no `@override`; the harness hook looks
  // to be spelled `teardown` - confirm whether this is ever invoked.
  void tearDown() {}
}

/// Entry point: runs the linear regressor benchmark.
Future main() async => LinearRegressorBenchmark.main();
37 changes: 0 additions & 37 deletions benchmark/logistic_regression.dart

This file was deleted.

42 changes: 42 additions & 0 deletions benchmark/logistic_regressor.dart
@@ -0,0 +1,42 @@
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:ml_algo/ml_algo.dart';
import 'package:ml_dataframe/ml_dataframe.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_linalg/vector.dart';

const observationsNum = 200;
const columnsNum = 21;

/// Benchmark that constructs a [LogisticRegressor] from randomly generated
/// fitting data on every [run].
class LogisticRegressorBenchmark extends BenchmarkBase {
  LogisticRegressorBenchmark() : super('Logistic regressor');

  /// Fitting data assembled in [setup].
  DataFrame _data;

  /// Instantiates the benchmark and reports its result.
  static void main() => LogisticRegressorBenchmark().report();

  @override
  void run() {
    LogisticRegressor(
      _data,
      // Expected to be the last of the `columnsNum` generated columns -
      // presumably headers are auto-generated as `col_<index>`; confirm.
      'col_20',
      // NOTE(review): null presumably disables the coefficient-update
      // stopping criterion so that only `iterationsLimit` bounds the fit -
      // confirm against the regressor's documentation.
      minCoefficientsUpdate: null,
      iterationsLimit: 200,
    );
  }

  @override
  void setup() {
    // One random vector of `columnsNum` values per observation.
    final rows = List<Vector>.generate(
      observationsNum,
      (_) => Vector.randomFilled(columnsNum),
    );

    _data = DataFrame.fromMatrix(Matrix.fromRows(rows));
  }

  // NOTE(review): this method carries no `@override`; the harness hook looks
  // to be spelled `teardown` - confirm whether this is ever invoked.
  void tearDown() {}
}

/// Entry point: runs the logistic regressor benchmark.
Future main() async => LogisticRegressorBenchmark.main();
14 changes: 6 additions & 8 deletions benchmark/main.dart
@@ -1,12 +1,10 @@
import 'dart:async';

import 'gradient_descent_regression.dart' as gradientDescentRegressionBenchmark;
import 'logistic_regression.dart' as logisticRegressionBenchmark;
import 'algorithms/knn.dart' as knnBenchmark;
import 'linear_regressor.dart' as gradient_descent_regression_benchmark;
import 'logistic_regressor.dart' as logistic_regression_benchmark;
import 'algorithms/knn.dart' as knn_regressor_benchmark;

Future main() async {
// (MacBook Air mid 2017)
await gradientDescentRegressionBenchmark.main(); // 0.07 sec
await logisticRegressionBenchmark.main(); // 0.12 sec
await knnBenchmark.main(); // 5 sec
await gradient_descent_regression_benchmark.main(); // 0.07 sec
await logistic_regression_benchmark.main(); // 0.12 sec
await knn_regressor_benchmark.main(); // 5 sec
}
29 changes: 10 additions & 19 deletions example/main.dart
@@ -1,29 +1,20 @@
import 'dart:async';

import 'package:ml_algo/ml_algo.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:ml_dataframe/ml_dataframe.dart';

/// A simple usage example using synthetic data. To see more complex examples,
/// please visit the other directories in this folder.
Future main() async {
// Let's create a feature matrix (a set of independent variables)
final features = Matrix.fromList([
[2.0, 3.0, 4.0, 5.0],
[12.0, 32.0, 1.0, 3.0],
[27.0, 3.0, 0.0, 59.0],
]);

// Let's create dependent variables vector. It will be used as `true` values
// to adjust regression coefficients
final labels = Matrix.fromList([
[4.3],
[3.5],
[2.1],
]);
// Let's create a dataframe with fitting data. Let's assume that the target
// column is the fifth column (the column with index 4)
final dataFrame = DataFrame(<Iterable<num>>[
[ 2, 3, 4, 5, 4.3],
[12, 32, 1, 3, 3.5],
[27, 3, 0, 59, 2.1],
], headerExists: false);

// Let's create a regressor itself and train it
final regressor = LinearRegressor.gradient(
features, labels,
final regressor = LinearRegressor(
dataFrame, 'col_4',
iterationsLimit: 100,
initialLearningRate: 0.0005,
learningRateType: LearningRateType.constant);
Expand Down
11 changes: 7 additions & 4 deletions lib/ml_algo.dart
@@ -1,10 +1,13 @@
export 'package:ml_algo/src/algorithms/knn/kernel_type.dart';
export 'package:ml_algo/src/classifier/linear/logistic_regressor/logistic_regressor.dart';
export 'package:ml_algo/src/classifier/linear/softmax_regressor/softmax_regressor.dart';
export 'package:ml_algo/src/classifier/decision_tree_classifier.dart';
export 'package:ml_algo/src/classifier/logistic_regressor.dart';
export 'package:ml_algo/src/classifier/softmax_regressor.dart';
export 'package:ml_algo/src/linear_optimizer/gradient_optimizer/learning_rate_generator/learning_rate_type.dart';
export 'package:ml_algo/src/linear_optimizer/linear_optimizer_type.dart';
export 'package:ml_algo/src/linear_optimizer/regularization_type.dart';
export 'package:ml_algo/src/metric/classification/type.dart';
export 'package:ml_algo/src/metric/metric_type.dart';
export 'package:ml_algo/src/metric/regression/type.dart';
export 'package:ml_algo/src/model_selection/cross_validator/cross_validator.dart';
export 'package:ml_algo/src/regressor/knn_regressor.dart';
export 'package:ml_algo/src/regressor/linear_regressor.dart';
export 'package:ml_algo/src/regressor/parameterless_regressor.dart';
export 'package:ml_algo/src/solver/linear/gradient/learning_rate_generator/learning_rate_type.dart';

0 comments on commit 4cd3f46

Please sign in to comment.