Skip to content

Commit

Permalink
Merge abb31df into a20bf61
Browse files Browse the repository at this point in the history
  • Loading branch information
gyrdym committed Jun 25, 2020
2 parents a20bf61 + abb31df commit 0496ff4
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 0.2.0
- `DataFrame`: `shuffle` method added

## 0.1.1
- `DataFrame`: `addSeries` method added

Expand Down
3 changes: 3 additions & 0 deletions lib/src/data_frame/data_frame.dart
Expand Up @@ -169,4 +169,7 @@ abstract class DataFrame implements Serializable {
/// The method may throw an error if the [DataFrame] contains data that
/// cannot be converted to numeric representation
Matrix toMatrix([DType dtype]);

/// Returns a new [DataFrame] containing the rows of this [DataFrame] in a
/// shuffled order.
///
/// [seed] is an optional seed for the random generator used to shuffle the
/// rows.
DataFrame shuffle({int seed});
}
11 changes: 11 additions & 0 deletions lib/src/data_frame/data_frame_impl.dart
Expand Up @@ -3,6 +3,7 @@ import 'package:ml_dataframe/src/data_frame/data_frame.dart';
import 'package:ml_dataframe/src/data_frame/data_frame_helpers.dart';
import 'package:ml_dataframe/src/data_frame/data_frame_json_keys.dart';
import 'package:ml_dataframe/src/data_frame/errors/wrong_series_shape_exception.dart';
import 'package:ml_dataframe/src/data_frame/helpers/generate_unordered_indices.dart';
import 'package:ml_dataframe/src/data_frame/series.dart';
import 'package:ml_dataframe/src/numerical_converter/helpers/from_numerical_converter_json.dart';
import 'package:ml_dataframe/src/numerical_converter/helpers/numerical_converter_to_json.dart';
Expand All @@ -12,6 +13,7 @@ import 'package:ml_linalg/dtype.dart';
import 'package:ml_linalg/linalg.dart';
import 'package:ml_linalg/matrix.dart';
import 'package:quiver/iterables.dart';
import 'dart:math' as math;

part 'data_frame_impl.g.dart';

Expand Down Expand Up @@ -135,6 +137,15 @@ class DataFrameImpl with SerializableMixin implements DataFrame {
dtype: dtype,
);

@override
DataFrame shuffle({int seed}) {
  // Materialise the rows once so that they can be looked up by position.
  final sourceRows = rows.toList();

  // Pick the rows in the order given by the generated indices; [seed] is
  // forwarded to the index generator.
  final reorderedRows = generateUnorderedIndices(shape.first, seed)
      .map((rowIndex) => sourceRows[rowIndex]);

  return DataFrame(reorderedRows, header: header, headerExists: false);
}

/// Builds a [DataFrame] from the series of this dataframe addressed by [ids].
///
/// Every id is resolved through the index operator of this dataframe.
DataFrame _sampleFromSeries(Iterable ids) {
  final selectedSeries = ids.map((dynamic id) => this[id]);

  return DataFrame.fromSeries(selectedSeries);
}

Expand Down
23 changes: 23 additions & 0 deletions lib/src/data_frame/helpers/generate_unordered_indices.dart
@@ -0,0 +1,23 @@
import 'dart:math' as math;

import 'package:quiver/iterables.dart';

/// Returns the indices from `0` to `length - 1` in a random order that
/// differs from the ascending one.
///
/// [seed] is passed to [math.Random], so the same [seed] always produces the
/// same order.
///
/// Returns an empty list if [length] is not positive. If [length] is `1`,
/// returns `[0]`: a single index has only one possible order, so there is
/// nothing to shuffle.
List<int> generateUnorderedIndices(int length, [int seed]) {
  if (length <= 0) {
    return [];
  }

  final indices = List.generate(length, (i) => i);

  // A single index cannot be reordered; without this guard the retry loop
  // below would never terminate.
  if (length == 1) {
    return indices;
  }

  final generator = math.Random(seed);

  // Whether every index still sits at its original position.
  bool isOrdered() {
    for (var i = 0; i < indices.length; i++) {
      if (indices[i] != i) {
        return false;
      }
    }

    return true;
  }

  // A shuffle may reproduce the ascending order by chance, so shuffle again
  // until at least one index has moved.
  do {
    indices.shuffle(generator);
  } while (isOrdered());

  return indices;
}
2 changes: 1 addition & 1 deletion pubspec.yaml
@@ -1,6 +1,6 @@
name: ml_dataframe
description: Dataframe - a way to store and manipulate data
version: 0.1.1
version: 0.2.0
homepage: https://github.com/gyrdym/ml_dataframe

environment:
Expand Down
32 changes: 32 additions & 0 deletions test/data_frame/data_frame_test.dart
Expand Up @@ -331,6 +331,38 @@ void main() {
});
});

// Tests for `DataFrame.shuffle`: header, row count, row order and row content
// of the shuffled dataframe are compared with the source dataframe.
group('shuffle', () {
  test('should return a new dataframe with the same header', () {
    final source = DataFrame(data);
    final result = source.shuffle();

    expect(result.header, source.header);
  });

  test('should return a new dataframe with the same number of rows', () {
    final source = DataFrame(data);
    final result = source.shuffle();
    final expectedRowCount = source.rows.length;

    expect(result.rows, hasLength(expectedRowCount));
  });

  test('should return a new dataframe with the different order of rows', () {
    final source = DataFrame(data);
    final result = source.shuffle();

    expect(result.rows, isNot(equals(source.rows)));
  });

  test('should return a new dataframe containing the same rows', () {
    final source = DataFrame(data);
    final result = source.shuffle();

    // Every row of the source dataframe must be present in the result.
    for (final sourceRow in source.rows) {
      expect(result.rows, contains(equals(sourceRow)));
    }
  });
});

group('serialization', () {
final json = {
dataFrameHeaderJsonKey: ['first', 'second', 'third'],
Expand Down

0 comments on commit 0496ff4

Please sign in to comment.