Skip to content

Commit

Permalink
Merge 6ebe3b9 into 4ed244a
Browse files Browse the repository at this point in the history
  • Loading branch information
gyrdym committed Oct 5, 2019
2 parents 4ed244a + 6ebe3b9 commit d2f01f1
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 94 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 0.0.10
- `DataFrame.sampleFromSeries` method's signature changed

## 0.0.9
- `dtype` field added to `DataFrame` interface

Expand Down
17 changes: 9 additions & 8 deletions lib/src/data_frame/data_frame.dart
Expand Up @@ -92,7 +92,7 @@ abstract class DataFrame {

Iterable<Iterable<dynamic>> get rows;

/// Returns series (columns) collection of the [DataFrame].
/// Returns a lazy series (columns) collection of the [DataFrame].
///
/// [Series] is roughly a column and its header (name)
Iterable<Series> get series;
Expand All @@ -103,14 +103,15 @@ abstract class DataFrame {
/// series)
Series operator [](Object key);

/// Returns a collection of dataframes, sampled from series, that are
/// obtained from provided [indices] parts or [names] parts.
/// Returns a dataframe sampled from the series that are selected by the
/// provided series [indices] or series [names].
///
/// Series id (index or name) may repeat in different parts but not in the
/// same part
Iterable<DataFrame> sampleFromSeries({
Iterable<Iterable<int>> indices,
Iterable<Iterable<String>> names,
/// If [indices] are specified, [names] parameter will be ignored.
///
/// Series indices or series names may repeat.
DataFrame sampleFromSeries({
Iterable<int> indices,
Iterable<String> names,
});

/// Returns a new [DataFrame] without specified series (columns)
Expand Down
29 changes: 21 additions & 8 deletions lib/src/data_frame/data_frame_impl.dart
Expand Up @@ -56,13 +56,26 @@ class DataFrameImpl implements DataFrame {
}

@override
Iterable<DataFrame> sampleFromSeries({
Iterable<Iterable<int>> indices = const [],
Iterable<Iterable<String>> names = const [],
DataFrame sampleFromSeries({
  Iterable<int> indices = const [],
  Iterable<String> names = const [],
}) {
  // Non-empty indices take precedence: names are not even validated when
  // indices are supplied.
  if (indices.isNotEmpty) {
    final maxIdx = series.length - 1;
    final outRangedIndices = indices.where((idx) => idx < 0 || idx > maxIdx);
    if (outRangedIndices.isNotEmpty) {
      throw RangeError('Some of provided indices are out of range: '
          '$outRangedIndices, while the valid range is 0..$maxIdx (both '
          'inclusive)');
    }
    return _sampleFromSeries(indices);
  }
  // Fall back to name-based sampling; every requested name has to be present
  // in the header, otherwise the lookup below could not resolve it.
  final absentNames = Set<String>
      .from(names)
      .difference(Set.from(header));
  if (absentNames.isNotEmpty) {
    throw Exception('Columns with the names $absentNames do not exist');
  }
  return _sampleFromSeries(names);
}

Expand All @@ -88,11 +101,11 @@ class DataFrameImpl implements DataFrame {
dtype: dtype,
);

Iterable<DataFrame> _sampleFromSeries(Iterable<Iterable> allIds) =>
allIds.map((ids) {
final uniqueIds = Set<dynamic>.from(ids);
return DataFrame.fromSeries(uniqueIds.map((dynamic id) => this[id]));
});
// Builds a new [DataFrame] out of the series found via `this[id]` for each
// entry of [ids], keeping the given order and passing on this frame's [dtype].
DataFrame _sampleFromSeries(Iterable ids) {
  final pickedSeries = ids.map((dynamic id) => this[id]);
  return DataFrame.fromSeries(pickedSeries, dtype: dtype);
}

DataFrame _dropByIndices(Iterable<int> indices, Iterable<Series> series) {
final uniqueIndices = Set<int>.from(indices);
Expand Down
8 changes: 8 additions & 0 deletions lib/src/data_frame/series.dart
Expand Up @@ -5,9 +5,17 @@ class Series {
? Set<dynamic>.from(data)
: const <dynamic>[];

/// The name of the [Series]
final String name;

/// Returns a lazy iterable of the data contained in the [Series]
final Iterable data;

/// Returns a lazy iterable of the [data]'s unique values if the [Series] is
/// marked as [isDiscrete]. If [isDiscrete] is `false`, an empty list will be
/// returned
final Iterable discreteValues;

/// Shows whether the [Series] contains just discrete values instead of
/// continuous ones; it is `true` exactly when [discreteValues] is not empty
bool get isDiscrete => discreteValues.isNotEmpty;
}
2 changes: 1 addition & 1 deletion pubspec.yaml
@@ -1,6 +1,6 @@
name: ml_dataframe
description: Dataframe - a way to store and manipulate data
version: 0.0.9
version: 0.0.10
author: Ilia Gyrdymov <ilgyrd@gmail.com>
homepage: https://github.com/gyrdym/ml_dataframe

Expand Down
140 changes: 63 additions & 77 deletions test/data_frame/data_frame_test.dart
Expand Up @@ -193,122 +193,108 @@ void main() {
];

final dataFrame = DataFrame(data, headerExists: false);
final dataFrames = dataFrame.sampleFromSeries(indices: [
[0, 1],
[2, 3],
[4],
]).toList();

expect(dataFrames, hasLength(3));

expect(dataFrames[0].rows, equals([
[ '1', 2 ],
[ 10, 12 ],
[ -10, 202 ],
]));
final sampled = dataFrame.sampleFromSeries(indices: [0, 1, 2, 3, 4]);

expect(dataFrames[1].rows, equals([
[ 3, 0 ],
[ 323, 1.5 ],
[ 1000, '1.5' ],
expect(sampled.rows, equals([
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
]));
});

test('should support repeating indices', () {
final data = [
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
];

expect(dataFrames[2].rows, equals([
[ 32 ],
[ 1132 ],
[ 0.005 ],
final dataFrame = DataFrame(data, headerExists: false);
final sampled = dataFrame.sampleFromSeries(indices: [0, 1, 0, 1]);

expect(sampled.rows, equals([
[ '1', 2, '1', 2 ],
[ 10, 12, 10, 12 ],
[ -10, 202, -10, 202 ],
]));
});

test('should support repeating indices in different parts', () {
test('should sample dataframe by series names', () {
final data = [
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
];

final dataFrame = DataFrame(data, headerExists: false);
final dataFrames = dataFrame.sampleFromSeries(indices: [
[0, 1],
[0, 1],
]).toList();
final sampled = dataFrame.sampleFromSeries(
names: ['col_0', 'col_1', 'col_2', 'col_3', 'col_4']);

expect(dataFrames, hasLength(2));

expect(dataFrames[0].rows, equals([
[ '1', 2 ],
[ 10, 12 ],
[ -10, 202 ],
expect(sampled.rows, equals([
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
]));
});

expect(dataFrames[1].rows, equals([
[ '1', 2 ],
[ 10, 12 ],
[ -10, 202 ],
test('should ignore names parameter if indices parameter is '
'provided', () {
final data = [
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
];

final dataFrame = DataFrame(data, headerExists: false);
final sampled = dataFrame.sampleFromSeries(
indices: [1],
names: ['col_0', 'col_1', 'col_2', 'col_3', 'col_4']);

expect(sampled.rows, equals([
[ 2, ],
[ 12, ],
[ 202, ],
]));
});

test('should skip repeating indices in the same part', () {
test('should throw an error if outranged indices are provided', () {
final data = [
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
];

final dataFrame = DataFrame(data, headerExists: false);
final dataFrames = dataFrame.sampleFromSeries(indices: [
[0, 0],
[2, 2],
]).toList();
final actual = () => dataFrame.sampleFromSeries(indices: [1, 40]);

expect(dataFrames, hasLength(2));
expect(actual, throwsRangeError);
});

expect(dataFrames[0].rows, equals([
[ '1' ],
[ 10 ],
[ -10 ],
]));
test('should throw an error if outranged negative indices are provided', () {
final data = [
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
];

expect(dataFrames[1].rows, equals([
[ 3 ],
[ 323 ],
[ 1000 ],
]));
final dataFrame = DataFrame(data, headerExists: false);
final actual = () => dataFrame.sampleFromSeries(indices: [2, -1]);

expect(actual, throwsRangeError);
});

test('should sample dataframe by series names', () {
test('should throw an error if names of non existent columns are '
'provided', () {
final data = [
[ '1', 2, 3, 0, 32 ],
[ 10, 12, 323, 1.5, 1132 ],
[ -10, 202, 1000, '1.5', 0.005 ],
];

final dataFrame = DataFrame(data, headerExists: false);
final dataFrames = dataFrame.sampleFromSeries(names: [
['col_0', 'col_1'],
['col_2', 'col_3'],
['col_4'],
]).toList();

expect(dataFrames, hasLength(3));

expect(dataFrames[0].rows, equals([
[ '1', 2 ],
[ 10, 12 ],
[ -10, 202 ],
]));
final actual = () => dataFrame.sampleFromSeries(names: ['col_0', 'col_100']);

expect(dataFrames[1].rows, equals([
[ 3, 0 ],
[ 323, 1.5 ],
[ 1000, '1.5' ],
]));

expect(dataFrames[2].rows, equals([
[ 32 ],
[ 1132 ],
[ 0.005 ],
]));
expect(actual, throwsException);
});
});
});
Expand Down

0 comments on commit d2f01f1

Please sign in to comment.