Skip to content

Commit

Permalink
Merge 833518f into 3d9135c
Browse files Browse the repository at this point in the history
  • Loading branch information
gyrdym committed Oct 5, 2019
2 parents 3d9135c + 833518f commit 12cc895
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 68 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# Changelog

## 0.0.11
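- `dtype` parameter removed from the `DataFrame` constructors (`DataFrame`, `DataFrame.fromSeries`) and from the `fromCsv` factory; the `DataFrame.dtype` getter is removed as well
- Optional `dtype` parameter (defaults to `DType.float32`) added to the `toMatrix` method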

## 0.0.10
- `DataFrame.sampleFromSeries` method's signature changed

Expand Down
2 changes: 1 addition & 1 deletion lib/ml_dataframe.dart
@@ -1,3 +1,3 @@
export 'package:ml_dataframe/src/data_frame/data_frame.dart';
export 'package:ml_dataframe/src/data_frame/factories/from_csv.dart';
export 'package:ml_dataframe/src/data_frame/series.dart';
export 'package:ml_dataframe/src/data_frame/series.dart';
22 changes: 6 additions & 16 deletions lib/src/data_frame/data_frame.dart
Expand Up @@ -43,7 +43,6 @@ abstract class DataFrame {
String autoHeaderPrefix = defaultHeaderPrefix,
Iterable<int> columns = const [],
Iterable<String> columnNames = const [],
DType dtype = DType.float32,
}
) => fromRawData(
data,
Expand All @@ -52,19 +51,13 @@ abstract class DataFrame {
autoHeaderPrefix: autoHeaderPrefix,
columns: columns,
columnNames: columnNames,
dtype: dtype,
);

factory DataFrame.fromSeries(
Iterable<Series> series,
{
DType dtype = DType.float32,
}
) => DataFrameImpl.fromSeries(
series,
NumericalConverterImpl(false),
dtype,
);
/// Creates a [DataFrame] from a collection of [Series].
///
/// The [series] are handed over unchanged to [DataFrameImpl.fromSeries]
/// together with a freshly created `NumericalConverterImpl`.
factory DataFrame.fromSeries(Iterable<Series> series) =>
DataFrameImpl.fromSeries(
series,
// NOTE(review): the meaning of the `false` flag is not visible from
// here - confirm against NumericalConverterImpl before changing it.
NumericalConverterImpl(false),
);

factory DataFrame.fromMatrix(
Matrix matrix,
Expand All @@ -84,9 +77,6 @@ abstract class DataFrame {
discreteColumnNames: discreteColumnNames,
);

/// A type used for converting the [DataFrame] into [Matrix]
DType get dtype;

/// Returns a collection of names of all series (like a table header)
Iterable<String> get header;

Expand Down Expand Up @@ -124,5 +114,5 @@ abstract class DataFrame {
///
/// The method may throw an error if the [DataFrame] contains data that
/// cannot be converted to a numerical representation.
Matrix toMatrix();
Matrix toMatrix([DType dtype]);
}
49 changes: 22 additions & 27 deletions lib/src/data_frame/data_frame_impl.dart
Expand Up @@ -8,28 +8,29 @@ import 'package:ml_linalg/matrix.dart';
import 'package:quiver/iterables.dart';

class DataFrameImpl implements DataFrame {
DataFrameImpl(this.rows, this.header, this._toNumber, this.dtype) :
DataFrameImpl(this.rows, this.header, this._toNumber) :
series = convertRowsToSeries(header, rows);

DataFrameImpl.fromSeries(this.series, this._toNumber, this.dtype) :
DataFrameImpl.fromSeries(this.series, this._toNumber) :
header = series.map((series) => series.name),
rows = convertSeriesToRows(series);

DataFrameImpl.fromMatrix(
this._cachedMatrix,
Matrix matrix,
this.header,
this._toNumber,
Iterable<bool> areSeriesDiscrete,
) :
dtype = _cachedMatrix.dtype,
rows = _cachedMatrix.rows,
series = zip([header, _cachedMatrix.columns,
areSeriesDiscrete ?? List.filled(_cachedMatrix.columnsNum, false)])
rows = matrix.rows,
series = zip([header, matrix.columns,
areSeriesDiscrete ?? List.filled(matrix.columnsNum, false)])
.map((seriesData) => Series(
seriesData[0] as String,
seriesData[1] as Iterable,
isDiscrete: seriesData[2] as bool,
));
)) {
_cachedMatrices[matrix.dtype] = matrix;
}

@override
final Iterable<String> header;
Expand All @@ -40,12 +41,9 @@ class DataFrameImpl implements DataFrame {
@override
final Iterable<Series> series;

@override
final DType dtype;

final NumericalConverter _toNumber;

Matrix _cachedMatrix;
final Map<DType, Matrix> _cachedMatrices = {};

@override
Series operator [](Object key) {
Expand Down Expand Up @@ -74,7 +72,7 @@ class DataFrameImpl implements DataFrame {
.from(names)
.difference(Set.from(header));
if (absentNames.isNotEmpty) {
throw Exception('Columns with the names $absentNames do not exist');
throw Exception('Columns with names $absentNames do not exist');
};
return _sampleFromSeries(names);
}
Expand All @@ -92,34 +90,31 @@ class DataFrameImpl implements DataFrame {
}

@override
Matrix toMatrix() =>
_cachedMatrix ??= Matrix.fromList(
_toNumber
.convertRawData(rows)
.map((row) => row.toList())
.toList(),
dtype: dtype,
);
// Converts the data frame to a [Matrix] of the requested [dtype]
// (`DType.float32` when the argument is omitted).
//
// Results are memoized per dtype in `_cachedMatrices`: `??=` returns the
// matrix already stored under [dtype], and only when that entry is null does
// it build a new matrix, store it under [dtype] and return it.
Matrix toMatrix([DType dtype = DType.float32]) =>
_cachedMatrices[dtype] ??= Matrix.fromList(
// Rows are passed through the numerical converter, then every converted
// row and the outer collection are materialized as lists before being
// given to `Matrix.fromList`.
_toNumber
.convertRawData(rows)
.map((row) => row.toList())
.toList(),
dtype: dtype,
);

DataFrame _sampleFromSeries(Iterable ids) =>
DataFrame.fromSeries(
ids.map((dynamic id) => this[id]),
dtype: dtype,
);
DataFrame.fromSeries(ids.map((dynamic id) => this[id]));

DataFrame _dropByIndices(Iterable<int> indices, Iterable<Series> series) {
final uniqueIndices = Set<int>.from(indices);
final newSeries = enumerate(series)
.where((indexedSeries) => !uniqueIndices.contains(indexedSeries.index))
.map((indexedSeries) => indexedSeries.value);
return DataFrame.fromSeries(newSeries, dtype: dtype);
return DataFrame.fromSeries(newSeries);
}

DataFrame _dropByNames(Iterable<String> names, Iterable<Series> series) {
final uniqueNames = Set<String>.from(names);
final newSeries = series
.where((series) => !uniqueNames.contains(series.name));
return DataFrame.fromSeries(newSeries, dtype: dtype);
return DataFrame.fromSeries(newSeries);
}

Map<String, Series> _getCachedOrCreateSeriesByName() =>
Expand Down
3 changes: 0 additions & 3 deletions lib/src/data_frame/factories/from_csv.dart
Expand Up @@ -2,7 +2,6 @@ import 'package:csv/csv.dart';
import 'package:ml_dataframe/src/data_frame/data_frame.dart';
import 'package:ml_dataframe/src/data_frame/factories/from_raw_data.dart';
import 'package:ml_dataframe/src/data_reader/data_reader.dart';
import 'package:ml_linalg/dtype.dart';

Future<DataFrame> fromCsv(
String fileName,
Expand All @@ -14,7 +13,6 @@ Future<DataFrame> fromCsv(
String autoHeaderPrefix = defaultHeaderPrefix,
Iterable<int> columns = const [],
Iterable<String> columnNames = const [],
DType dtype = DType.float32,
}
) async {
final reader = DataReader.csv(fileName, columnDelimiter, eol);
Expand All @@ -27,6 +25,5 @@ Future<DataFrame> fromCsv(
autoHeaderPrefix: autoHeaderPrefix,
columns: columns,
columnNames: columnNames,
dtype: dtype,
);
}
3 changes: 1 addition & 2 deletions lib/src/data_frame/factories/from_raw_data.dart
Expand Up @@ -12,7 +12,6 @@ DataFrame fromRawData(Iterable<Iterable<dynamic>> data, {
String autoHeaderPrefix = defaultHeaderPrefix,
Iterable<int> columns = const [],
Iterable<String> columnNames = const [],
DType dtype = DType.float32,
}) {
final columnsNum = columns.isNotEmpty
? columns.length
Expand Down Expand Up @@ -55,5 +54,5 @@ DataFrame fromRawData(Iterable<Iterable<dynamic>> data, {
.map((indexedName) => indexedName.value);

return DataFrameImpl(selectedData, selectedHeader,
NumericalConverterImpl(false), dtype);
NumericalConverterImpl(false));
}
2 changes: 1 addition & 1 deletion pubspec.yaml
@@ -1,6 +1,6 @@
name: ml_dataframe
description: Dataframe - a way to store and manipulate data
version: 0.0.10
version: 0.0.11
author: Ilia Gyrdymov <ilgyrd@gmail.com>
homepage: https://github.com/gyrdym/ml_dataframe

Expand Down
12 changes: 0 additions & 12 deletions test/data_frame/data_frame_from_matrix_test.dart
Expand Up @@ -42,7 +42,6 @@ void main() {
expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000]));

expect(dataFrame.toMatrix(), same(matrix));
expect(dataFrame.dtype, DType.float32);
});

test('should initialize from matrix with predefined header', () {
Expand Down Expand Up @@ -83,7 +82,6 @@ void main() {
expect(dataFrame['little'].data, equals([4, 40, 400, 4000]));

expect(dataFrame.toMatrix(), same(matrix));
expect(dataFrame.dtype, DType.float32);
});

test('should ignore predefined header list elements that are out of '
Expand Down Expand Up @@ -121,7 +119,6 @@ void main() {

expect(dataFrame[3].name, 'little');
expect(dataFrame['little'].data, equals([4, 40, 400, 4000]));
expect(dataFrame.dtype, DType.float32);
});

test('should ignore predefined header list elements that are out of '
Expand Down Expand Up @@ -186,7 +183,6 @@ void main() {
expect(dataFrame[1].data, equals([4, 40, 400, 4000]));
expect(dataFrame['doth'].data, equals([4, 40, 400, 4000]));

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), equals([
[1, 4 ],
[10, 40 ],
Expand Down Expand Up @@ -234,7 +230,6 @@ void main() {
expect(dataFrame[3].data, equals([4, 40, 400, 4000]));
expect(dataFrame['super_3'].data, equals([4, 40, 400, 4000]));

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), same(matrix));
});

Expand Down Expand Up @@ -266,7 +261,6 @@ void main() {
expect(dataFrame[1].data, equals([4, 40, 400, 4000]));
expect(dataFrame['col_1'].data, equals([4, 40, 400, 4000]));

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), equals([
[1, 4 ],
[10, 40 ],
Expand Down Expand Up @@ -314,7 +308,6 @@ void main() {
expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000]));
expect(dataFrame['col_3'].isDiscrete, isFalse);

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), same(matrix));
});

Expand Down Expand Up @@ -359,7 +352,6 @@ void main() {
expect(dataFrame['fourth'].data, equals([4, 40, 400, 4000]));
expect(dataFrame['fourth'].isDiscrete, isTrue);

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), same(matrix));
});

Expand Down Expand Up @@ -403,7 +395,6 @@ void main() {
expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000]));
expect(dataFrame['col_3'].isDiscrete, isTrue);

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), same(matrix));
});

Expand Down Expand Up @@ -447,7 +438,6 @@ void main() {
expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000]));
expect(dataFrame['col_3'].isDiscrete, isFalse);

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), same(matrix));
});

Expand Down Expand Up @@ -490,7 +480,6 @@ void main() {
expect(dataFrame[3].data, equals([4, 40, 400, 4000]));
expect(dataFrame['col_3'].data, equals([4, 40, 400, 4000]));
expect(dataFrame['col_3'].isDiscrete, isFalse);
expect(dataFrame.dtype, DType.float32);

expect(dataFrame.toMatrix(), same(matrix));
});
Expand Down Expand Up @@ -536,7 +525,6 @@ void main() {
expect(dataFrame['fourth'].data, equals([4, 40, 400, 4000]));
expect(dataFrame['fourth'].isDiscrete, isTrue);

expect(dataFrame.dtype, DType.float32);
expect(dataFrame.toMatrix(), same(matrix));
});
});
Expand Down
3 changes: 0 additions & 3 deletions test/data_frame/data_frame_from_raw_data_test.dart
@@ -1,5 +1,4 @@
import 'package:ml_dataframe/src/data_frame/data_frame.dart';
import 'package:ml_linalg/dtype.dart';
import 'package:test/test.dart';

void main() {
Expand All @@ -14,7 +13,6 @@ void main() {
final frame = DataFrame(
data,
headerExists: false,
dtype: DType.float64,
);

expect(frame.header,
Expand All @@ -33,7 +31,6 @@ void main() {
[true, false, true],
['32', '1132', 'abs'],
]));
expect(frame.dtype, DType.float64);
});

test('should initialize from dynamic-typed data with header row', () {
Expand Down
4 changes: 1 addition & 3 deletions test/data_frame/data_frame_from_series_test.dart
@@ -1,6 +1,5 @@
import 'package:ml_dataframe/src/data_frame/data_frame.dart';
import 'package:ml_dataframe/src/data_frame/series.dart';
import 'package:ml_linalg/dtype.dart';
import 'package:test/test.dart';

void main() {
Expand All @@ -11,7 +10,7 @@ void main() {
Series('second', <dynamic>[10, 12, 323, false, '1132']),
Series('third', <dynamic>[-10, 202, null, true, 'abs']),
];
final frame = DataFrame.fromSeries(series, dtype: DType.float64);
final frame = DataFrame.fromSeries(series);

expect(frame.header, equals(['first', 'second', 'third']));
expect(frame.rows, equals([
Expand All @@ -28,7 +27,6 @@ void main() {
<dynamic>[10, 12, 323, false, '1132'],
<dynamic>[-10, 202, null, true, 'abs'],
]));
expect(frame.dtype, DType.float64);
});
});
}

0 comments on commit 12cc895

Please sign in to comment.